OSHMEM: better error messages when failing
Provide users with the correct failure reason. Fixes trac:4433. This commit was SVN r31202. The following Trac tickets were found above: Ticket 4433 --> https://svn.open-mpi.org/trac/ompi/ticket/4433
Этот коммит содержится в:
родитель
8c2b9658ce
Коммит
be3fc7bf20
@ -7,6 +7,8 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_ompidata_DATA = base/help-oshmem-sshmem.txt
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
|
25
oshmem/mca/sshmem/base/help-oshmem-sshmem.txt
Обычный файл
25
oshmem/mca/sshmem/base/help-oshmem-sshmem.txt
Обычный файл
@ -0,0 +1,25 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open SHMEM MCA error messages.
|
||||
#
|
||||
[create segment failure]
|
||||
The OpenSHMEM "(%s)" plugin in the "sshmem" framework failed to
|
||||
allocate a shared memory segment via the %s system call. This
|
||||
usually means that there are not enough resources available to the memory subsystem on your server.
|
||||
|
||||
Your OpenSHMEM job will now abort.
|
||||
|
||||
Server: %s
|
||||
Requested shared
|
||||
memory segment size: %llu
|
||||
Specific error: %s (%d)
|
||||
|
@ -10,19 +10,16 @@
|
||||
#
|
||||
#
|
||||
|
||||
[mmap segment failed]
|
||||
The OpenSHMEM "mmap" plugin in the "sshmem" framework failed to
|
||||
allocate a shared memory segment via the mmap system call. This
|
||||
usually means that there are not enough resources available to your
|
||||
memory subsystem on your server.
|
||||
[mmap:create segment failure]
|
||||
|
||||
Your OpenSHMEM job will now abort.
|
||||
You can try the following:
|
||||
|
||||
Server: %s
|
||||
Requested mmap
|
||||
segment size: %u
|
||||
Specific error: %s (%d)
|
||||
1. Decrease the symmetric heap area with
|
||||
"-x SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
|
||||
2. Set "--mca sshmem_base_start_address 0" for
|
||||
automatic selection by the OS of the virtual start address for sshmem.
|
||||
|
||||
You can try to decrease the symmetric heap area with:
|
||||
|
||||
"-x SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
|
||||
This issue could also be related to the CONFIG_STRICT_DEVMEM
|
||||
kernel option, which, if enabled, prevents access to physical
|
||||
memory via "mmap". In this case you could try using other
|
||||
sshmem components instead.
|
||||
|
@ -194,16 +194,15 @@ segment_create(map_segment_t *ds_buf,
|
||||
0);
|
||||
|
||||
if (MAP_FAILED == addr) {
|
||||
OPAL_OUTPUT_VERBOSE(
|
||||
(5, oshmem_sshmem_base_framework.framework_output,
|
||||
"Failed to mmap() %llu bytes (errno=%d)",
|
||||
(unsigned long long)size, errno)
|
||||
);
|
||||
opal_show_help("help-oshmem-sshmem-mmap.txt",
|
||||
"mmap segment failed",
|
||||
opal_show_help("help-oshmem-sshmem.txt",
|
||||
"create segment failure",
|
||||
"mmap",
|
||||
true,
|
||||
orte_process_info.nodename, (unsigned) size,
|
||||
strerror(errno),errno);
|
||||
orte_process_info.nodename, (unsigned long long) size,
|
||||
strerror(errno), errno);
|
||||
opal_show_help("help-oshmem-sshmem-mmap.txt",
|
||||
"mmap:create segment failure",
|
||||
true);
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
@ -11,18 +11,7 @@
|
||||
#
|
||||
# This is the US/English help file for Open SHMEM MCA error messages.
|
||||
#
|
||||
[create segment failure]
|
||||
The OpenSHMEM "sysv" plugin in the "sshmem" framework failed to
|
||||
allocate a shared memory segment via the shmat(2) system call. This
|
||||
usually means that there are not enough resources available to the
|
||||
SYSV shared memory subsystem on your server.
|
||||
|
||||
Your OpenSHMEM job will now abort.
|
||||
|
||||
Server: %s
|
||||
Requested shared
|
||||
memory segment size: %u
|
||||
Specific error: %s (%d)
|
||||
[sysv:create segment failure]
|
||||
|
||||
You can try the following:
|
||||
|
||||
@ -31,3 +20,5 @@ You can try the following:
|
||||
SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
|
||||
3. Increase your system's allowable SYSV shared memory segment size
|
||||
(e.g., via the SHMMAX and/or SMMAX kernel parameters).
|
||||
4. Set "--mca sshmem_base_start_address 0" for
|
||||
automatic selection by the OS of the virtual start address for sshmem.
|
||||
|
@ -194,28 +194,31 @@ segment_create(map_segment_t *ds_buf,
|
||||
/* Create a new shared memory segment and save the shmid. */
|
||||
shmid = shmget(IPC_PRIVATE, size, flags);
|
||||
if (shmid == MAP_SEGMENT_SHM_INVALID) {
|
||||
OPAL_OUTPUT_VERBOSE(
|
||||
(5, oshmem_sshmem_base_framework.framework_output,
|
||||
"Failed to shmget() %llu bytes (errno=%d)",
|
||||
(unsigned long long)size, errno));
|
||||
|
||||
opal_show_help("help-oshmem-sshmem-sysv.txt",
|
||||
opal_show_help("help-oshmem-sshmem.txt",
|
||||
"create segment failure",
|
||||
true,
|
||||
orte_process_info.nodename, (unsigned) size,
|
||||
"sysv",
|
||||
orte_process_info.nodename, (unsigned long long) size,
|
||||
strerror(errno), errno);
|
||||
opal_show_help("help-oshmem-sshmem-sysv.txt",
|
||||
"sysv:create segment failure",
|
||||
true);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Attach to the segment */
|
||||
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
|
||||
if (addr == (void *) -1L) {
|
||||
OPAL_OUTPUT_VERBOSE(
|
||||
(5, oshmem_sshmem_base_framework.framework_output,
|
||||
"Failed to shmat() %llu bytes (errno=%d)",
|
||||
(unsigned long long)size, errno)
|
||||
);
|
||||
shmctl(shmid, IPC_RMID, NULL );
|
||||
opal_show_help("help-oshmem-sshmem.txt",
|
||||
"create segment failure",
|
||||
true,
|
||||
"sysv",
|
||||
orte_process_info.nodename, (unsigned long long) size,
|
||||
strerror(errno), errno);
|
||||
opal_show_help("help-oshmem-sshmem-sysv.txt",
|
||||
"sysv:create segment failure",
|
||||
true);
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
@ -228,6 +228,11 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
|
||||
if (OSHMEM_SUCCESS == ret) {
|
||||
oshmem_shmem_initialized = true;
|
||||
|
||||
if (OSHMEM_SUCCESS != shmem_lock_init()) {
|
||||
SHMEM_API_ERROR( "shmem_lock_init() failed");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* this is a collective op, implies barrier */
|
||||
MCA_MEMHEAP_CALL(get_all_mkeys());
|
||||
|
||||
@ -437,11 +442,6 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (OSHMEM_SUCCESS != shmem_lock_init()) {
|
||||
error = "shmem_lock_init() failed";
|
||||
goto error;
|
||||
}
|
||||
|
||||
error: if (ret != OSHMEM_SUCCESS) {
|
||||
const char *err_msg = opal_strerror(ret);
|
||||
orte_show_help("help-shmem-runtime.txt",
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user