1
1

OSHMEM: better error messages when failing

Provide users with the correct failure reason.

fixes trac:4433

This commit was SVN r31202.

The following Trac tickets were found above:
  Ticket 4433 --> https://svn.open-mpi.org/trac/ompi/ticket/4433
Этот коммит содержится в:
Mike Dubman 2014-03-25 15:27:13 +00:00
родитель 8c2b9658ce
Коммит be3fc7bf20
7 изменённых файлов: 70 добавлений и 53 удалений

Просмотреть файл

@ -7,6 +7,8 @@
# $HEADER$
#
dist_ompidata_DATA = base/help-oshmem-sshmem.txt
headers += \
base/base.h

Просмотреть файл

@ -0,0 +1,25 @@
# -*- text -*-
#
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open SHMEM MCA error messages.
#
[create segment failure]
The OpenSHMEM "(%s)" plugin in the "sshmem" framework failed to
allocate a shared memory segment via the %s system call. This
usually means that there are not enough resources available to the memory subsystem on your server.
Your OpenSHMEM job will now abort.
Server: %s
Requested shared
memory segment size: %llu
Specific error: %s (%d)

Просмотреть файл

@ -10,19 +10,16 @@
#
#
[mmap segment failed]
The OpenSHMEM "mmap" plugin in the "sshmem" framework failed to
allocate a shared memory segement via the mmap system call. This
usually means that there are not enough resources available to your
memory subsystem on your server.
[mmap:create segment failure]
Your OpenSHMEM job will now abort.
You can try the following:
Server: %s
Requested mmap
segment size: %u
Specific error: %s (%d)
1. Decrease the symmetric heap area with
"-x SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
2. Set "--mca sshmem_base_start_address 0" for
automatic selection by OS of virtual start address for sshmem.
You can try to decrease the symmetric heap area with:
"-x SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
This issue could also be related to the CONFIG_STRICT_DEVMEM
kernel option, which, if enabled, prevents access to physical
memory via "mmap". In this case, you could try using other
sshmem components instead.

Просмотреть файл

@ -194,16 +194,15 @@ segment_create(map_segment_t *ds_buf,
0);
if (MAP_FAILED == addr) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"Failed to mmap() %llu bytes (errno=%d)",
(unsigned long long)size, errno)
);
opal_show_help("help-oshmem-sshmem-mmap.txt",
"mmap segment failed",
opal_show_help("help-oshmem-sshmem.txt",
"create segment failure",
"mmap",
true,
orte_process_info.nodename, (unsigned) size,
strerror(errno),errno);
orte_process_info.nodename, (unsigned long long) size,
strerror(errno), errno);
opal_show_help("help-oshmem-sshmem-mmap.txt",
"mmap:create segment failure",
true);
return OSHMEM_ERR_OUT_OF_RESOURCE;
}

Просмотреть файл

@ -1,6 +1,6 @@
# -*- text -*-
#
# Copyright (c) 2013 Mellanox Technologies, Inc.
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
@ -11,18 +11,7 @@
#
# This is the US/English help file for Open SHMEM MCA error messages.
#
[create segment failure]
The OpenSHMEM "sysv" plugin in the "sshmem" framework failed to
allocate a shared memory segment via the shmat(2) system call. This
usually means that there are not enough resources available to the
SYSV shared memory subsystem on your server.
Your OpenSHMEM job will now abort.
Server: %s
Requested shared
memory segment size: %u
Specific error: %s (%d)
[sysv:create segment failure]
You can try the following:
@ -31,3 +20,5 @@ You can try the following:
SHMEM_SYMMETRIC_HEAP_SIZE=<value>".
3. Increase your system's allowable SYSV shared memory segment size
(e.g., via the SHMMAX and/or SMMAX kernel parameters).
4. Set "--mca sshmem_base_start_address 0" for
automatic selection by OS of virtual start address for sshmem.

Просмотреть файл

@ -194,28 +194,31 @@ segment_create(map_segment_t *ds_buf,
/* Create a new shared memory segment and save the shmid. */
shmid = shmget(IPC_PRIVATE, size, flags);
if (shmid == MAP_SEGMENT_SHM_INVALID) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"Failed to shmget() %llu bytes (errno=%d)",
(unsigned long long)size, errno));
opal_show_help("help-oshmem-sshmem-sysv.txt",
opal_show_help("help-oshmem-sshmem.txt",
"create segment failure",
true,
orte_process_info.nodename, (unsigned) size,
"sysv",
orte_process_info.nodename, (unsigned long long) size,
strerror(errno), errno);
opal_show_help("help-oshmem-sshmem-sysv.txt",
"sysv:create segment failure",
true);
return OSHMEM_ERROR;
}
/* Attach to the segment */
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
if (addr == (void *) -1L) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"Failed to shmat() %llu bytes (errno=%d)",
(unsigned long long)size, errno)
);
shmctl(shmid, IPC_RMID, NULL );
opal_show_help("help-oshmem-sshmem.txt",
"create segment failure",
true,
"sysv",
orte_process_info.nodename, (unsigned long long) size,
strerror(errno), errno);
opal_show_help("help-oshmem-sshmem-sysv.txt",
"sysv:create segment failure",
true);
shmctl(shmid, IPC_RMID, NULL);
return OSHMEM_ERR_OUT_OF_RESOURCE;
}

Просмотреть файл

@ -228,6 +228,11 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
if (OSHMEM_SUCCESS == ret) {
oshmem_shmem_initialized = true;
if (OSHMEM_SUCCESS != shmem_lock_init()) {
SHMEM_API_ERROR( "shmem_lock_init() failed");
return OSHMEM_ERROR;
}
/* this is a collective op, implies barrier */
MCA_MEMHEAP_CALL(get_all_mkeys());
@ -437,11 +442,6 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided)
goto error;
}
if (OSHMEM_SUCCESS != shmem_lock_init()) {
error = "shmem_lock_init() failed";
goto error;
}
error: if (ret != OSHMEM_SUCCESS) {
const char *err_msg = opal_strerror(ret);
orte_show_help("help-shmem-runtime.txt",