1
1

Merge pull request #1125 from igor-ivanov/pr/oshmem_new_mca_vars

oshmem: Add new mca variables oshmem_abort_delay and oshmem_abort_print_stack
Этот коммит содержится в:
Mike Dubman 2015-11-11 14:34:12 +02:00
родитель ae6b6ba05b f288cd7254
Коммит 93847e4ca9
3 изменённых файлов: 58 добавлений и 4 удалений

Просмотреть файл

@ -71,11 +71,11 @@ int oshmem_shmem_abort(int errcode)
/* Should we print a stack trace? Not aggregated because they
might be different on all processes. */
if (ompi_mpi_abort_print_stack) {
if (oshmem_shmem_abort_print_stack) {
char **messages;
int len, i;
if (OSHMEM_SUCCESS == opal_backtrace_buffer(&messages, &len)) {
if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) {
for (i = 0; i < len; ++i) {
fprintf(stderr,
"[%s:%d] [%d] func:%s\n",
@ -94,6 +94,24 @@ int oshmem_shmem_abort(int errcode)
}
}
/* Should we wait for a while before aborting?  A nonzero
 * oshmem_shmem_abort_delay gives the user time to attach a debugger:
 * a negative value waits forever, a positive value waits that many
 * seconds. */
if (0 != oshmem_shmem_abort_delay) {
    if (oshmem_shmem_abort_delay < 0) {
        /* Report the OSHMEM parameter that controls this behavior
         * (oshmem_abort_delay), not the MPI one, so the user knows
         * which setting to change. */
        fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter oshmem_abort_delay is < 0)\n",
                host, (int) pid);
        fflush(stderr);
        /* Never returns: the process stays alive until it is killed
         * or a debugger takes over. */
        while (1) {
            sleep(5);
        }
    } else {
        fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n",
                host, (int) pid, oshmem_shmem_abort_delay);
        /* Flush so the host/PID hint is visible before we start sleeping,
         * matching the "looping forever" branch. */
        fflush(stderr);
        /* Count the delay down one second at a time; the global is
         * consumed as the counter. */
        do {
            sleep(1);
        } while (--oshmem_shmem_abort_delay > 0);
    }
}
if (!orte_initialized || !oshmem_shmem_initialized) {
if (orte_show_help_is_available()) {
/* TODO help message from SHMEM not from MPI is needed*/

Просмотреть файл

@ -13,12 +13,37 @@
#include "oshmem/constants.h"
/* Whether an abort should print out a stack trace. */
bool oshmem_shmem_abort_print_stack = false;
/* Seconds to wait before aborting: 0 = no message and no delay,
 * negative = print the message and loop forever,
 * positive = print the message and delay for that many seconds. */
int oshmem_shmem_abort_delay = 0;
/* Whether or not the lock routines are recursive. */
int oshmem_shmem_lock_recursive = 0;
/* NOTE(review): presumably the verbosity level for SHMEM API output;
 * confirm against the parameter registration. */
int oshmem_shmem_api_verbose = 0;
/* NOTE(review): presumably nonzero requests connecting to all peers up
 * front; confirm against the parameter registration. */
int oshmem_preconnect_all = 0;
int oshmem_shmem_register_params(void)
{
oshmem_shmem_abort_delay = 0;
(void) mca_base_var_register("oshmem",
"oshmem",
NULL,
"abort_delay",
"If nonzero, print out an identifying message when abort is invoked (hostname, PID of the process that called abort operation) and delay for that many seconds before exiting (a negative delay value means to never abort). This allows attaching of a debugger before quitting the job.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&oshmem_shmem_abort_delay);
oshmem_shmem_abort_print_stack = false;
(void) mca_base_var_register("oshmem",
"oshmem",
NULL,
"abort_print_stack",
"If nonzero, print out a stack trace when abort is invoked",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&oshmem_shmem_abort_print_stack);
(void) mca_base_var_register("oshmem",
"oshmem",
NULL,

Просмотреть файл

@ -20,9 +20,20 @@ BEGIN_C_DECLS
*/
/**
* Whether an MPI_ABORT should print out a stack trace or not.
* Whether an abort should print out a stack trace or not.
*/
OSHMEM_DECLSPEC extern bool ompi_mpi_abort_print_stack;
OSHMEM_DECLSPEC extern bool oshmem_shmem_abort_print_stack;
/**
* Whether abort should print out an identifying message
* (e.g., hostname and PID) and loop waiting for a debugger to
* attach. The value of the integer is how many seconds to wait:
*
* 0 = do not print the message and do not loop
* negative value = print the message and loop forever
* positive value = print the message and delay for that many seconds
*/
OSHMEM_DECLSPEC extern int oshmem_shmem_abort_delay;
/**
* Whether or not the lock routines are recursive