opal: Fix opal_initialized reference counter
Before this change, the reference counters `opal_util_initialized` and `opal_initialized` were incremented at the beginning of the `opal_init_util` and `opal_init` functions respectively. In other words, they were incremented before initialization had completed. This causes the following program to abort with SIGFPE if `--enable-timing` is passed to `configure`.

```c
// need -lm option on link
int main(int argc, char *argv[])
{
    // raise SIGFPE on division-by-zero
    feenableexcept(FE_DIVBYZERO);
    MPI_Init(&argc, &argv);
    MPI_Finalize();
    return 0;
}
```

The sequence leading to the SIGFPE is:

1. `MPI_Init` calls `opal_init` through `ompi_rte_init`.
2. `opal_init` changes the value of `opal_initialized` to 1.
3. `opal_init` calls `opal_init_util`.
4. `opal_init_util` calls `opal_timing_ts_func` through `OPAL_TIMING_ENV_INIT`, and `opal_timing_ts_func` returns `get_ts_cycle` instead of `get_ts_gettimeofday` because `opal_initialized` is already 1. (This is the problem.)
5. `opal_init_util` calls `get_ts_cycle` through `OPAL_TIMING_ENV_INIT`.
6. `get_ts_cycle` executes `opal_timer_base_get_cycles() / opal_timer_base_get_freq()`, which raises SIGFPE (division by zero) because the OPAL TIMER framework is not initialized yet and `opal_timer_base_get_freq` returns 0.

This commit moves the increments of `opal_util_initialized` and `opal_initialized` to the end of the `opal_init_util` and `opal_init` functions respectively.

Signed-off-by: Tsubasa Yanagibashi <fj2505dt@aa.jp.fujitsu.com>
Этот коммит содержится в:
родитель
f496f256cd
Коммит
7d5fbcfd76
@ -24,6 +24,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2018-2019 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2020 FUJITSU LIMITED. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -470,10 +471,11 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
char *error = NULL;
|
||||
OPAL_TIMING_ENV_INIT(otmng);
|
||||
|
||||
if( ++opal_util_initialized != 1 ) {
|
||||
if( opal_util_initialized < 1 ) {
|
||||
if( opal_util_initialized != 0 ) {
|
||||
if( opal_util_initialized < 0 ) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
++opal_util_initialized;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -615,6 +617,8 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(otmng, "opal_if_init");
|
||||
|
||||
++opal_util_initialized;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -635,10 +639,11 @@ opal_init(int* pargc, char*** pargv)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if( ++opal_initialized != 1 ) {
|
||||
if( opal_initialized < 1 ) {
|
||||
if( opal_initialized != 0 ) {
|
||||
if( opal_initialized < 0 ) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
++opal_initialized;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -688,5 +693,7 @@ opal_init(int* pargc, char*** pargv)
|
||||
return opal_init_error ("opal_reachable_base_select", ret);
|
||||
}
|
||||
|
||||
++opal_initialized;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user