From 7d5fbcfd7635f0b90a15be3c593488c1651bdd18 Mon Sep 17 00:00:00 2001 From: Tsubasa Yanagibashi Date: Wed, 26 Feb 2020 14:09:19 +0900 Subject: [PATCH] opal: Fix opal_initialized reference counter Before this change, the reference counters `opal_util_initialized` and `opal_initialized` were incremented at the beginning of the `opal_init_util` and the `opal_init` functions respectively. In other words, they were incremented before initialization had actually completed. This causes the following program to abort by SIGFPE if `--enable-timing` is enabled on `configure`. ```c // need -lm option on link int main(int argc, char *argv[]) { // raise SIGFPE on division-by-zero feenableexcept(FE_DIVBYZERO); MPI_Init(&argc, &argv); MPI_Finalize(); return 0; } ``` The sequence of events leading to the SIGFPE is: 1. `MPI_Init` calls `opal_init` through `ompi_rte_init`. 2. `opal_init` changes the value of `opal_initialized` to 1. 3. `opal_init` calls `opal_init_util`. 4. `opal_init_util` calls `opal_timing_ts_func` through `OPAL_TIMING_ENV_INIT`, and `opal_timing_ts_func` returns `get_ts_cycle` instead of `get_ts_gettimeofday` because `opal_initialized` is already 1. (This is the problem) 5. `opal_init_util` calls `get_ts_cycle` through `OPAL_TIMING_ENV_INIT`. 6. `get_ts_cycle` executes `opal_timer_base_get_cycles() / opal_timer_base_get_freq()` and it raises SIGFPE (division-by-zero) because the OPAL TIMER framework is not initialized yet and `opal_timer_base_get_freq` returns 0. This commit changes the increment timing of `opal_util_initialized` and `opal_initialized` to the end of `opal_init_util` and the `opal_init` functions respectively. Signed-off-by: Tsubasa Yanagibashi --- opal/runtime/opal_init.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ff4b4993ce..ab17e8e9bb 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -24,6 +24,7 @@ * All rights reserved. 
* Copyright (c) 2018-2019 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2020 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -470,10 +471,11 @@ opal_init_util(int* pargc, char*** pargv) char *error = NULL; OPAL_TIMING_ENV_INIT(otmng); - if( ++opal_util_initialized != 1 ) { - if( opal_util_initialized < 1 ) { + if( opal_util_initialized != 0 ) { + if( opal_util_initialized < 0 ) { return OPAL_ERROR; } + ++opal_util_initialized; return OPAL_SUCCESS; } @@ -615,6 +617,8 @@ opal_init_util(int* pargc, char*** pargv) OPAL_TIMING_ENV_NEXT(otmng, "opal_if_init"); + ++opal_util_initialized; + return OPAL_SUCCESS; } @@ -635,10 +639,11 @@ opal_init(int* pargc, char*** pargv) { int ret; - if( ++opal_initialized != 1 ) { - if( opal_initialized < 1 ) { + if( opal_initialized != 0 ) { + if( opal_initialized < 0 ) { return OPAL_ERROR; } + ++opal_initialized; return OPAL_SUCCESS; } @@ -688,5 +693,7 @@ opal_init(int* pargc, char*** pargv) return opal_init_error ("opal_reachable_base_select", ret); } + ++opal_initialized; + return OPAL_SUCCESS; }