Merge pull request #4933 from karasevb/timings_update
timings: added new timing points
Этот коммит содержится в:
Коммит
77ff99e9ee
@ -20,7 +20,7 @@
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
|
||||
* Copyright (c) 2016-2018 Mellanox Technologies Ltd. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -384,7 +384,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
volatile bool active;
|
||||
bool background_fence = false;
|
||||
|
||||
OMPI_TIMING_INIT(32);
|
||||
OMPI_TIMING_INIT(64);
|
||||
|
||||
ompi_hook_base_mpi_init_top(argc, argv, requested, provided);
|
||||
|
||||
@ -423,6 +423,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
error = "ompi_mpi_init: opal_init_util failed";
|
||||
goto error;
|
||||
}
|
||||
OMPI_TIMING_IMPORT_OPAL("opal_init_util");
|
||||
|
||||
/* If thread support was enabled, then setup OPAL to allow for them. This must be done
|
||||
* early to prevent a race condition that can occur with orte_init(). */
|
||||
@ -512,8 +513,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
error = "ompi_mpi_init: ompi_rte_init failed";
|
||||
goto error;
|
||||
}
|
||||
|
||||
OMPI_TIMING_NEXT("rte_init");
|
||||
OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup");
|
||||
OMPI_TIMING_IMPORT_OPAL("rte_init");
|
||||
|
||||
ompi_rte_initialized = true;
|
||||
|
||||
@ -643,16 +645,23 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
|
||||
OMPI_TIMING_IMPORT_OPAL("orte_init");
|
||||
OMPI_TIMING_IMPORT_OPAL("opal_init_util");
|
||||
OMPI_TIMING_NEXT("rte_init-commit");
|
||||
|
||||
|
||||
/* exchange connection info - this function may also act as a barrier
|
||||
* if data exchange is required. The modex occurs solely across procs
|
||||
* in our job. If a barrier is required, the "modex" function will
|
||||
* perform it internally */
|
||||
opal_pmix.commit();
|
||||
OMPI_TIMING_NEXT("commit");
|
||||
#if (OPAL_ENABLE_TIMING)
|
||||
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
|
||||
opal_pmix_collect_all_data) {
|
||||
opal_pmix.fence(NULL, 0);
|
||||
OMPI_TIMING_NEXT("pmix-barrier-1");
|
||||
opal_pmix.fence(NULL, 0);
|
||||
OMPI_TIMING_NEXT("pmix-barrier-2");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* If we have a non-blocking fence:
|
||||
* if we are doing an async modex, but we are collecting all
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
|
||||
* Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -21,6 +21,7 @@ typedef struct {
|
||||
double ts;
|
||||
char *file;
|
||||
char *prefix;
|
||||
int imported;
|
||||
} ompi_timing_val_t;
|
||||
|
||||
typedef struct {
|
||||
@ -36,11 +37,15 @@ typedef struct ompi_timing_t {
|
||||
int cnt;
|
||||
int error;
|
||||
int enabled;
|
||||
int import_cnt;
|
||||
opal_timing_ts_func_t get_ts;
|
||||
ompi_timing_list_t *timing;
|
||||
ompi_timing_list_t *cur_timing;
|
||||
} ompi_timing_t;
|
||||
|
||||
#define OMPI_TIMING_ENABLED \
|
||||
(getenv("OMPI_TIMING_ENABLE") ? atoi(getenv("OMPI_TIMING_ENABLE")) : 0)
|
||||
|
||||
#define OMPI_TIMING_INIT(_size) \
|
||||
ompi_timing_t OMPI_TIMING; \
|
||||
OMPI_TIMING.prefix = __func__; \
|
||||
@ -50,6 +55,7 @@ typedef struct ompi_timing_t {
|
||||
OMPI_TIMING.error = 0; \
|
||||
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
|
||||
OMPI_TIMING.enabled = 0; \
|
||||
OMPI_TIMING.import_cnt = 0; \
|
||||
{ \
|
||||
char *ptr; \
|
||||
ptr = getenv("OMPI_TIMING_ENABLE"); \
|
||||
@ -94,7 +100,8 @@ typedef struct ompi_timing_t {
|
||||
#define OMPI_TIMING_NEXT(...) \
|
||||
do { \
|
||||
if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \
|
||||
char *f = strrchr(__FILE__, '/') + 1; \
|
||||
char *f = strrchr(__FILE__, '/'); \
|
||||
f = (f == NULL) ? strdup(__FILE__) : f+1; \
|
||||
int len = 0; \
|
||||
if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \
|
||||
OMPI_TIMING_ITEM_EXTEND; \
|
||||
@ -135,10 +142,13 @@ typedef struct ompi_timing_t {
|
||||
int cnt; \
|
||||
int i; \
|
||||
double ts; \
|
||||
OMPI_TIMING.import_cnt++; \
|
||||
OPAL_TIMING_ENV_CNT(func, cnt); \
|
||||
OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \
|
||||
for(i = 0; i < cnt; i++){ \
|
||||
char *desc, *filename; \
|
||||
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \
|
||||
OMPI_TIMING.import_cnt; \
|
||||
OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \
|
||||
OMPI_TIMING_APPEND(filename, func, desc, ts); \
|
||||
} \
|
||||
@ -155,6 +165,7 @@ typedef struct ompi_timing_t {
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size); \
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); \
|
||||
int error = 0; \
|
||||
int imported = 0; \
|
||||
\
|
||||
MPI_Reduce(&OMPI_TIMING.error, &error, 1, \
|
||||
MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \
|
||||
@ -171,6 +182,7 @@ typedef struct ompi_timing_t {
|
||||
char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
|
||||
char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
|
||||
char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
|
||||
double total_avg = 0, total_min = 0, total_max = 0; \
|
||||
\
|
||||
if( OMPI_TIMING.cnt > 0 ) { \
|
||||
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
|
||||
@ -193,21 +205,53 @@ typedef struct ompi_timing_t {
|
||||
timing = (ompi_timing_list_t*)timing->next; \
|
||||
} while (timing != NULL); \
|
||||
\
|
||||
if( 0 == rank ){ \
|
||||
if( 0 == rank ) { \
|
||||
if (OMPI_TIMING.timing->next) { \
|
||||
printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \
|
||||
"==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \
|
||||
} \
|
||||
\
|
||||
printf("------------------ %s ------------------\n", \
|
||||
OMPI_TIMING.prefix); \
|
||||
OMPI_TIMING.prefix); \
|
||||
imported = OMPI_TIMING.timing->val[0].imported; \
|
||||
for(i=0; i< OMPI_TIMING.cnt; i++){ \
|
||||
bool print_total = 0; \
|
||||
imported = OMPI_TIMING.timing->val[i].imported; \
|
||||
avg[i] /= size; \
|
||||
printf("[%s:%s:%s]: %lf / %lf / %lf\n", \
|
||||
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
|
||||
imported ? " -- " : "", \
|
||||
file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \
|
||||
if (OMPI_TIMING.timing->val[i].imported) { \
|
||||
total_avg += avg[i]; \
|
||||
total_min += min[i]; \
|
||||
total_max += max[i]; \
|
||||
} \
|
||||
if (i == (OMPI_TIMING.cnt-1)) { \
|
||||
print_total = true; \
|
||||
} else { \
|
||||
print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \
|
||||
} \
|
||||
if (print_total && OMPI_TIMING.timing->val[i].imported) { \
|
||||
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
|
||||
imported ? " !! " : "", \
|
||||
file[i], prefix[i], "total", \
|
||||
total_avg, total_min, total_max); \
|
||||
total_avg = 0; total_min = 0; total_max = 0; \
|
||||
} \
|
||||
} \
|
||||
total_avg = 0; total_min = 0; total_max = 0; \
|
||||
for(i=0; i< OMPI_TIMING.cnt; i++) { \
|
||||
if (!OMPI_TIMING.timing->val[i].imported) { \
|
||||
total_avg += avg[i]; \
|
||||
total_min += min[i]; \
|
||||
total_max += max[i]; \
|
||||
} \
|
||||
} \
|
||||
printf("[%s:total] %lf / %lf / %lf\n", \
|
||||
OMPI_TIMING.prefix, \
|
||||
total_avg, total_min, total_max); \
|
||||
printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \
|
||||
OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
|
||||
OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
|
||||
} \
|
||||
} \
|
||||
free(avg); \
|
||||
@ -233,6 +277,8 @@ typedef struct ompi_timing_t {
|
||||
|
||||
#define OMPI_TIMING_FINALIZE
|
||||
|
||||
#define OMPI_TIMING_ENABLED 0
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -20,6 +20,8 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* Copyright (c) 2018 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -479,12 +481,16 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
goto return_error;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(otmng, "opal_dss_open");
|
||||
|
||||
/* initialize the mca */
|
||||
if (OPAL_SUCCESS != (ret = mca_base_open())) {
|
||||
error = "mca_base_open";
|
||||
goto return_error;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(otmng, "mca_base_open");
|
||||
|
||||
/* initialize if framework */
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_if_base_framework, 0))) {
|
||||
fprintf(stderr, "opal_if_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Artem Polyakov <artpol84@gmail.com>
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
|
||||
* Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -91,7 +91,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
|
||||
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \
|
||||
do { \
|
||||
opal_timing_env_t name ## _val, *name = &(name ## _val); \
|
||||
*name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \
|
||||
*name = OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \
|
||||
} while(0)
|
||||
|
||||
#define OPAL_TIMING_ENV_NEXT(h, ...) \
|
||||
@ -121,7 +121,8 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
|
||||
h->error = 1; \
|
||||
} \
|
||||
setenv(buf1, buf2, 1); \
|
||||
filename = strrchr(__FILE__, '/') + 1; \
|
||||
filename = strrchr(__FILE__, '/'); \
|
||||
filename = (filename == NULL) ? strdup(__FILE__) : filename+1; \
|
||||
n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \
|
||||
if ( n > OPAL_TIMING_STR_LEN ){ \
|
||||
h->error = 1; \
|
||||
@ -207,7 +208,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
|
||||
|
||||
#define OPAL_TIMING_ENV_INIT(name)
|
||||
|
||||
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix)
|
||||
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name)
|
||||
|
||||
#define OPAL_TIMING_ENV_NEXT(h, ... )
|
||||
|
||||
|
@ -16,6 +16,8 @@
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2018 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -62,6 +64,7 @@
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
@ -74,6 +77,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
char *error = NULL;
|
||||
opal_list_t transports;
|
||||
|
||||
OPAL_TIMING_ENV_INIT(ess_base_setup);
|
||||
/*
|
||||
* stdout/stderr buffering
|
||||
* If the user requested to override the default setting then do
|
||||
@ -116,6 +120,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_state_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "state_framework_open");
|
||||
|
||||
/* open the errmgr */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
|
||||
@ -123,6 +128,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_errmgr_base_open";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_framework_open");
|
||||
|
||||
/* setup my session directory */
|
||||
if (orte_create_session_dirs) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
|
||||
@ -157,6 +164,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs");
|
||||
|
||||
/* Setup the communication infrastructure */
|
||||
/* Routed system */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
|
||||
@ -169,6 +178,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_routed_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open");
|
||||
|
||||
/*
|
||||
* OOB Layer
|
||||
*/
|
||||
@ -182,6 +193,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_oob_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open");
|
||||
|
||||
/* Runtime Messaging Layer */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -193,6 +206,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_rml_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open");
|
||||
|
||||
/* if we have info on the HNP and local daemon, process it */
|
||||
if (NULL != orte_process_info.my_hnp_uri) {
|
||||
/* we have to set the HNP's name, even though we won't route messages directly
|
||||
@ -243,6 +258,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_errmgr_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select");
|
||||
|
||||
/* get a conduit for our use - we never route IO over fabric */
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
@ -264,6 +280,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit");
|
||||
|
||||
/*
|
||||
* Group communications
|
||||
@ -278,6 +295,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_grpcomm_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open");
|
||||
|
||||
/* open the distributed file system */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
|
||||
@ -290,6 +308,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_dfs_base_select";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open");
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
error:
|
||||
orte_show_help("help-orte-runtime.txt",
|
||||
|
@ -15,6 +15,8 @@
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -51,6 +53,7 @@
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
@ -101,16 +104,20 @@ static int rte_init(void)
|
||||
bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false;
|
||||
size_t i;
|
||||
|
||||
OPAL_TIMING_ENV_INIT(rte_init);
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
error = "orte_ess_base_std_prolog";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "orte_ess_base_std_prolog");
|
||||
|
||||
/* get an async event base - we use the opal_async one so
|
||||
* we don't startup extra threads if not needed */
|
||||
orte_event_base = opal_progress_thread_init(NULL);
|
||||
progress_thread_running = true;
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "progress_thread_init");
|
||||
|
||||
/* open and setup pmix */
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
@ -126,6 +133,8 @@ static int rte_init(void)
|
||||
}
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "pmix_framework_open");
|
||||
|
||||
/* initialize the selected module */
|
||||
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) {
|
||||
/* we cannot run - this could be due to being direct launched
|
||||
@ -166,6 +175,8 @@ static int rte_init(void)
|
||||
pname.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
pname.vpid = 0;
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "pmix_init");
|
||||
|
||||
/* get our local rank from PMI */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
|
||||
ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
|
||||
@ -242,6 +253,7 @@ static int rte_init(void)
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
orte_process_info.num_nodes = u32;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "pmix_get_job_params");
|
||||
|
||||
/* setup transport keys in case the MPI layer needs them -
|
||||
* we can use the jobfam and stepid as unique keys
|
||||
@ -263,6 +275,7 @@ static int rte_init(void)
|
||||
/* cannot free the envar as that messes up our environ */
|
||||
free(string_key);
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "orte_precondition_transport");
|
||||
|
||||
/* retrieve temp directories info */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
|
||||
@ -320,6 +333,7 @@ static int rte_init(void)
|
||||
orte_process_info.rm_session_dirs = bool_val;
|
||||
}
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "pmix_set_tdirs");
|
||||
|
||||
/* get our local peers */
|
||||
if (0 < orte_process_info.num_local_peers) {
|
||||
@ -395,6 +409,7 @@ static int rte_init(void)
|
||||
free(mycpuset);
|
||||
}
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "pmix_set_locality");
|
||||
|
||||
/* now that we have all required info, complete the setup */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
|
||||
@ -402,12 +417,14 @@ static int rte_init(void)
|
||||
error = "orte_ess_base_app_setup";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "ess_base_app_setup");
|
||||
|
||||
/* setup process binding */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
|
||||
error = "proc_binding";
|
||||
goto error;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "ess_base_proc_binding");
|
||||
|
||||
/* this needs to be set to enable debugger use when direct launched */
|
||||
if (NULL == orte_process_info.my_daemon_uri) {
|
||||
@ -443,7 +460,8 @@ static int rte_init(void)
|
||||
opal_pmix.commit();
|
||||
opal_pmix.fence(NULL, 0);
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(rte_init, "rte_init_done");
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* Copyright (c) 2013-2018 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -17,6 +17,7 @@
|
||||
#include "oshmem/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "ompi/util/timings.h"
|
||||
|
||||
#include "oshmem/util/oshmem_util.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
@ -57,6 +58,8 @@ int mca_scoll_enable(void)
|
||||
{
|
||||
int ret = OSHMEM_SUCCESS;
|
||||
|
||||
OPAL_TIMING_ENV_INIT(mca_scoll_enable);
|
||||
|
||||
if (!mca_scoll_sync_array) {
|
||||
void* ptr = (void*) mca_scoll_sync_array;
|
||||
int i = 0;
|
||||
@ -69,16 +72,23 @@ int mca_scoll_enable(void)
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "memheap");
|
||||
|
||||
/* Note: it is done to support FCA only and we need to consider possibility to
|
||||
* find a way w/o this ugly hack
|
||||
*/
|
||||
if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_all))) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_all");
|
||||
|
||||
if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_self))) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_self");
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* Copyright (c) 2013-2018 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,6 +21,7 @@
|
||||
#include "oshmem/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
#include "ompi/util/timings.h"
|
||||
|
||||
#include "oshmem/util/oshmem_util.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
@ -194,6 +195,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
opal_list_item_t *item;
|
||||
int ret;
|
||||
|
||||
OPAL_TIMING_ENV_INIT(mca_scoll_base_select);
|
||||
|
||||
/* Announce */
|
||||
SCOLL_VERBOSE(10, "scoll:base:group_select: new group: %d", group->id);
|
||||
mca_scoll_base_group_unselect(group);
|
||||
@ -206,6 +209,9 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
group->g_scoll.scoll_alltoall = scoll_null_alltoall;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "setup");
|
||||
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:base:group_select: Checking all available modules");
|
||||
selectable = check_components(&oshmem_scoll_base_framework.framework_components, group);
|
||||
@ -218,6 +224,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "check_components");
|
||||
|
||||
/* do the selection loop */
|
||||
for (item = opal_list_remove_first(selectable); NULL != item; item =
|
||||
opal_list_remove_first(selectable)) {
|
||||
@ -236,6 +244,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
OBJ_RELEASE(avail);
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "select_loop");
|
||||
|
||||
/* Done with the list from the check_components() call so release it. */
|
||||
OBJ_RELEASE(selectable);
|
||||
if ((NULL == group->g_scoll.scoll_barrier)
|
||||
@ -247,6 +257,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
return OSHMEM_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "release");
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2018 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -17,6 +17,7 @@
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
int mca_scoll_mpi_init_query(bool enable_progress_threads, bool enable_mpi_threads)
|
||||
{
|
||||
@ -121,20 +122,27 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
if ((osh_group->proc_count < 2) || (osh_group->proc_count < cm->mpi_np)) {
|
||||
return NULL;
|
||||
}
|
||||
OPAL_TIMING_ENV_INIT(comm_query);
|
||||
|
||||
/* Create OMPI_Comm object and store ptr to it in group obj*/
|
||||
if (NULL == oshmem_group_all) {
|
||||
osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "ompi_mpi_comm_world");
|
||||
} else {
|
||||
err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
return NULL;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "ompi_comm_group");
|
||||
|
||||
ranks = (int*) malloc(osh_group->proc_count * sizeof(int));
|
||||
if (OPAL_UNLIKELY(NULL == ranks)) {
|
||||
return NULL;
|
||||
}
|
||||
tag = 1;
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "malloc");
|
||||
|
||||
for (i = 0; i < osh_group->proc_count; i++) {
|
||||
ompi_proc_t* ompi_proc;
|
||||
for( int j = 0; j < ompi_group_size(parent_group); j++ ) {
|
||||
@ -146,24 +154,32 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "build_ranks");
|
||||
|
||||
err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
free(ranks);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "ompi_group_incl");
|
||||
|
||||
err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
free(ranks);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "ompi_comm_create_group");
|
||||
|
||||
err = ompi_group_free(&new_group);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
free(ranks);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "ompi_group_free");
|
||||
|
||||
free(ranks);
|
||||
osh_group->ompi_comm = newcomm;
|
||||
OPAL_TIMING_ENV_NEXT(comm_query, "set_group_comm");
|
||||
}
|
||||
mpi_module = OBJ_NEW(mca_scoll_mpi_module_t);
|
||||
if (!mpi_module){
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include "opal/mca/allocator/base/base.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
#include "ompi/util/timings.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
@ -143,17 +144,26 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
|
||||
{
|
||||
int ret = OSHMEM_SUCCESS;
|
||||
|
||||
OMPI_TIMING_INIT(32);
|
||||
|
||||
if (!oshmem_shmem_initialized) {
|
||||
if (!ompi_mpi_initialized && !ompi_mpi_finalized) {
|
||||
ret = ompi_mpi_init(argc, argv, requested, provided);
|
||||
}
|
||||
OMPI_TIMING_NEXT("ompi_mpi_init");
|
||||
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
PMPI_Comm_dup(MPI_COMM_WORLD, &oshmem_comm_world);
|
||||
OMPI_TIMING_NEXT("PMPI_Comm_dup");
|
||||
|
||||
ret = _shmem_init(argc, argv, requested, provided);
|
||||
OMPI_TIMING_NEXT("_shmem_init");
|
||||
OMPI_TIMING_IMPORT_OPAL("mca_scoll_mpi_comm_query");
|
||||
OMPI_TIMING_IMPORT_OPAL("mca_scoll_enable");
|
||||
OMPI_TIMING_IMPORT_OPAL("mca_scoll_base_select");
|
||||
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
return ret;
|
||||
@ -164,11 +174,15 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
|
||||
SHMEM_API_ERROR( "shmem_lock_init() failed");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
OMPI_TIMING_NEXT("shmem_lock_init");
|
||||
|
||||
/* this is a collective op, implies barrier */
|
||||
MCA_MEMHEAP_CALL(get_all_mkeys());
|
||||
OMPI_TIMING_NEXT("get_all_mkeys()");
|
||||
|
||||
oshmem_shmem_preconnect_all();
|
||||
OMPI_TIMING_NEXT("shmem_preconnect_all");
|
||||
|
||||
#if OSHMEM_OPAL_THREAD_ENABLE
|
||||
pthread_t thread_id;
|
||||
int perr;
|
||||
@ -178,11 +192,14 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
#endif
|
||||
OMPI_TIMING_NEXT("THREAD_ENABLE");
|
||||
}
|
||||
#ifdef SIGUSR1
|
||||
signal(SIGUSR1,sighandler__SIGUSR1);
|
||||
signal(SIGTERM,sighandler__SIGTERM);
|
||||
#endif
|
||||
OMPI_TIMING_OUT;
|
||||
OMPI_TIMING_FINALIZE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user