Bring the timing instrumentation to the trunk.
If you want to look at our launch and MPI process startup times, you can do so with two MCA params: OMPI_MCA_orte_timing: set it to anything non-zero and you will get the launch time for different steps in the job launch procedure. The degree of detail depends on the launch environment. rsh will provide you with the average, min, and max launch time for the daemons. SLURM block-launches the daemons, so you only get the time to launch the daemons and the total time to launch the job. Ditto for bproc. TM looks more like rsh. Only those four environments are currently supported - anyone interested in extending this capability to other environments is welcome to do so. In all cases, you also get the time to set up the job for launch. OMPI_MCA_ompi_timing: set it to anything non-zero and you will get the time for mpi_init to reach the compound registry command, the time to execute that command, the time to go from our stage1 barrier to the stage2 barrier, and the time to go from the stage2 barrier to the end of mpi_init. This will be output for each process, so you'll have to compile any statistics on your own. Note: if someone develops a nice parser to do so, it would be really appreciated if you could/would share! This commit was SVN r12302.
Этот коммит содержится в:
родитель
d6ff14ed61
Коммит
36d4511143
@ -208,7 +208,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
bool compound_cmd = false;
|
||||
bool timing = false;
|
||||
int param, value;
|
||||
struct timeval ompistart, ompistop;
|
||||
struct timeval ompistart, ompistop, stg2start, stg3start;
|
||||
|
||||
/* Join the run-time environment - do the things that don't hit
|
||||
the registry */
|
||||
@ -219,7 +219,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
|
||||
/* check to see if we want timing information */
|
||||
param = mca_base_param_reg_int_name("orte", "timing",
|
||||
param = mca_base_param_reg_int_name("ompi", "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
false, false, 0, &value);
|
||||
if (value != 0) {
|
||||
@ -506,9 +506,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
if (0 != gettimeofday(&ompistop, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain stop time");
|
||||
} else {
|
||||
opal_output(0, "ompi_mpi_init: time from start to exec_compound_cmd %ld sec %ld usec",
|
||||
(long int)(ompistop.tv_sec - ompistart.tv_sec),
|
||||
(long int)(ompistop.tv_usec - ompistart.tv_usec));
|
||||
opal_output(0, "ompi_mpi_init: time from start to exec_compound_cmd %ld usec",
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
if (0 != gettimeofday(&ompistart, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain new start time");
|
||||
ompistart.tv_sec = ompistop.tv_sec;
|
||||
@ -532,9 +532,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
if (0 != gettimeofday(&ompistop, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain stop time after compound_cmd");
|
||||
} else {
|
||||
opal_output(0, "ompi_mpi_init: time to exec_compound_cmd %ld sec %ld usec",
|
||||
(long int)(ompistop.tv_sec - ompistart.tv_sec),
|
||||
(long int)(ompistop.tv_usec - ompistart.tv_usec));
|
||||
opal_output(0, "ompi_mpi_init: time to execute compound command %ld usec",
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -546,6 +546,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* check for timing request - get start time */
|
||||
if (timing) {
|
||||
if (0 != gettimeofday(&stg2start, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain stop time after compound_cmd");
|
||||
}
|
||||
}
|
||||
|
||||
/* start PTL's */
|
||||
ret = MCA_PML_CALL(enable(true));
|
||||
if( OMPI_SUCCESS != ret ) {
|
||||
@ -638,6 +645,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (timing) {
|
||||
if (0 != gettimeofday(&ompistop, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain stop time after compound_cmd");
|
||||
} else {
|
||||
opal_output(0, "ompi_mpi_init: time from stage1 to stage2 %ld usec",
|
||||
(long int)((ompistop.tv_sec - stg2start.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - stg2start.tv_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
/* Second barrier -- wait for message from
|
||||
RMGR_PROC_STAGE_GATE_MGR to arrive */
|
||||
|
||||
@ -648,6 +666,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* check for timing request - get start time */
|
||||
if (timing) {
|
||||
if (0 != gettimeofday(&stg3start, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain start time for stg3");
|
||||
}
|
||||
}
|
||||
|
||||
/* new very last step: check whether we have been spawned or not.
|
||||
We introduce that at the very end, since we need collectives,
|
||||
datatypes, ptls etc. up and running here.... */
|
||||
@ -705,5 +730,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
/* Do we need to wait for a TotalView-like debugger? */
|
||||
ompi_wait_for_totalview();
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (timing) {
|
||||
if (0 != gettimeofday(&ompistop, NULL)) {
|
||||
opal_output(0, "ompi_mpi_init: could not obtain stop time at end");
|
||||
} else {
|
||||
opal_output(0, "ompi_mpi_init: time from stage2 to complete mpi_init %ld usec",
|
||||
(long int)((ompistop.tv_sec - stg3start.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - stg3start.tv_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
@ -511,13 +511,15 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
|
||||
opal_list_t daemons;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
opal_list_item_t *item;
|
||||
struct timeval joblaunchstart, launchstart, launchstop;
|
||||
|
||||
OPAL_TRACE(1);
|
||||
|
||||
/* clean out any MCA component selection directives that
|
||||
* won't work on remote nodes
|
||||
*/
|
||||
orte_pls_base_purge_mca_params(envp);
|
||||
if (mca_pls_bproc_component.timing) {
|
||||
if (0 != gettimeofday(&joblaunchstart, NULL)) {
|
||||
opal_output(0, "pls_bproc: could not obtain start time");
|
||||
}
|
||||
}
|
||||
|
||||
/* setup a list that will contain the info for all the daemons
|
||||
* so we can store it on the registry when done
|
||||
@ -633,7 +635,25 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
|
||||
|
||||
/* launch the daemons */
|
||||
mca_pls_bproc_component.num_daemons += num_daemons;
|
||||
|
||||
if (mca_pls_bproc_component.timing) {
|
||||
if (0 != gettimeofday(&launchstart, NULL)) {
|
||||
opal_output(0, "pls_bproc: could not obtain start time");
|
||||
}
|
||||
}
|
||||
|
||||
rc = bproc_vexecmove(num_daemons, daemon_list, pids, orted_path, argv, *envp);
|
||||
|
||||
if (mca_pls_bproc_component.timing) {
|
||||
if (0 != gettimeofday(&launchstop, NULL)) {
|
||||
opal_output(0, "pls_bproc: could not obtain stop time");
|
||||
} else {
|
||||
opal_output(0, "pls_bproc: daemon launch time is %ld usec",
|
||||
(launchstop.tv_sec - launchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - launchstart.tv_usec));
|
||||
}
|
||||
}
|
||||
|
||||
if(rc != num_daemons) {
|
||||
opal_show_help("help-pls-bproc.txt", "daemon-launch-number", true,
|
||||
num_daemons, rc, orted_path);
|
||||
@ -699,6 +719,16 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
|
||||
}
|
||||
*num_launched = num_daemons;
|
||||
|
||||
if (mca_pls_bproc_component.timing) {
|
||||
if (0 != gettimeofday(&launchstop, NULL)) {
|
||||
opal_output(0, "pls_bproc: could not obtain stop time");
|
||||
} else {
|
||||
opal_output(0, "pls_bproc: total job launch time is %ld usec",
|
||||
(launchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - joblaunchstart.tv_usec));
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if(NULL != argv) {
|
||||
opal_argv_free(argv);
|
||||
|
@ -111,6 +111,8 @@ struct orte_pls_bproc_component_t {
|
||||
* we will look for it in the user's path */
|
||||
int debug;
|
||||
/**< If greater than 0 print debugging information */
|
||||
bool timing;
|
||||
/**< If true, report launch timing info */
|
||||
int num_procs;
|
||||
/**< The number of processes that are running */
|
||||
int priority;
|
||||
|
@ -54,7 +54,7 @@ orte_pls_bproc_component_t mca_pls_bproc_component = {
|
||||
* finishes setting up the component struct.
|
||||
*/
|
||||
int orte_pls_bproc_component_open(void) {
|
||||
int rc;
|
||||
int rc, tmp, value;
|
||||
char *policy;
|
||||
|
||||
/* init parameters */
|
||||
@ -89,6 +89,15 @@ int orte_pls_bproc_component_open(void) {
|
||||
mca_pls_bproc_component.bynode = false;
|
||||
}
|
||||
|
||||
tmp = mca_base_param_reg_int_name("orte", "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
false, false, 0, &value);
|
||||
if (value != 0) {
|
||||
mca_pls_bproc_component.timing = true;
|
||||
} else {
|
||||
mca_pls_bproc_component.timing = false;
|
||||
}
|
||||
|
||||
/* init the list to hold the daemon names */
|
||||
rc = orte_pointer_array_init(&mca_pls_bproc_component.daemon_names, 8, 200000, 8);
|
||||
/* init the list to hold the daemon names */
|
||||
|
@ -63,6 +63,7 @@ struct orte_pls_rsh_component_t {
|
||||
bool debug;
|
||||
bool debug_malloc;
|
||||
bool debug_daemons;
|
||||
bool timing;
|
||||
bool reap;
|
||||
bool assume_same_shell;
|
||||
int delay;
|
||||
|
@ -108,7 +108,7 @@ orte_pls_rsh_component_t mca_pls_rsh_component = {
|
||||
|
||||
int orte_pls_rsh_component_open(void)
|
||||
{
|
||||
int tmp;
|
||||
int tmp, value;
|
||||
mca_base_component_t *c = &mca_pls_rsh_component.super.pls_version;
|
||||
|
||||
/* initialize globals */
|
||||
@ -140,12 +140,20 @@ int orte_pls_rsh_component_open(void)
|
||||
false, false, false, &tmp);
|
||||
mca_pls_rsh_component.debug = OPAL_INT_TO_BOOL(tmp);
|
||||
}
|
||||
|
||||
mca_base_param_reg_int_name("orte", "debug_daemons",
|
||||
"Whether or not to enable debugging daemons (0 or 1)",
|
||||
false, false, false, &tmp);
|
||||
"Whether or not to enable debugging of daemons (0 or 1)",
|
||||
false, false, false, &tmp);
|
||||
mca_pls_rsh_component.debug_daemons = OPAL_INT_TO_BOOL(tmp);
|
||||
|
||||
tmp = mca_base_param_reg_int_name("orte", "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
false, false, 0, &value);
|
||||
if (value != 0) {
|
||||
mca_pls_rsh_component.timing = true;
|
||||
} else {
|
||||
mca_pls_rsh_component.timing = false;
|
||||
}
|
||||
|
||||
mca_base_param_reg_string(c, "orted",
|
||||
"The command name that the rsh pls component will invoke for the ORTE daemon",
|
||||
false, false, "orted",
|
||||
|
@ -41,9 +41,6 @@
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
@ -129,8 +126,14 @@ static const char * orte_pls_rsh_shell_name[] = {
|
||||
"unknown"
|
||||
};
|
||||
|
||||
/* local global storage of timing variables */
|
||||
static unsigned long mintime=999999999, miniter, maxtime=0, maxiter;
|
||||
static float avgtime=0.0;
|
||||
static struct timeval *launchstart;
|
||||
static struct timeval joblaunchstart, joblaunchstop;
|
||||
|
||||
/* local global storage of the list of active daemons */
|
||||
opal_list_t active_daemons;
|
||||
static opal_list_t active_daemons;
|
||||
|
||||
|
||||
/**
|
||||
@ -281,6 +284,8 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
orte_mapped_proc_t *proc;
|
||||
opal_list_item_t *item;
|
||||
int rc;
|
||||
unsigned long deltat;
|
||||
struct timeval launchstop;
|
||||
|
||||
/* if ssh exited abnormally, set the child processes to aborted
|
||||
and print something useful to the user. The usual reasons for
|
||||
@ -345,24 +350,66 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
} else {
|
||||
opal_output(0, "No extra status information is available: %d.", status);
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
|
||||
/* tell the system that this daemon is gone */
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* remove the daemon from our local list */
|
||||
opal_list_remove_item(&active_daemons, &info->super);
|
||||
OBJ_RELEASE(info);
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
|
||||
} /* if abnormal exit */
|
||||
|
||||
/* release any waiting threads */
|
||||
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
|
||||
/* tell the system that this daemon is gone */
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* first check timing request */
|
||||
if (mca_pls_rsh_component.timing) {
|
||||
if (0 != gettimeofday(&launchstop, NULL)) {
|
||||
opal_output(0, "pls_rsh: could not obtain stop time");
|
||||
} else {
|
||||
deltat = (launchstop.tv_sec - launchstart[info->name->vpid].tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - launchstart[info->name->vpid].tv_usec);
|
||||
avgtime = avgtime + deltat;
|
||||
if (deltat < mintime) {
|
||||
mintime = deltat;
|
||||
miniter = (unsigned long)info->name->vpid;
|
||||
}
|
||||
if (deltat > maxtime) {
|
||||
maxtime = deltat;
|
||||
maxiter = (unsigned long)info->name->vpid;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* remove the daemon from our local list */
|
||||
opal_list_remove_item(&active_daemons, &info->super);
|
||||
OBJ_RELEASE(info);
|
||||
|
||||
if (mca_pls_rsh_component.num_children-- >=
|
||||
mca_pls_rsh_component.num_concurrent ||
|
||||
mca_pls_rsh_component.num_children == 0) {
|
||||
opal_condition_signal(&mca_pls_rsh_component.cond);
|
||||
}
|
||||
|
||||
if (mca_pls_rsh_component.timing && mca_pls_rsh_component.num_children == 0) {
|
||||
if (0 != gettimeofday(&joblaunchstop, NULL)) {
|
||||
opal_output(0, "pls_rsh: could not obtain job launch stop time");
|
||||
} else {
|
||||
deltat = (joblaunchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
|
||||
(joblaunchstop.tv_usec - joblaunchstart.tv_usec);
|
||||
opal_output(0, "pls_rsh: total time to launch job is %lu usec", deltat);
|
||||
if (mintime < 999999999) {
|
||||
/* had at least one non-local node */
|
||||
avgtime = avgtime/opal_list_get_size(&active_daemons);
|
||||
opal_output(0, "pls_rsh: average time to launch one daemon %f usec", avgtime);
|
||||
opal_output(0, "pls_rsh: min time to launch a daemon was %lu usec for iter %lu", mintime, miniter);
|
||||
opal_output(0, "pls_rsh: max time to launch a daemon was %lu usec for iter %lu", maxtime, maxiter);
|
||||
} else {
|
||||
opal_output(0, "No nonlocal launches to report for timing info");
|
||||
}
|
||||
}
|
||||
free(launchstart);
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
|
||||
|
||||
}
|
||||
@ -387,7 +434,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
char *uri, *param;
|
||||
char **argv, **tmp;
|
||||
char *prefix_dir;
|
||||
char **env;
|
||||
int argc;
|
||||
int rc;
|
||||
sigset_t sigs;
|
||||
@ -396,6 +442,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
bool local_bash = false, local_csh = false;
|
||||
char *lib_base = NULL, *bin_base = NULL;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
|
||||
if (mca_pls_rsh_component.timing) {
|
||||
if (0 != gettimeofday(&joblaunchstart, NULL)) {
|
||||
opal_output(0, "pls_rsh: could not obtain start time");
|
||||
joblaunchstart.tv_sec = 0;
|
||||
joblaunchstart.tv_usec = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup a list that will contain the info for all the daemons
|
||||
* so we can store it on the registry when done and use it
|
||||
@ -414,17 +468,13 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
orte_dss.dump(0, map, ORTE_JOB_MAP);
|
||||
}
|
||||
|
||||
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
|
||||
|
||||
if (mca_pls_rsh_component.debug_daemons &&
|
||||
mca_pls_rsh_component.num_concurrent < num_nodes) {
|
||||
/* we can't run in this situation, so pretty print the error
|
||||
* and exit
|
||||
*/
|
||||
* and exit
|
||||
*/
|
||||
opal_show_help("help-pls-rsh.txt", "deadlock-params",
|
||||
true, mca_pls_rsh_component.num_concurrent, num_nodes);
|
||||
OBJ_RELEASE(map);
|
||||
@ -648,63 +698,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
lib_base = opal_basename(OPAL_LIBDIR);
|
||||
bin_base = opal_basename(OPAL_BINDIR);
|
||||
|
||||
/* copy the environment so we can modify it with opal functions. The
|
||||
* environment is the same for all daemons, so we only need to do
|
||||
* this once
|
||||
*/
|
||||
env = opal_argv_copy(environ);
|
||||
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables
|
||||
*/
|
||||
if (NULL != prefix_dir) {
|
||||
char *oldenv, *newenv;
|
||||
|
||||
/* Reset PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
|
||||
oldenv = getenv("PATH");
|
||||
if (NULL != oldenv) {
|
||||
char *temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv );
|
||||
free( newenv );
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
|
||||
}
|
||||
free(newenv);
|
||||
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
|
||||
oldenv = getenv("LD_LIBRARY_PATH");
|
||||
if (NULL != oldenv) {
|
||||
char* temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv);
|
||||
free(newenv);
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
free(newenv);
|
||||
}
|
||||
|
||||
/* ensure we aren't the seed */
|
||||
param = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(param, "0", true, &env);
|
||||
free(param);
|
||||
|
||||
/* clean out any MCA component selection directives that
|
||||
* won't work on remote nodes
|
||||
*/
|
||||
orte_pls_base_purge_mca_params(&env);
|
||||
|
||||
/*
|
||||
* Iterate through each of the nodes
|
||||
*/
|
||||
if (mca_pls_rsh_component.timing) {
|
||||
/* allocate space to track the start times */
|
||||
launchstart = (struct timeval*)malloc((num_nodes+vpid) * sizeof(struct timeval));
|
||||
}
|
||||
|
||||
for(n_item = opal_list_get_first(&map->nodes);
|
||||
n_item != opal_list_get_end(&map->nodes);
|
||||
n_item = opal_list_get_next(n_item)) {
|
||||
@ -712,9 +713,15 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
pid_t pid;
|
||||
char *exec_path;
|
||||
char **exec_argv;
|
||||
|
||||
|
||||
rmaps_node = (orte_mapped_node_t*)n_item;
|
||||
|
||||
|
||||
if (mca_pls_rsh_component.timing) {
|
||||
if (0 != gettimeofday(&launchstart[vpid], NULL)) {
|
||||
opal_output(0, "pls_rsh: could not obtain start time");
|
||||
}
|
||||
}
|
||||
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
@ -750,7 +757,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* rsh a child to exec the rsh/ssh session */
|
||||
/* fork a child to exec the rsh/ssh session */
|
||||
|
||||
/* set the process state to "launched" */
|
||||
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {
|
||||
@ -767,6 +774,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
/* child */
|
||||
if (pid == 0) {
|
||||
char* name_string;
|
||||
char** env;
|
||||
char* var;
|
||||
long fd, fdmax = sysconf(_SC_OPEN_MAX);
|
||||
|
||||
@ -795,6 +803,15 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
|
||||
rmaps_node->nodename);
|
||||
}
|
||||
if (mca_pls_rsh_component.timing) {
|
||||
/* since this is a local launch, the daemon will never reach
|
||||
* the waitpid callback - so set the start value to
|
||||
* something nonsensical
|
||||
*/
|
||||
launchstart[vpid].tv_sec = 0;
|
||||
launchstart[vpid].tv_usec = 0;
|
||||
}
|
||||
|
||||
exec_argv = &argv[local_exec_index];
|
||||
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
|
||||
|
||||
@ -816,7 +833,45 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables. We're
|
||||
already in the child process, so it's ok to modify
|
||||
environ. */
|
||||
if (NULL != prefix_dir) {
|
||||
char *oldenv, *newenv;
|
||||
|
||||
/* Reset PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
|
||||
oldenv = getenv("PATH");
|
||||
if (NULL != oldenv) {
|
||||
char *temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv );
|
||||
free( newenv );
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
|
||||
}
|
||||
free(newenv);
|
||||
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
|
||||
oldenv = getenv("LD_LIBRARY_PATH");
|
||||
if (NULL != oldenv) {
|
||||
char* temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv);
|
||||
free(newenv);
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
free(newenv);
|
||||
}
|
||||
|
||||
/* Since this is a local execution, we need to
|
||||
potentially whack the final ")" in the argv (if
|
||||
sh/csh conditionals, from above). Note that we're
|
||||
@ -846,7 +901,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
}
|
||||
/* Ignore errors -- what are we going to do?
|
||||
(and we ignore errors on the remote nodes
|
||||
in the odls, so this is consistent) */
|
||||
in the fork pls, so this is consistent) */
|
||||
chdir(var);
|
||||
}
|
||||
} else {
|
||||
@ -938,6 +993,11 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
sigprocmask(0, 0, &sigs);
|
||||
sigprocmask(SIG_UNBLOCK, &sigs, 0);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* exec the daemon */
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
param = opal_argv_join(exec_argv, ' ');
|
||||
@ -946,7 +1006,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
|
||||
execve(exec_path, exec_argv, env);
|
||||
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
|
||||
exit(-1);
|
||||
@ -1001,7 +1060,6 @@ cleanup:
|
||||
|
||||
free(jobid_string); /* done with this variable */
|
||||
opal_argv_free(argv);
|
||||
opal_argv_free(env);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ extern "C" {
|
||||
orte_pls_base_component_t super;
|
||||
int priority;
|
||||
int debug;
|
||||
bool timing;
|
||||
char *orted;
|
||||
char *custom_args;
|
||||
};
|
||||
|
@ -103,6 +103,7 @@ orte_pls_slurm_component_t mca_pls_slurm_component = {
|
||||
static int pls_slurm_open(void)
|
||||
{
|
||||
mca_base_component_t *comp = &mca_pls_slurm_component.super.pls_version;
|
||||
int tmp, value;
|
||||
|
||||
mca_base_param_reg_int(comp, "debug", "Enable debugging of slurm pls",
|
||||
false, false, 0,
|
||||
@ -122,6 +123,15 @@ static int pls_slurm_open(void)
|
||||
false, false, "orted",
|
||||
&mca_pls_slurm_component.orted);
|
||||
|
||||
tmp = mca_base_param_reg_int_name("orte", "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
false, false, 0, &value);
|
||||
if (value != 0) {
|
||||
mca_pls_slurm_component.timing = true;
|
||||
} else {
|
||||
mca_pls_slurm_component.timing = false;
|
||||
}
|
||||
|
||||
mca_base_param_reg_string(comp, "args",
|
||||
"Custom arguments to srun",
|
||||
false, false, NULL,
|
||||
|
@ -38,6 +38,9 @@
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
@ -132,7 +135,14 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
|
||||
char *cur_prefix;
|
||||
opal_list_t daemons;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
struct timeval joblaunchstart, launchstart, launchstop;
|
||||
|
||||
if (mca_pls_slurm_component.timing) {
|
||||
if (0 != gettimeofday(&joblaunchstart, NULL)) {
|
||||
opal_output(0, "pls_slurm: could not obtain job start time");
|
||||
}
|
||||
}
|
||||
|
||||
/* setup a list that will contain the info for all the daemons
|
||||
* so we can store it on the registry when done
|
||||
*/
|
||||
@ -369,18 +379,33 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
|
||||
var = mca_base_param_environ_variable("seed", NULL, NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* clean out any MCA component selection directives that
|
||||
* won't work on remote nodes
|
||||
*/
|
||||
orte_pls_base_purge_mca_params(&env);
|
||||
if (mca_pls_slurm_component.timing) {
|
||||
if (0 != gettimeofday(&launchstart, NULL)) {
|
||||
opal_output(0, "pls_slurm: could not obtain start time");
|
||||
}
|
||||
}
|
||||
|
||||
/* exec the daemon */
|
||||
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
|
||||
|
||||
if (mca_pls_slurm_component.timing) {
|
||||
if (0 != gettimeofday(&launchstop, NULL)) {
|
||||
opal_output(0, "pls_slurm: could not obtain stop time");
|
||||
} else {
|
||||
opal_output(0, "pls_slurm: daemon block launch time is %ld usec",
|
||||
(launchstop.tv_sec - launchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - launchstart.tv_usec));
|
||||
opal_output(0, "pls_slurm: total job launch time is %ld usec",
|
||||
(launchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - joblaunchstart.tv_usec));
|
||||
}
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "pls:slurm: start_procs returned error %d", rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
||||
/* JMS: short we stash the srun pid in the gpr somewhere for cleanup? */
|
||||
/* JMS: how do we catch when srun dies? */
|
||||
|
||||
|
@ -37,6 +37,7 @@ extern "C" {
|
||||
bool want_path_check;
|
||||
char *orted;
|
||||
char **checked_paths;
|
||||
bool timing;
|
||||
};
|
||||
typedef struct orte_pls_tm_component_t orte_pls_tm_component_t;
|
||||
|
||||
|
@ -95,7 +95,7 @@ orte_pls_tm_component_t mca_pls_tm_component = {
|
||||
|
||||
static int pls_tm_open(void)
|
||||
{
|
||||
int tmp;
|
||||
int tmp, value;
|
||||
mca_base_component_t *comp = &mca_pls_tm_component.super.pls_version;
|
||||
|
||||
mca_base_param_reg_int(comp, "debug", "Enable debugging of the TM pls",
|
||||
@ -114,7 +114,16 @@ static int pls_tm_open(void)
|
||||
"Whether the launching process should check for the pls_tm_orted executable in the PATH before launching (the TM API does not give an idication of failure; this is a somewhat-lame workaround; non-zero values enable this check)",
|
||||
false, false, (int) true, &tmp);
|
||||
mca_pls_tm_component.want_path_check = (bool) tmp;
|
||||
|
||||
|
||||
tmp = mca_base_param_reg_int_name("orte", "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
false, false, 0, &value);
|
||||
if (value != 0) {
|
||||
mca_pls_tm_component.timing = true;
|
||||
} else {
|
||||
mca_pls_tm_component.timing = false;
|
||||
}
|
||||
|
||||
mca_pls_tm_component.checked_paths = NULL;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -30,11 +30,18 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <signal.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_WAIT_H
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
#ifdef HAVE_SCHED_H
|
||||
#include <sched.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <tm.h>
|
||||
|
||||
@ -138,8 +145,19 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
tm_event_t event;
|
||||
opal_list_t daemons;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
|
||||
/* Query the map for this job.
|
||||
struct timeval launchstart, launchstop, completionstart, completionstop;
|
||||
struct timeval jobstart, jobstop;
|
||||
int maxtime=0, mintime=99999999, maxiter, miniter, deltat;
|
||||
float avgtime=0.0;
|
||||
|
||||
/* check for timing request - get start time if so */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&jobstart, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain job start time");
|
||||
}
|
||||
}
|
||||
|
||||
/* Query the map for this job.
|
||||
* We need the entire mapping for a couple of reasons:
|
||||
* - need the prefix to start with.
|
||||
* - need to know if we are launching on a subset of the allocated nodes
|
||||
@ -397,6 +415,15 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
}
|
||||
}
|
||||
|
||||
/* check for timing request - get start time if so */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&launchstart, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain start time");
|
||||
launchstart.tv_sec = 0;
|
||||
launchstart.tv_usec = 0;
|
||||
}
|
||||
}
|
||||
|
||||
rc = pls_tm_start_proc(node->nodename, argc, argv, env,
|
||||
tm_task_ids + launched,
|
||||
tm_events + launched);
|
||||
@ -404,6 +431,25 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
opal_output(0, "pls:tm: start_procs returned error %d", rc);
|
||||
goto cleanup;
|
||||
}
|
||||
/* check for timing request - get stop time and process if so */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&launchstop, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain stop time");
|
||||
} else {
|
||||
deltat = (launchstop.tv_sec - launchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - launchstart.tv_usec);
|
||||
avgtime = avgtime + deltat / num_nodes;
|
||||
if (deltat < mintime) {
|
||||
mintime = deltat;
|
||||
miniter = launched;
|
||||
}
|
||||
if (deltat > maxtime) {
|
||||
maxtime = deltat;
|
||||
maxiter = launched;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
launched++;
|
||||
++vpid;
|
||||
free(name);
|
||||
@ -415,6 +461,15 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
opal_output(0, "pls:tm:launch: finished spawning orteds\n");
|
||||
}
|
||||
|
||||
/* check for timing request - get start time for launch completion */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&completionstart, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain completion start time");
|
||||
completionstart.tv_sec = 0;
|
||||
completionstart.tv_usec = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* all done, so store the daemon info on the registry */
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -429,7 +484,23 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* check for timing request - get stop time for launch completion and report */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&completionstop, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain completion stop time");
|
||||
} else {
|
||||
deltat = (launchstop.tv_sec - launchstart.tv_sec)*1000000 +
|
||||
(launchstop.tv_usec - launchstart.tv_usec);
|
||||
opal_output(0, "pls_tm: launch completion required %d usec", deltat);
|
||||
}
|
||||
opal_output(0, "pls_tm: Launch statistics:");
|
||||
opal_output(0, "pls_tm: Average time to launch an orted: %f usec", avgtime);
|
||||
opal_output(0, "pls_tm: Max time to launch an orted: %d usec at iter %d", maxtime, maxiter);
|
||||
opal_output(0, "pls_tm: Min time to launch an orted: %d usec at iter %d", mintime, miniter);
|
||||
}
|
||||
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(map);
|
||||
|
||||
@ -456,6 +527,17 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
}
|
||||
OBJ_DESTRUCT(&daemons);
|
||||
|
||||
/* check for timing request - get stop time and process if so */
|
||||
if (mca_pls_tm_component.timing) {
|
||||
if (0 != gettimeofday(&jobstop, NULL)) {
|
||||
opal_output(0, "pls_tm: could not obtain stop time");
|
||||
} else {
|
||||
deltat = (jobstop.tv_sec - jobstart.tv_sec)*1000000 +
|
||||
(jobstop.tv_usec - jobstart.tv_usec);
|
||||
opal_output(0, "pls_tm: launch of entire job required %d usec", deltat);
|
||||
}
|
||||
}
|
||||
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm:launch: finished\n");
|
||||
}
|
||||
|
@ -408,9 +408,9 @@ static int orte_rmgr_urm_spawn_job(
|
||||
if (0 != gettimeofday(&urmstop, NULL)) {
|
||||
opal_output(0, "rmgr_urm: could not obtain stop time");
|
||||
} else {
|
||||
opal_output(0, "rmgr_urm: job setup time is %ld sec %ld usec",
|
||||
(long int)(urmstop.tv_sec - urmstart.tv_sec),
|
||||
(long int)(urmstop.tv_usec - urmstart.tv_usec));
|
||||
opal_output(0, "rmgr_urm: job setup time is %ld usec",
|
||||
(long int)((urmstop.tv_sec - urmstart.tv_sec)*1000000 +
|
||||
(urmstop.tv_usec - urmstart.tv_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -422,7 +422,18 @@ static int orte_rmgr_urm_spawn_job(
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
/* check for timing request - get start time if so */
|
||||
if (mca_rmgr_urm_component.timing) {
|
||||
if (0 != gettimeofday(&urmstart, NULL)) {
|
||||
opal_output(0, "rmgr_urm: could not obtain launch stop time");
|
||||
} else {
|
||||
opal_output(0, "rmgr_urm: launch time is %ld usec",
|
||||
(long int)((urmstart.tv_sec - urmstop.tv_sec)*1000000 +
|
||||
(urmstart.tv_usec - urmstop.tv_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user