1
1

Continue to refine the timing instrumentation to identify where launch time is being spent

This commit was SVN r20244.
Этот коммит содержится в:
Ralph Castain 2009-01-12 19:12:58 +00:00
родитель 56ad7acc8a
Коммит 2778c13fac
8 изменённых файлов: 231 добавлений и 60 удалений

Просмотреть файл

@ -485,6 +485,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
opal_list_append(&orte_local_jobdata, &jobdat->super);
}
/* if we are doing a timing test, store the time the msg was recvd */
if (orte_timing) {
jobdat->launch_msg_recvd.tv_sec = orte_daemon_msg_recvd.tv_sec;
jobdat->launch_msg_recvd.tv_usec = orte_daemon_msg_recvd.tv_usec;
}
/* UNPACK JOB-SPECIFIC DATA */
/* unpack the number of procs in this launch */
cnt=1;
@ -896,7 +902,7 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
return ORTE_SUCCESS;
}
static int pack_state_for_proc(opal_buffer_t *alert, bool pack_pid, orte_odls_child_t *child)
static int pack_state_for_proc(opal_buffer_t *alert, bool include_startup_info, orte_odls_child_t *child)
{
int rc;
@ -905,12 +911,26 @@ static int pack_state_for_proc(opal_buffer_t *alert, bool pack_pid, orte_odls_ch
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack its pid if we need to report it */
if (pack_pid) {
/* pack startup info if we need to report it */
if (include_startup_info) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->pid, 1, OPAL_PID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we are timing things, pack the time the proc was launched */
if (orte_timing) {
int64_t tmp;
tmp = child->starttime.tv_sec;
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &tmp, 1, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = child->starttime.tv_usec;
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &tmp, 1, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
/* pack its state */
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &child->state, 1, ORTE_PROC_STATE))) {
@ -926,7 +946,7 @@ static int pack_state_for_proc(opal_buffer_t *alert, bool pack_pid, orte_odls_ch
return ORTE_SUCCESS;
}
static int pack_state_update(opal_buffer_t *alert, bool pack_pid, orte_jobid_t job)
static int pack_state_update(opal_buffer_t *alert, bool include_startup_info, orte_odls_job_t *jobdat)
{
int rc;
opal_list_item_t *item;
@ -934,17 +954,31 @@ static int pack_state_update(opal_buffer_t *alert, bool pack_pid, orte_jobid_t j
orte_vpid_t null=ORTE_VPID_INVALID;
/* pack the jobid */
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &job, 1, ORTE_JOBID))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &jobdat->jobid, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we are timing things, pack the time the launch msg for this job was recvd */
if (include_startup_info && orte_timing) {
int64_t tmp;
tmp = jobdat->launch_msg_recvd.tv_sec;
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &tmp, 1, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = jobdat->launch_msg_recvd.tv_usec;
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &tmp, 1, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
for (item = opal_list_get_first(&orte_local_children);
item != opal_list_get_end(&orte_local_children);
item = opal_list_get_next(item)) {
child = (orte_odls_child_t*)item;
/* if this child is part of the job... */
if (child->name->jobid == job) {
if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, pack_pid, child))) {
if (child->name->jobid == jobdat->jobid) {
if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, include_startup_info, child))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -1200,6 +1234,11 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
free(param);
/* if we are timing things, record when we are going to launch this proc */
if (orte_timing) {
gettimeofday(&child->starttime, NULL);
}
/* must unlock prior to fork to keep things clean in the
* event library
*/
@ -1265,7 +1304,7 @@ CLEANUP:
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job)));
/* pack the launch results */
if (ORTE_SUCCESS != (ret = pack_state_update(&alert, true, job))) {
if (ORTE_SUCCESS != (ret = pack_state_update(&alert, true, jobdat))) {
ORTE_ERROR_LOG(ret);
}
@ -1742,6 +1781,8 @@ static void check_proc_complete(orte_odls_child_t *child)
int rc;
opal_buffer_t alert;
orte_plm_cmd_flag_t cmd=ORTE_PLM_UPDATE_PROC_STATE;
opal_list_item_t *item;
orte_odls_job_t *jdat;
/* is this proc fully complete? */
if (!child->waitpid_recvd || !child->iof_complete) {
@ -1817,7 +1858,17 @@ static void check_proc_complete(orte_odls_child_t *child)
goto unlock;
}
/* pack the data for the job */
if (ORTE_SUCCESS != (rc = pack_state_update(&alert, false, child->name->jobid))) {
for (item = opal_list_get_first(&orte_local_jobdata);
item != opal_list_get_end(&orte_local_jobdata);
item = opal_list_get_next(item)) {
jdat = (orte_odls_job_t*)item;
/* is this the specified job? */
if (jdat->jobid == child->name->jobid) {
break;
}
}
if (ORTE_SUCCESS != (rc = pack_state_update(&alert, false, jdat))) {
ORTE_ERROR_LOG(rc);
goto unlock;
}

Просмотреть файл

@ -23,6 +23,10 @@
#include "orte_config.h"
#include "orte/types.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "opal/class/opal_list.h"
#include "opal/dss/dss_types.h"
#include "orte/mca/grpcomm/grpcomm_types.h"
@ -89,6 +93,7 @@ typedef struct {
char *slot_list; /* list of slots for this child */
bool waitpid_recvd; /* waitpid has detected proc termination */
bool iof_complete; /* IOF has noted proc terminating all channels */
struct timeval starttime; /* when the proc was started - for timing purposes only */
} orte_odls_child_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t);
@ -112,6 +117,7 @@ typedef struct orte_odls_job_t {
int32_t num_contributors;
int num_participating;
int num_collected;
struct timeval launch_msg_recvd; /* when the launch msg for this job was recvd - for timing purposes only */
} orte_odls_job_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_job_t);

Просмотреть файл

@ -59,6 +59,8 @@
static int orte_plm_base_report_launched(orte_jobid_t job);
static char *pretty_print_timing(int64_t secs, int64_t usecs);
int orte_plm_base_setup_job(orte_job_t *jdata)
{
orte_job_t *jdatorted;
@ -183,7 +185,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
return ORTE_SUCCESS;
}
static struct timeval app_launch_start, app_launch_stop;
static struct timeval app_launch_start, app_launch_stop, launch_msg_sent;
int orte_plm_base_launch_apps(orte_jobid_t job)
{
@ -227,6 +229,11 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
return rc;
}
/* if we are timing, record the time we send this message */
if (orte_timing) {
gettimeofday(&launch_msg_sent, NULL);
}
/* send the command to the daemons */
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid,
buffer, ORTE_RML_TAG_DAEMON))) {
@ -246,23 +253,13 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
}
if (orte_timing) {
unsigned long maxsec, maxusec, minutes, seconds;
float fsecs;
int64_t maxsec, maxusec;
gettimeofday(&app_launch_stop, NULL);
/* subtract starting time to get time in microsecs for test */
maxsec = app_launch_stop.tv_sec - app_launch_start.tv_sec;
maxusec = app_launch_stop.tv_usec - app_launch_start.tv_usec;
/* pretty-print the result */
seconds = maxsec + (maxusec / 1000000l);
minutes = seconds / 60l;
seconds = seconds % 60l;
if (0 == minutes && 0 == seconds) {
fsecs = ((float)(maxsec)*1000000.0 + (float)maxusec) / 1000.0;
fprintf(orte_timing_output, "Time to launch apps: %8.2f millisecs\n", fsecs);
} else {
fprintf(orte_timing_output, "Time to launch apps: %3lu:%02lu min:sec\n", minutes, seconds);
}
fprintf(orte_timing_output, "Time to launch apps: %s\n", pretty_print_timing(maxsec, maxusec));
}
/* complete wiring up the iof */
@ -444,6 +441,9 @@ static void process_orted_launch_report(int fd, short event, void *data)
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
if (orte_timing_details) {
}
/* save the latest daemon to start */
if (startsec > daemonlaunchtime.tv_sec) {
@ -488,13 +488,7 @@ static void process_orted_launch_report(int fd, short event, void *data)
goto CLEANUP;
}
/* check the time for the callback to complete and save the longest */
secs = recvtime.tv_sec - setupsec;
if (setupusec <= recvtime.tv_usec) {
usecs = recvtime.tv_usec - setupusec;
} else {
secs--;
usecs = 1000000 - setupusec + recvtime.tv_usec;
}
ORTE_COMPUTE_TIME_DIFF(secs, usecs, setupsec, setupusec, recvtime.tv_sec, recvtime.tv_usec);
if (secs > daemoncbtime.tv_sec) {
daemoncbtime.tv_sec = secs;
daemoncbtime.tv_usec = usecs;
@ -565,8 +559,6 @@ static void orted_report_launch(int status, orte_process_name_t* sender,
int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
{
int rc;
unsigned long minutes, seconds;
float fsecs;
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:daemon_callback",
@ -598,35 +590,16 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
/* if we are timing, output the results */
if (orte_timing) {
seconds = (daemonlaunchtime.tv_sec - orte_plm_globals.daemonlaunchstart.tv_sec) +
((daemonlaunchtime.tv_usec - orte_plm_globals.daemonlaunchstart.tv_usec) / 1000000l);
minutes = seconds / 60l;
seconds = seconds % 60l;
if (0 == minutes && 0 == seconds) {
fsecs = ((float)(daemonlaunchtime.tv_sec - orte_plm_globals.daemonlaunchstart.tv_sec)*1000000.0 +
(float)(daemonlaunchtime.tv_usec - orte_plm_globals.daemonlaunchstart.tv_usec)) / 1000.0;
fprintf(orte_timing_output, "Daemon initial launch was completed in %8.2f millisecs\n", fsecs);
} else {
fprintf(orte_timing_output, "Daemon initial launch was completed in %3lu:%02lu min:sec\n", minutes, seconds);
}
seconds = daemonsetuptime.tv_sec + (daemonsetuptime.tv_usec / 1000000l);
minutes = seconds / 60l;
seconds = seconds % 60l;
if (0 == minutes && 0 == seconds) {
fsecs = ((float)(daemonsetuptime.tv_sec)*1000000.0 + (float)daemonsetuptime.tv_usec) / 1000.0;
fprintf(orte_timing_output, "Daemon setup was completed in a maximum of %8.2f millisecs\n", fsecs);
} else {
fprintf(orte_timing_output, "Daemon setup was completed in a maximum of %3lu:%02lu min:sec\n", minutes, seconds);
}
seconds = daemoncbtime.tv_sec + (daemoncbtime.tv_usec / 1000000l);
minutes = seconds / 60l;
seconds = seconds % 60l;
if (0 == minutes && 0 == seconds) {
fsecs = ((float)(daemoncbtime.tv_sec)*1000000.0 + (float)daemoncbtime.tv_usec) / 1000.0;
fprintf(orte_timing_output, "Daemon callback to HNP was completed in a maximum of %8.3f millisecs\n", fsecs);
} else {
fprintf(orte_timing_output, "Daemon callback to HNP was completed in a maximum of %3lu:%02lu min:sec\n", minutes, seconds);
}
int64_t sec, usec;
ORTE_COMPUTE_TIME_DIFF(sec, usec, orte_plm_globals.daemonlaunchstart.tv_sec,
orte_plm_globals.daemonlaunchstart.tv_usec,
daemonlaunchtime.tv_sec, daemonlaunchtime.tv_usec);
fprintf(orte_timing_output, "Daemon launch was completed in %s\n",
pretty_print_timing(sec, usec));
fprintf(orte_timing_output, "Daemon setup (from first exec statement to ready-for-commands) was completed in a maximum of %s\n",
pretty_print_timing(daemonsetuptime.tv_sec, daemonsetuptime.tv_usec));
fprintf(orte_timing_output, "Daemon callback message to HNP took a maximum time of %s to reach the HNP\n",
pretty_print_timing(daemoncbtime.tv_sec, daemoncbtime.tv_usec));
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
@ -646,6 +619,7 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
* for their local procs.
*/
static bool app_launch_failed;
static struct timeval max_daemon_launch_msg_recvd = {0,0};
/* since the HNP also reports launch of procs, we need to separate out
* the processing of the message vs its receipt so that the HNP
@ -697,6 +671,42 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
}
procs = (orte_proc_t**)(jdata->procs->addr);
/* if we are timing, the daemon will have included the time it
* recvd the launch msg - the maximum time between when we sent
* that message and a daemon recvd it tells us the time reqd
* to wireup the daemon comm network
*/
if (orte_timing) {
int64_t tmpsec, tmpusec;
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &tmpsec, &cnt, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
app_launch_failed = true;
goto CLEANUP;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &tmpusec, &cnt, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
app_launch_failed = true;
goto CLEANUP;
}
/* keep the maximum time */
if (tmpsec > max_daemon_launch_msg_recvd.tv_sec) {
max_daemon_launch_msg_recvd.tv_sec = tmpsec;
max_daemon_launch_msg_recvd.tv_usec = tmpusec;
} else if (tmpsec == max_daemon_launch_msg_recvd.tv_sec &&
tmpusec > max_daemon_launch_msg_recvd.tv_usec) {
max_daemon_launch_msg_recvd.tv_usec = tmpusec;
}
if (orte_timing_details) {
int64_t sec, usec;
ORTE_COMPUTE_TIME_DIFF(sec, usec, launch_msg_sent.tv_sec, launch_msg_sent.tv_usec,
tmpsec, tmpusec);
fprintf(orte_timing_output, "Time for launch msg to reach daemon %s: %s\n",
ORTE_VPID_PRINT(mev->sender.vpid), pretty_print_timing(sec, usec));
}
}
/* the daemon will report the vpid, state, and pid of each
* process it launches - we need the pid in particular so
* that any debuggers can attach to the process
@ -714,6 +724,32 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
app_launch_failed = true;
goto CLEANUP;
}
/* if we are timing things, unpack the time this proc was started */
if (orte_timing) {
int64_t tmpsec, tmpusec;
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &tmpsec, &cnt, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
app_launch_failed = true;
goto CLEANUP;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &tmpusec, &cnt, OPAL_INT64))) {
ORTE_ERROR_LOG(rc);
app_launch_failed = true;
goto CLEANUP;
}
if (orte_timing_details) {
time_t tmptime;
char *tmpstr;
tmptime = tmpsec;
tmpstr = ctime(&tmptime);
/* remove the newline and the year at the end */
tmpstr[strlen(tmpstr)-6] = '\0';
fprintf(orte_timing_output, "Time rank %s was launched: %s.%3lu\n",
ORTE_VPID_PRINT(vpid), tmpstr, (unsigned long)(tmpusec/1000));
}
}
/* unpack the state */
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &state, &cnt, ORTE_PROC_STATE))) {
@ -1260,3 +1296,24 @@ CHECK_ALL_JOBS:
}
}
static char timestring[128];
static char *pretty_print_timing(int64_t secs, int64_t usecs)
{
unsigned long minutes, seconds;
float fsecs;
seconds = secs + (usecs / 1000000l);
minutes = seconds / 60l;
seconds = seconds % 60l;
if (0 == minutes && 0 == seconds) {
fsecs = ((float)(secs)*1000000.0 + (float)usecs) / 1000.0;
snprintf(timestring, 128, "%8.2f millisecs", fsecs);
} else {
snprintf(timestring, 128, "%3lu:%02lu min:sec", minutes, seconds);
}
return timestring;
}

Просмотреть файл

@ -22,6 +22,10 @@
#include "orte_config.h"
#include "orte/types.h"
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#include "opal/dss/dss_types.h"
#include "orte/mca/rml/rml_types.h"
@ -38,6 +42,9 @@ ORTE_DECLSPEC void orte_daemon_recv(int status, orte_process_name_t* sender,
/* direct cmd processing entry point - used by HNP */
ORTE_DECLSPEC void orte_daemon_cmd_processor(int fd, short event, void *data);
/* a time flag that needs to be visible elsewhere */
ORTE_DECLSPEC extern struct timeval orte_daemon_msg_recvd;
END_C_DECLS
#endif /* ORTED_H */

Просмотреть файл

@ -36,6 +36,9 @@
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#include "opal/class/opal_pointer_array.h"
@ -88,6 +91,10 @@ static int process_commands(orte_process_name_t* sender,
orte_rml_tag_t tag);
/* instantiate this - it is shared via orted.h */
struct timeval orte_daemon_msg_recvd;
static struct timeval mesg_recvd={0,0};
/* local callback function for non-blocking sends */
static void send_callback(int status, orte_process_name_t *peer,
@ -270,6 +277,11 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
if (orte_timing) {
/* pickup the time the message was recvd by this daemon */
gettimeofday(&mesg_recvd, NULL);
}
/* don't process this right away - we need to get out of the recv before
* we process the message as it may ask us to do something that involves
* more messaging! Instead, setup an event so that the message gets processed
@ -343,6 +355,13 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
wait_time = 1;
num_recursions = 0;
if (orte_timing && orte_process_info.hnp) {
/* if we are doing timing, and we are the HNP, then the message doesn't come
* through the RML recv, so we have to pickup the recv time here
*/
gettimeofday(&mesg_recvd, NULL);
}
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s orte:daemon:cmd:processor called by %s for tag %ld",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -393,6 +412,11 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
}
/* flag this location */
save = buffer->unpack_ptr;
/* store the time the cmd was recvd */
if (orte_timing) {
orte_daemon_msg_recvd.tv_sec = mesg_recvd.tv_sec;
orte_daemon_msg_recvd.tv_usec = mesg_recvd.tv_usec;
}
}
/* rewind the buffer to the beginning */

Просмотреть файл

@ -47,6 +47,7 @@
/* globals used by RTE */
bool orte_timing;
FILE *orte_timing_output = NULL;
bool orte_timing_details;
bool orte_debug_daemons_file_flag = false;
bool orte_leave_session_attached;
bool orte_do_not_launch = false;

Просмотреть файл

@ -118,6 +118,21 @@ typedef struct orte_job_t orte_job_t;
} while(0);
/* define a macro for computing time differences - used for timing tests
* across the code base
*/
#define ORTE_COMPUTE_TIME_DIFF(r, ur, s1, us1, s2, us2) \
do { \
(r) = (s2) - (s1); \
if ((us2) >= (us1)) { \
(ur) = (us2) - (us1); \
} else { \
(r)--; \
(ur) = 1000000 - (us1) + (us2); \
} \
} while(0);
/* global type definitions used by RTE - instanced in orte_globals.c */
/************
@ -400,6 +415,7 @@ ORTE_DECLSPEC orte_job_t* orte_get_job_data_object(orte_jobid_t job);
/* global variables used by RTE - instanced in orte_globals.c */
ORTE_DECLSPEC extern bool orte_timing;
ORTE_DECLSPEC extern FILE *orte_timing_output;
ORTE_DECLSPEC extern bool orte_timing_details;
ORTE_DECLSPEC extern bool orte_debug_daemons_flag, orte_debug_daemons_file_flag;
ORTE_DECLSPEC extern bool orte_leave_session_attached;
ORTE_DECLSPEC extern bool orte_do_not_launch;

Просмотреть файл

@ -145,6 +145,15 @@ int orte_register_params(void)
"Request that critical timing loops be measured",
false, false, (int)false, &value);
orte_timing = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("orte", "timing_details",
"Request that detailed timing data by reported",
false, false, (int)false, &value);
orte_timing_details = OPAL_INT_TO_BOOL(value);
if (orte_timing_details) {
/* ensure the timing flag is set too */
orte_timing = true;
}
if (orte_process_info.hnp) {
char *tmp;