diff --git a/orte/mca/grpcomm/bad/grpcomm_bad_module.c b/orte/mca/grpcomm/bad/grpcomm_bad_module.c index 950976ca41..620c6ba741 100644 --- a/orte/mca/grpcomm/bad/grpcomm_bad_module.c +++ b/orte/mca/grpcomm/bad/grpcomm_bad_module.c @@ -22,9 +22,6 @@ #include "orte/types.h" #include -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ #include "opal/threads/condition.h" #include "orte/util/show_help.h" @@ -197,7 +194,6 @@ CLEANUP: static bool barrier_recvd; -static bool barrier_timer; static void barrier_recv(int status, orte_process_name_t* sender, opal_buffer_t *buffer, @@ -207,29 +203,17 @@ static void barrier_recv(int status, orte_process_name_t* sender, barrier_recvd = true; } -static void barrier_timer_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - barrier_timer = true; -} - static int barrier(void) { opal_buffer_t buf; orte_daemon_cmd_flag_t command=ORTE_DAEMON_COLL_CMD; orte_grpcomm_coll_t coll_type=ORTE_GRPCOMM_BARRIER; int rc; - struct timeval ompistart, ompistop; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s grpcomm:bad entering barrier", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (orte_timing && ORTE_PROC_MY_NAME->vpid == 0) { - gettimeofday(&ompistart, NULL); - } - /* everyone sends barrier to local daemon */ OBJ_CONSTRUCT(&buf, opal_buffer_t); /* tell the daemon to collect the data */ @@ -272,44 +256,7 @@ static int barrier(void) OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, "%s grpcomm:bad received barrier release", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (orte_timing) { - if (ORTE_PROC_MY_NAME->vpid == 0) { - /* setup a receive to hear when the rank=N proc has received the data - * release - in most xcast schemes, this will always be the final recvr - */ - barrier_timer = false; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_COLLECTIVE_TIMER, - ORTE_RML_NON_PERSISTENT, barrier_timer_recv, NULL); - if (rc != ORTE_SUCCESS) { - ORTE_ERROR_LOG(rc); - return rc; - } - ORTE_PROGRESSED_WAIT(barrier_timer, 0, 1); - gettimeofday(&ompistop, NULL); - opal_output(0, "%s time to complete barrier %ld usec", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - } else if (ORTE_PROC_MY_NAME->vpid == orte_process_info.num_procs-1) { - /* if we are rank=N, send a message back to indicate - * the xcast completed for timing purposes - */ - orte_process_name_t name; - - name.jobid = ORTE_PROC_MY_NAME->jobid; - name.vpid = 0; - OBJ_CONSTRUCT(&buf, opal_buffer_t); - if (0 > (rc = orte_rml.send_buffer(&name,&buf,ORTE_RML_TAG_COLLECTIVE_TIMER,0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return rc; - } - rc = ORTE_SUCCESS; - OBJ_DESTRUCT(&buf); - } - } - + return ORTE_SUCCESS; } @@ -333,7 +280,6 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) { int rc; orte_daemon_cmd_flag_t command=ORTE_DAEMON_COLL_CMD; - struct timeval ompistart, ompistop; opal_buffer_t coll; orte_grpcomm_coll_t coll_type=ORTE_GRPCOMM_ALLGATHER; @@ -341,10 +287,6 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) "%s grpcomm:bad entering allgather", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (orte_timing && ORTE_PROC_MY_NAME->vpid == 0) { - gettimeofday(&ompistart, NULL); - } - /* everyone sends data to their local daemon */ OBJ_CONSTRUCT(&coll, opal_buffer_t); /* tell the daemon to collect the data */ @@ -400,49 +342,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) return rc; } OBJ_RELEASE(allgather_buf); - - OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, - "%s allgather buffer received", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (orte_timing) { - if (ORTE_PROC_MY_NAME->vpid == 0) { - /* setup a receive to hear when the rank=N proc has received the data - * release - in most xcast schemes, this will always be the final recvr - */ - barrier_timer = false; - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_COLLECTIVE_TIMER, - ORTE_RML_NON_PERSISTENT, barrier_timer_recv, NULL); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - ORTE_PROGRESSED_WAIT(barrier_timer, 0, 1); - gettimeofday(&ompistop, NULL); - opal_output(0, "%s allgather: time to complete %ld usec", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - } else if (ORTE_PROC_MY_NAME->vpid == orte_process_info.num_procs-1) { - /* if we are rank=N, send a message back to indicate - * the xcast completed for timing purposes - */ - orte_process_name_t name; - opal_buffer_t buf; - - name.jobid = ORTE_PROC_MY_NAME->jobid; - name.vpid = 0; - OBJ_CONSTRUCT(&buf, opal_buffer_t); - if (0 > (rc = orte_rml.send_buffer(&name,&buf,ORTE_RML_TAG_COLLECTIVE_TIMER,0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - rc = ORTE_SUCCESS; - OBJ_DESTRUCT(&buf); - } - } - - + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s grpcomm:bad allgather completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); diff --git a/orte/mca/grpcomm/basic/grpcomm_basic_module.c b/orte/mca/grpcomm/basic/grpcomm_basic_module.c index 8230b38e05..85de78534f 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic_module.c +++ b/orte/mca/grpcomm/basic/grpcomm_basic_module.c @@ -22,9 +22,6 @@ #include "orte/types.h" #include -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ #include #include "opal/threads/condition.h" @@ -257,7 +254,6 @@ CLEANUP: static bool barrier_recvd; -static bool barrier_timer; static void barrier_recv(int status, orte_process_name_t* sender, opal_buffer_t *buffer, @@ -267,13 +263,6 @@ static void barrier_recv(int status, orte_process_name_t* sender, barrier_recvd = true; } -static void barrier_timer_recv(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - barrier_timer = true; -} - static int barrier(void) { opal_buffer_t buf; @@ -351,7 +340,6 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) { int rc; orte_daemon_cmd_flag_t command=ORTE_DAEMON_COLL_CMD; - struct timeval ompistart, ompistop; opal_buffer_t coll; orte_grpcomm_coll_t coll_type=ORTE_GRPCOMM_ALLGATHER; @@ -359,10 +347,6 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) "%s grpcomm:basic entering allgather", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (orte_timing && ORTE_PROC_MY_NAME->vpid == 0) { - gettimeofday(&ompistart, NULL); - } - /* everyone sends data to their local daemon */ OBJ_CONSTRUCT(&coll, opal_buffer_t); /* tell the daemon to collect the data */ @@ -419,48 +403,6 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf) } OBJ_RELEASE(allgather_buf); - OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, - "%s allgather buffer received", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (orte_timing) { - if (ORTE_PROC_MY_NAME->vpid == 0) { - /* setup a receive to hear when the rank=N proc has received the data - * release - in most xcast schemes, this will always be the final recvr - */ - barrier_timer = false; - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_COLLECTIVE_TIMER, - ORTE_RML_NON_PERSISTENT, barrier_timer_recv, NULL); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - ORTE_PROGRESSED_WAIT(barrier_timer, 0, 1); - gettimeofday(&ompistop, NULL); - opal_output(0, "%s allgather: time to complete %ld usec", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + - (ompistop.tv_usec - ompistart.tv_usec))); - } else if (ORTE_PROC_MY_NAME->vpid == orte_process_info.num_procs-1) { - /* if we are rank=N, send a message back to indicate - * the xcast completed for timing purposes - */ - orte_process_name_t name; - opal_buffer_t buf; - - name.jobid = ORTE_PROC_MY_NAME->jobid; - name.vpid = 0; - OBJ_CONSTRUCT(&buf, opal_buffer_t); - if (0 > (rc = orte_rml.send_buffer(&name,&buf,ORTE_RML_TAG_COLLECTIVE_TIMER,0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - rc = ORTE_SUCCESS; - OBJ_DESTRUCT(&buf); - } - } - - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, "%s grpcomm:basic allgather completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index c59b5802b8..e324b42b63 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -24,6 +24,9 @@ #ifdef HAVE_SYS_WAIT_H #include #endif +#ifdef HAVE_SYS_TIME_H +#include +#endif /* HAVE_SYS_TIME_H */ #include "opal/util/argv.h" #include "opal/runtime/opal_progress.h" @@ -87,29 +90,34 @@ int orte_plm_base_setup_job(orte_job_t *jdata) { opal_byte_object_t bo; int i; - orte_nid_t **nodes; - opal_pointer_array_t dummy; - + orte_nid_t **nd; + opal_list_item_t *item; + orte_attr_t *attr; + /* construct a nodemap */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo))) { ORTE_ERROR_LOG(rc); return rc; } - OBJ_CONSTRUCT(&dummy, opal_pointer_array_t); - if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(&bo, &dummy))) { + if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(&bo))) { ORTE_ERROR_LOG(rc); return rc; } /* print-out the map */ - nodes = (orte_nid_t**)dummy.addr; - for (i=0; i < dummy.size; i++) { - if (NULL != nodes[i]) { - fprintf(stderr, "NIDMAP: name %s daemon %s arch %0x\n", - nodes[i]->name, ORTE_VPID_PRINT(nodes[i]->daemon), nodes[i]->arch); - OBJ_RELEASE(nodes[i]); + nd = (orte_nid_t**)orte_nidmap.addr; + for (i=0; i < orte_nidmap.size && NULL != nd[i]; i++) { + fprintf(stderr, "%s node[%d].name %s daemon %s arch %0x\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, + (NULL == nd[i]) ? "NULL" : nd[i]->name, + ORTE_VPID_PRINT(nd[i]->daemon), + (NULL == nd[i]) ? 0 : nd[i]->arch); + for (item = opal_list_get_first(&nd[i]->attrs); + item != opal_list_get_end(&nd[i]->attrs); + item = opal_list_get_next(item)) { + attr = (orte_attr_t*)item; + fprintf(stderr, "\tAttribute: %s #bytes: %d\n", attr->name, attr->size); } } - OBJ_DESTRUCT(&dummy); } #endif @@ -175,6 +183,8 @@ int orte_plm_base_setup_job(orte_job_t *jdata) return ORTE_SUCCESS; } +static struct timeval app_launch_start, app_launch_stop; + int orte_plm_base_launch_apps(orte_jobid_t job) { orte_job_t *jdata; @@ -189,6 +199,10 @@ int orte_plm_base_launch_apps(orte_jobid_t job) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job))); + if (orte_timing) { + gettimeofday(&app_launch_start, NULL); + } + /* find the job's data record */ if (NULL == (jdata = orte_get_job_data_object(job))) { /* bad jobid */ @@ -231,6 +245,26 @@ int orte_plm_base_launch_apps(orte_jobid_t job) return rc; } + if (orte_timing) { + unsigned long maxsec, maxusec, minutes, seconds; + float fsecs; + + gettimeofday(&app_launch_stop, NULL); + /* subtract starting time to get time in microsecs for test */ + maxsec = app_launch_stop.tv_sec - app_launch_start.tv_sec; + maxusec = app_launch_stop.tv_usec - app_launch_start.tv_usec; + /* pretty-print the result */ + seconds = maxsec + (maxusec / 1000000l); + minutes = seconds / 60l; + seconds = seconds % 60l; + if (0 == minutes && 0 == seconds) { + fsecs = ((float)(maxsec)*1000000.0 + (float)maxusec) / 1000.0; + fprintf(orte_timing_output, "Time to launch apps: %8.2f millisecs\n", fsecs); + } else { + fprintf(orte_timing_output, "Time to launch apps: %3lu:%02lu min:sec\n", minutes, seconds); + } + } + /* complete wiring up the iof */ OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:launch wiring up iof", @@ -257,6 +291,7 @@ static int orted_num_callback; static bool orted_failed_launch; static orte_job_t *jdatorted; static orte_proc_t **pdatorted; +static struct timeval daemonlaunchtime = {0,0}, daemonsetuptime = {0,0}, daemoncbtime = {0,0}; void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid, int status, orte_job_state_t state) @@ -350,12 +385,21 @@ static void process_orted_launch_report(int fd, short event, void *data) int rc, idx; int32_t arch; orte_node_t **nodes; + struct timeval recvtime; + long secs, usecs; + int64_t setupsec, setupusec; + int64_t startsec, startusec; OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:orted_report_launch from daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&mev->sender))); + /* see if we need to timestamp this receipt */ + if (orte_timing) { + gettimeofday(&recvtime, NULL); + } + /* update state */ pdatorted[mev->sender.vpid]->state = ORTE_PROC_STATE_RUNNING; @@ -385,6 +429,81 @@ static void process_orted_launch_report(int fd, short event, void *data) orted_failed_launch = true; goto CLEANUP; } + + /* if we are doing a timing test, unload the start and setup times of the daemon */ + if (orte_timing) { + /* get the time stamp when the daemon first started */ + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &startsec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &startusec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* save the latest daemon to start */ + if (startsec > daemonlaunchtime.tv_sec) { + daemonlaunchtime.tv_sec = startsec; + daemonlaunchtime.tv_usec = startusec; + } else if (startsec == daemonlaunchtime.tv_sec && + startusec > daemonlaunchtime.tv_usec) { + daemonlaunchtime.tv_usec = startusec; + } + /* get the time required for the daemon to setup - locally computed by each daemon */ + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &setupsec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &setupusec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* save the longest */ + if (setupsec > daemonsetuptime.tv_sec) { + daemonsetuptime.tv_sec = setupsec; + daemonsetuptime.tv_usec = setupusec; + } else if (setupsec == daemonsetuptime.tv_sec && + setupusec > daemonsetuptime.tv_usec) { + daemonsetuptime.tv_usec = setupusec; + } + /* get the time stamp of when the daemon started to send this message to us */ + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &setupsec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &setupusec, &idx, OPAL_INT64))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* check the time for the callback to complete and save the longest */ + secs = recvtime.tv_sec - setupsec; + if (setupusec <= recvtime.tv_usec) { + usecs = recvtime.tv_usec - setupusec; + } else { + secs--; + usecs = 1000000 - setupusec + recvtime.tv_usec; + } + if (secs > daemoncbtime.tv_sec) { + daemoncbtime.tv_sec = secs; + daemoncbtime.tv_usec = usecs; + } else if (secs == daemoncbtime.tv_sec && + usecs > daemoncbtime.tv_usec) { + daemoncbtime.tv_usec = usecs; + } + } + /* lookup the node */ nodes = (orte_node_t**)orte_node_pool->addr; if (NULL == nodes[mev->sender.vpid]) { @@ -446,7 +565,9 @@ static void orted_report_launch(int status, orte_process_name_t* sender, int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons) { int rc; - + unsigned long minutes, seconds; + float fsecs; + OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:daemon_callback", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -475,6 +596,39 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons) return rc; } + /* if we are timing, output the results */ + if (orte_timing) { + seconds = (daemonlaunchtime.tv_sec - orte_plm_globals.daemonlaunchstart.tv_sec) + + ((daemonlaunchtime.tv_usec - orte_plm_globals.daemonlaunchstart.tv_usec) / 1000000l); + minutes = seconds / 60l; + seconds = seconds % 60l; + if (0 == minutes && 0 == seconds) { + fsecs = ((float)(daemonlaunchtime.tv_sec - orte_plm_globals.daemonlaunchstart.tv_sec)*1000000.0 + + (float)(daemonlaunchtime.tv_usec - orte_plm_globals.daemonlaunchstart.tv_usec)) / 1000.0; + fprintf(orte_timing_output, "Daemon initial launch was completed in %8.2f millisecs\n", fsecs); + } else { + fprintf(orte_timing_output, "Daemon initial launch was completed in %3lu:%02lu min:sec\n", minutes, seconds); + } + seconds = daemonsetuptime.tv_sec + (daemonsetuptime.tv_usec / 1000000l); + minutes = seconds / 60l; + seconds = seconds % 60l; + if (0 == minutes && 0 == seconds) { + fsecs = ((float)(daemonsetuptime.tv_sec)*1000000.0 + (float)daemonsetuptime.tv_usec) / 1000.0; + fprintf(orte_timing_output, "Daemon setup was completed in a maximum of %8.2f millisecs\n", fsecs); + } else { + fprintf(orte_timing_output, "Daemon setup was completed in a maximum of %3lu:%02lu min:sec\n", minutes, seconds); + } + seconds = daemoncbtime.tv_sec + (daemoncbtime.tv_usec / 1000000l); + minutes = seconds / 60l; + seconds = seconds % 60l; + if (0 == minutes && 0 == seconds) { + fsecs = ((float)(daemoncbtime.tv_sec)*1000000.0 + (float)daemoncbtime.tv_usec) / 1000.0; + fprintf(orte_timing_output, "Daemon callback to HNP was completed in a maximum of %8.3f millisecs\n", fsecs); + } else { + fprintf(orte_timing_output, "Daemon callback to HNP was completed in a maximum of %3lu:%02lu min:sec\n", minutes, seconds); + } + } + OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:daemon_callback completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 582f82f084..6480fdbb1c 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -27,6 +27,10 @@ #include "orte_config.h" #include "orte/types.h" +#ifdef HAVE_SYS_TIME_H +#include +#endif /* HAVE_SYS_TIME_H */ + #include "opal/class/opal_list.h" #include "opal/threads/condition.h" @@ -49,6 +53,8 @@ typedef struct { opal_condition_t orted_cmd_cond; /* next jobid */ orte_jobid_t next_jobid; + /* time when daemons started launch */ + struct timeval daemonlaunchstart; } orte_plm_globals_t; /** * Global instance of PLM framework data diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index b73d6c4f85..a5e9c21523 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -148,6 +148,11 @@ static int plm_slurm_launch_job(orte_job_t *jdata) int proc_vpid_index; orte_jobid_t failed_job; + /* if we are timing, record the start time */ + if (orte_timing) { + gettimeofday(&orte_plm_globals.daemonlaunchstart, NULL); + } + /* flag the daemons as failing by default */ failed_job = ORTE_PROC_MY_NAME->jobid; diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index 97d7f8bc6a..d3de67fbf5 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -148,6 +148,11 @@ static int plm_tm_launch_job(orte_job_t *jdata) mode_t current_umask; orte_jobid_t failed_job; + /* if we are timing, record the start time */ + if (orte_timing) { + gettimeofday(&orte_plm_globals.daemonlaunchstart, NULL); + } + /* default to declaring the daemons as failed */ failed_job = ORTE_PROC_MY_NAME->jobid; diff --git a/orte/mca/ras/tm/ras_tm_module.c b/orte/mca/ras/tm/ras_tm_module.c index c71a4c7b7e..954db95b81 100644 --- a/orte/mca/ras/tm/ras_tm_module.c +++ b/orte/mca/ras/tm/ras_tm_module.c @@ -23,9 +23,6 @@ #include #include #include -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ #include "opal/util/argv.h" #include "orte/util/show_help.h" @@ -122,13 +119,7 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid) opal_list_item_t* item; FILE *fp; char *hostname; - struct timeval start, stop; - /* check for timing request - get start time if so */ - if (orte_timing) { - gettimeofday(&start, NULL); - } - /* Ignore anything that the user already specified -- we're getting nodes only from TM. */ @@ -206,15 +197,6 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid) nodeid++; } - /* check for timing request - get stop time and report elapsed time if so */ - if (orte_timing) { - gettimeofday(&stop, NULL); - opal_output(0, "ras_tm: time to allocate is %ld usec", - (long int)((stop.tv_sec - start.tv_sec)*1000000 + - (stop.tv_usec - start.tv_usec))); - gettimeofday(&start, NULL); - } - return ORTE_SUCCESS; } diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 6034688825..413975bb55 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -184,7 +184,7 @@ static void send_relay(opal_buffer_t *buf, orte_jobid_t target_job, orte_rml_tag } else { /* buffer is already setup - just point to it */ buffer = buf; - /* retain it so that callback function can release it */ + /* retain it to keep bookkeeping straight */ OBJ_RETAIN(buffer); /* tag needs to be set to daemon_tag */ tag = ORTE_RML_TAG_DAEMON; @@ -227,6 +227,7 @@ static void send_relay(opal_buffer_t *buf, orte_jobid_t target_job, orte_rml_tag "%s orte:daemon:send_relay sending relay msg to %s tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&target), tag)); + /* retain buffer so callback function can release it */ OBJ_RETAIN(buffer); if (0 > (ret = orte_rml.send_buffer_nb(&target, buffer, tag, 0, send_callback, NULL))) { @@ -244,6 +245,7 @@ static void send_relay(opal_buffer_t *buf, orte_jobid_t target_job, orte_rml_tag ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name))); + /* retain buffer so callback function can release it */ OBJ_RETAIN(buffer); if (0 > (ret = orte_rml.send_buffer_nb(&nm->name, buffer, tag, 0, send_callback, NULL))) { diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 8ae0771df1..66e63e59ad 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -36,6 +36,9 @@ #include #include #include +#ifdef HAVE_SYS_TIME_H +#include +#endif /* HAVE_SYS_TIME_H */ #include "opal/event/event.h" @@ -196,7 +199,11 @@ int orte_daemon(int argc, char *argv[]) opal_buffer_t *buffer; char hostname[100]; char *tmp_env_var = NULL; - + struct timeval starttime, setuptime; + + /* get our time for first executable */ + gettimeofday(&starttime, NULL); + /* initialize the globals */ memset(&orted_globals, 0, sizeof(orted_globals)); /* initialize the singleton died pipe to an illegal value so we can detect it was set */ @@ -606,6 +613,56 @@ int orte_daemon(int argc, char *argv[]) OBJ_RELEASE(buffer); return ret; } + if (orte_timing) { + int64_t secs, usecs; + /* add our start time */ + secs = starttime.tv_sec; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &secs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + usecs = starttime.tv_usec; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &usecs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + /* get and send our setup time */ + gettimeofday(&setuptime, NULL); + secs = setuptime.tv_sec - starttime.tv_sec; + if (starttime.tv_usec <= setuptime.tv_usec) { + usecs = setuptime.tv_usec - starttime.tv_usec; + } else { + secs--; + usecs = 1000000 - starttime.tv_usec + setuptime.tv_usec; + } + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &secs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &usecs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + /* include the actual timestamp so the HNP can figure out how + * long it took for this message to arrive + */ + secs = setuptime.tv_sec; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &secs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + usecs = setuptime.tv_usec; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &usecs, 1, OPAL_INT64))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + return ret; + } + } if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, buffer, ORTE_RML_TAG_ORTED_CALLBACK, 0))) { ORTE_ERROR_LOG(ret); diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 0daa0fb488..f46d9481e5 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -24,8 +24,10 @@ #ifdef HAVE_SYS_TIME_H #include #endif +#include #include "opal/mca/base/mca_base_param.h" +#include "opal/util/output.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" @@ -144,6 +146,27 @@ int orte_register_params(void) false, false, (int)false, &value); orte_timing = OPAL_INT_TO_BOOL(value); + if (orte_process_info.hnp) { + char *tmp; + mca_base_param_reg_string_name("orte", "timing_file", + "Name of the file where timing data is to be written (relative or absolute path)", + false, false, NULL, &tmp); + if (orte_timing && NULL == tmp) { + /* send the timing output to stdout */ + orte_timing_output = stdout; + } else if (NULL != tmp) { + /* make sure the timing flag is set */ + orte_timing = true; + /* send the output to the indicated file */ + orte_timing_output = fopen(tmp, "w"); + if (NULL == orte_timing_output) { + /* couldn't be opened */ + opal_output(0, "File %s could not be opened", tmp); + orte_timing_output = stderr; + } + } + } + /* User-level debugger info string */ mca_base_param_reg_string_name("orte", "base_user_debugger",