diff --git a/configure.ac b/configure.ac index 3abba020e3..6348daedb9 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2014 The University of Tennessee and The University +# Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -1379,6 +1379,7 @@ AC_CONFIG_FILES([ test/support/Makefile test/threads/Makefile test/util/Makefile + test/monitoring/Makefile ]) AC_CONFIG_FILES([contrib/dist/mofed/debian/rules], [chmod +x contrib/dist/mofed/debian/rules]) diff --git a/ompi/mca/pml/monitoring/Makefile.am b/ompi/mca/pml/monitoring/Makefile.am new file mode 100644 index 0000000000..504a2c6fd4 --- /dev/null +++ b/ompi/mca/pml/monitoring/Makefile.am @@ -0,0 +1,41 @@ +# +# Copyright (c) 2013-2015 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2013-2015 Inria. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +monitoring_sources = \ + pml_monitoring.c \ + pml_monitoring.h \ + pml_monitoring_comm.c \ + pml_monitoring_comm.h \ + pml_monitoring_component.c \ + pml_monitoring_component.h \ + pml_monitoring_hdr.h \ + pml_monitoring_iprobe.c \ + pml_monitoring_irecv.c \ + pml_monitoring_isend.c \ + pml_monitoring_start.c + +if MCA_BUILD_ompi_pml_monitoring_DSO +component_noinst = +component_install = mca_pml_monitoring.la +else +component_noinst = libmca_pml_monitoring.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pml_monitoring_la_SOURCES = $(monitoring_sources) +mca_pml_monitoring_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pml_monitoring_la_SOURCES = $(monitoring_sources) +libmca_pml_monitoring_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/monitoring/README b/ompi/mca/pml/monitoring/README new file mode 100644 index 0000000000..8361027d65 --- /dev/null +++ b/ompi/mca/pml/monitoring/README @@ -0,0 +1,181 @@ + + Copyright (c) 2013-2015 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. + Copyright (c) 2013-2015 Inria. All rights reserved. + $COPYRIGHT$ + + Additional copyrights may follow + + $HEADER$ + +=========================================================================== + +Low level communication monitoring interface in Open MPI + +Introduction +------------ +This interface traces and monitors all messages sent by MPI before they go to the +communication channels. At that level all communications are point-to-point communications: +collectives are already decomposed into send and receive calls. + +The monitoring is stored internally by each process and output on stderr at the end of the +application (during MPI_Finalize()). 
+ + +Enabling the monitoring +----------------------- +To enable the monitoring add --mca pml_monitoring_enable x to the mpirun command line. +If x = 1 it monitors internal and external tags indifferently and aggregates everything. +If x = 2 it monitors internal tags and external tags separately. +If x = 0 the monitoring is disabled. +Other values of x are not supported. + +Internal tags are tags < 0. They are used to tag send and receive coming from +collective operations or from protocol communications. + +External tags are tags >=0. They are used by the application in point-to-point communication. + +Therefore, distinguishing external and internal tags helps to distinguish between point-to-point +and other communication (mainly collectives). + +Output format +------------- +The output of the monitoring looks like (with --mca pml_monitoring_enable 2): +I 0 1 108 bytes 27 msgs sent +E 0 1 1012 bytes 30 msgs sent +E 0 2 23052 bytes 61 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 208 bytes 52 msgs sent +E 1 0 860 bytes 24 msgs sent +E 1 3 2552 bytes 56 msgs sent +I 2 3 104 bytes 26 msgs sent +E 2 0 22804 bytes 49 msgs sent +E 2 3 860 bytes 24 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 204 bytes 51 msgs sent +E 3 1 2304 bytes 44 msgs sent +E 3 2 860 bytes 24 msgs sent + +Where: + - the first column distinguishes internal (I) and external (E) tags. + - the second column is the sender rank + - the third column is the receiver rank + - the fourth column is the number of bytes sent + - the last column is the number of messages. + +In this example process 0 has sent 27 messages to process 1 using point-to-point calls +for 108 bytes and 30 messages with collectives and protocol related communication +for 1012 bytes to process 1. + +If the monitoring was called with --mca pml_monitoring_enable 1 everything is aggregated +under the internal tags. 
+With the above example, you have: +I 0 1 1120 bytes 57 msgs sent +I 0 2 23052 bytes 61 msgs sent +I 1 0 860 bytes 24 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 2760 bytes 108 msgs sent +I 2 0 22804 bytes 49 msgs sent +I 2 3 964 bytes 50 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 2508 bytes 95 msgs sent +I 3 2 860 bytes 24 msgs sent + +Monitoring phases +----------------- +If one wants to monitor phases of the application, it is possible to flush the monitoring +at the application level. In this case all the monitoring since the last flush is stored +by every process in a file. + +An example of how to flush such monitoring is given in test/monitoring/monitoring_test.c + +Moreover, all the different flushed phases are aggregated at runtime and output at the end +of the application as described above. + +Example +------- +A working example is given in test/monitoring/monitoring_test.c +It features MPI_COMM_WORLD monitoring, sub-communicator monitoring, collective and +point-to-point communication monitoring and phases monitoring. + +To compile: +> make monitoring_test + +Helper scripts +-------------- +Two perl scripts are provided in test/monitoring +- aggregate_profile.pl is for aggregating monitoring phases of different processes + This script aggregates the profiles generated by the flush_monitoring function. + The files need to be in a given format: name_<phase_id>_<process_id> + They are then aggregated by phases. + If one needs the profile of all the phases one can concatenate the different files, + or use the output of the monitoring system done at MPI_Finalize + in the example it should be called as: + ./aggregate_profile.pl prof/phase to generate + prof/phase_1.prof + prof/phase_2.prof + +- profile2mat.pl is for transforming the monitoring output into a communication matrix. + Takes a profile file and aggregates all the recorded communicators into matrices. 
+ It generates matrices for the number of messages (msg), + for the total bytes transmitted (size) and + the average number of bytes per message (avg) + + The output matrix is symmetric. + +Do not forget to set the execute permission on these scripts. + +For instance, the provided examples store phases output in ./prof + +If you type: +> mpirun -np 4 --mca pml_monitoring_enable 2 ./monitoring_test +you should have the following output: +Proc 3 flushing monitoring to: ./prof/phase_1_3.prof +Proc 0 flushing monitoring to: ./prof/phase_1_0.prof +Proc 2 flushing monitoring to: ./prof/phase_1_2.prof +Proc 1 flushing monitoring to: ./prof/phase_1_1.prof +Proc 1 flushing monitoring to: ./prof/phase_2_1.prof +Proc 3 flushing monitoring to: ./prof/phase_2_3.prof +Proc 0 flushing monitoring to: ./prof/phase_2_0.prof +Proc 2 flushing monitoring to: ./prof/phase_2_2.prof +I 2 3 104 bytes 26 msgs sent +E 2 0 22804 bytes 49 msgs sent +E 2 3 860 bytes 24 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 204 bytes 51 msgs sent +E 3 1 2304 bytes 44 msgs sent +E 3 2 860 bytes 24 msgs sent +I 0 1 108 bytes 27 msgs sent +E 0 1 1012 bytes 30 msgs sent +E 0 2 23052 bytes 61 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 208 bytes 52 msgs sent +E 1 0 860 bytes 24 msgs sent +E 1 3 2552 bytes 56 msgs sent + +you can parse the phases with: +> ./aggregate_profile.pl prof/phase +Building prof/phase_1.prof +Building prof/phase_2.prof + +And you can build the different communication matrices of phase 1 with: +> ./profile2mat.pl prof/phase_1.prof +prof/phase_1.prof -> all +prof/phase_1_size_all.mat +prof/phase_1_msg_all.mat +prof/phase_1_avg_all.mat + +prof/phase_1.prof -> external +prof/phase_1_size_external.mat +prof/phase_1_msg_external.mat +prof/phase_1_avg_external.mat + +prof/phase_1.prof -> internal +prof/phase_1_size_internal.mat +prof/phase_1_msg_internal.mat +prof/phase_1_avg_internal.mat + +Credit +------ +Designed by George Bosilca and +Emmanuel Jeannot diff --git 
a/ompi/mca/pml/monitoring/pml_monitoring.c b/ompi/mca/pml/monitoring/pml_monitoring.c new file mode 100644 index 0000000000..672a5ad09f --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include "opal/class/opal_hash_table.h" +typedef struct _transtlator_t{ + int *ranks; + int size; +} translator_t; + + +void initialize_monitoring( void ); +void monitor_send_data(int world_rank, size_t data_size, int tag); +void output_monitoring( void ); +void finalize_monitoring( void ); +int filter_monitoring( void ); /* returns 1 if we distinguish positive (point-to-point) and negative (collective and meta messages) tags*/ +int ompi_mca_pml_monitoring_flush(char* filename); + + +MPI_Group group_world; + +/* array for stroring monitoring data*/ +size_t *sent_data = NULL; +int *messages_count = NULL; +size_t *filtered_sent_data = NULL; +int *filtered_messages_count = NULL; +size_t *all_sent_data = NULL; +int *all_messages_count = NULL; +size_t *all_filtered_sent_data = NULL; +int *all_filtered_messages_count = NULL; + +int init_done = 0; +int nbprocs = -1; +int my_rank = -1; +opal_hash_table_t *translation_ht = NULL; + + +mca_pml_monitoring_module_t mca_pml_monitoring = { + mca_pml_monitoring_add_procs, + mca_pml_monitoring_del_procs, + mca_pml_monitoring_enable, + NULL, + mca_pml_monitoring_add_comm, + mca_pml_monitoring_del_comm, + mca_pml_monitoring_irecv_init, + mca_pml_monitoring_irecv, + mca_pml_monitoring_recv, + mca_pml_monitoring_isend_init, + mca_pml_monitoring_isend, + mca_pml_monitoring_send, + mca_pml_monitoring_iprobe, + mca_pml_monitoring_probe, + mca_pml_monitoring_start, + mca_pml_monitoring_improbe, + mca_pml_monitoring_mprobe, + 
mca_pml_monitoring_imrecv, + mca_pml_monitoring_mrecv, + mca_pml_monitoring_dump, + NULL, + 65535, + INT_MAX +}; + +int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs) +{ + /** + * Create the monitoring hashtable only for my MPI_COMM_WORLD. We choose + * to ignore by now all other processes. + */ + if(NULL == translation_ht) { + size_t i; + uint64_t key; + + nbprocs = nprocs; + + translation_ht = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(translation_ht, 2048); + + + for( i = 0; i < nprocs; i++ ) { + /* rank : ompi_proc_local_proc in procs */ + if( procs[i] == ompi_proc_local_proc) + my_rank = i; + key = *((uint64_t*)&(procs[i]->super.proc_name)); + /* store the rank (in COMM_WORLD) of the process + with its name (a uniq opal ID) as key in the hash table*/ + opal_hash_table_set_value_uint64(translation_ht, + key, + (void*)(uintptr_t)i); + } + } + return pml_selected_module.pml_add_procs(procs, nprocs); +} + + +int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, + size_t nprocs) +{ + return pml_selected_module.pml_del_procs(procs, nprocs); +} + +int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, + int verbose) +{ + return pml_selected_module.pml_dump(comm, verbose); +} + + +void finalize_monitoring( void ){ + + if(filter_monitoring()){ + free(filtered_sent_data); + free(filtered_messages_count); + } + + free(sent_data); + free(messages_count); + opal_hash_table_remove_all( translation_ht ); + free(translation_ht); + +} +void initialize_monitoring( void ){ + + sent_data = (size_t*)calloc(nbprocs, sizeof(size_t)); + messages_count = (int*) calloc(nbprocs, sizeof(int)); + all_sent_data = (size_t*)calloc(nbprocs, sizeof(size_t)); + all_messages_count = (int*) calloc(nbprocs, sizeof(int)); + + if(filter_monitoring()){ + filtered_sent_data = (size_t*)calloc(nbprocs, sizeof(size_t)); + filtered_messages_count = (int*) calloc(nbprocs, sizeof(int)); + all_filtered_sent_data = (size_t*)calloc(nbprocs, sizeof(size_t)); + 
all_filtered_messages_count = (int*) calloc(nbprocs, sizeof(int)); + } + + init_done = 1; +} + + + +void monitor_send_data(int world_rank, size_t data_size, int tag){ + + if ( !init_done ) + initialize_monitoring(); + + /* distinguishses positive and negative tags if requested */ + if((tag<0) && (filter_monitoring())){ + filtered_sent_data[world_rank] += data_size; + filtered_messages_count[world_rank]++; + }else{ /* if filtered monitoring is not activated data is aggregated indifferently */ + sent_data[world_rank] += data_size; + messages_count[world_rank]++; + } + /*printf("%d Send dest = %d(%d:comm_world=%d), size = %ld ajouté dans : %d\n",my_rank, dest_rank, comm->c_my_rank, MPI_COMM_WORLD->c_my_rank, data_size, rank); fflush(stdout);*/ + + +} + +void output_monitoring( void ){ + int i; + for (i = 0 ; i < nbprocs ; i++) { + all_sent_data[i] += sent_data[i]; + all_messages_count[i] += messages_count[i]; + if(all_sent_data[i] > 0) { + fprintf(stderr, "I\t%d\t%d\t%ld bytes\t%d msgs sent\n", my_rank, i, all_sent_data[i], all_messages_count[i]); fflush(stderr); + } + } + + if(filter_monitoring()){ + for (i = 0 ; i < nbprocs ; i++) { + all_filtered_sent_data[i] += filtered_sent_data[i]; + all_filtered_messages_count[i] += filtered_messages_count[i]; + if(all_filtered_sent_data[i] > 0) { + fprintf(stderr, "E\t%d\t%d\t%ld bytes\t%d msgs sent\n", my_rank, i, all_filtered_sent_data[i], all_filtered_messages_count[i]); fflush(stderr); + } + } + } +} + + +/* + Flushes the monitoring into filename + Useful for phases (see exmple in test/monitoring) +*/ + +int ompi_mca_pml_monitoring_flush(char* filename) { + FILE *pf; + int i; + + + pf = fopen(filename, "w"); + + if(!pf) + return -1; + + fprintf(stderr,"Proc %d flushing monitoring to: %s\n", my_rank, filename); + + for (i = 0 ; i < nbprocs ; i++) { + if(sent_data[i] > 0) { + fprintf(pf, "I\t%d\t%d\t%ld bytes\t%d msgs sent\n", my_rank, i, sent_data[i], messages_count[i]); fflush(pf); + /* aggregate data in general array*/ + 
all_sent_data[i] += sent_data[i]; + all_messages_count[i] += messages_count[i]; + /* reset phase array */ + messages_count[i] = 0; + sent_data[i] = 0; + } + } + + if(filter_monitoring()){ + for (i = 0 ; i < nbprocs ; i++) { + if(filtered_sent_data[i] > 0) { + fprintf(pf, "E\t%d\t%d\t%ld bytes\t%d msgs sent\n", my_rank, i, filtered_sent_data[i], filtered_messages_count[i]); fflush(pf); + /* aggregate data in general array*/ + all_filtered_sent_data[i] += filtered_sent_data[i]; + all_filtered_messages_count[i] += filtered_messages_count[i]; + /* reset phase array */ + filtered_messages_count[i] = 0; + filtered_sent_data[i] = 0; + } + } + } + + fclose(pf); + return 0; +} diff --git a/ompi/mca/pml/monitoring/pml_monitoring.h b/ompi/mca/pml/monitoring/pml_monitoring.h new file mode 100644 index 0000000000..dbae8e1eee --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PML_MONITORING_H +#define MCA_PML_MONITORING_H + +BEGIN_C_DECLS + +#include +#include +#include +#include +#include + +typedef mca_pml_base_module_t mca_pml_monitoring_module_t; + +extern mca_pml_base_component_t pml_selected_component; +extern mca_pml_base_module_t pml_selected_module; +extern mca_pml_monitoring_module_t mca_pml_monitoring; +OMPI_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_monitoring_component; + +/* + * PML interface functions. 
+ */ + +extern int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_del_comm(struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs); + +extern int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, + size_t nprocs); + +extern int mca_pml_monitoring_enable(bool enable); + +extern int mca_pml_monitoring_iprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_probe(int dst, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_improbe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + struct ompi_message_t **message, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_mprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* status ); + +extern int mca_pml_monitoring_isend_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_isend(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_send(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm); + +extern int mca_pml_monitoring_irecv_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_irecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + 
struct ompi_request_t **request); + +extern int mca_pml_monitoring_recv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status); + +extern int mca_pml_monitoring_imrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request); + +extern int mca_pml_monitoring_mrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status); + +extern int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, + int verbose); + +extern int mca_pml_monitoring_start(size_t count, + ompi_request_t** requests); + +END_C_DECLS + +#endif /* MCA_PML_MONITORING_H */ diff --git a/ompi/mca/pml/monitoring/pml_monitoring_comm.c b/ompi/mca/pml/monitoring/pml_monitoring_comm.c new file mode 100644 index 0000000000..047a15bfd3 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_comm.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +extern void output_monitoring( void ); + + +int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm) +{ + return pml_selected_module.pml_add_comm(comm); +} + +int mca_pml_monitoring_del_comm(struct ompi_communicator_t* comm) +{ + return pml_selected_module.pml_del_comm(comm); +} diff --git a/ompi/mca/pml/monitoring/pml_monitoring_component.c b/ompi/mca/pml/monitoring/pml_monitoring_component.c new file mode 100644 index 0000000000..491f5f9f78 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_component.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include + +static int mca_pml_monitoring_enabled = 0; +static int mca_pml_monitoring_active = 0; +mca_pml_base_component_t pml_selected_component; +mca_pml_base_module_t pml_selected_module; + +extern void output_monitoring( void ); +extern void finalize_monitoring( void ); +extern int ompi_mca_pml_monitoring_flush(char* filename); +int filter_monitoring( void ); + + + +/* Return 1 if the the seperation between internal tags and external tags is enabled*/ +int filter_monitoring( void ) +{ + if (mca_pml_monitoring_enabled == 2) + return 1; + else + return 0; +} + +static unsigned long hidden_fct = (unsigned long)((void*)ompi_mca_pml_monitoring_flush); +int mca_pml_monitoring_enable(bool enable) +{ + /* If we reach this point we were succesful at hijacking the interface of + * the real PML, and we are now correctly interleaved between the upper + * layer and the real PML. 
+ */ + mca_base_component_var_register(&mca_pml_monitoring_component.pmlm_version, "flush", + "Hidden argument to provide the flush function pointer", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, + OPAL_INFO_LVL_1, + MCA_BASE_VAR_SCOPE_CONSTANT, + &hidden_fct); + return pml_selected_module.pml_enable(enable); +} + +static int mca_pml_monitoring_component_open(void) +{ + if( mca_pml_monitoring_enabled ) { + opal_pointer_array_add(&mca_pml_base_pml, + strdup(mca_pml_monitoring_component.pmlm_version.mca_component_name)); + } + return OMPI_SUCCESS; +} + +static int mca_pml_monitoring_component_close(void) +{ + if( mca_pml_monitoring_enabled ) { + if( !mca_pml_monitoring_active ) { + /* Save a copy of the selected PML */ + pml_selected_component = mca_pml_base_selected_component; + pml_selected_module = mca_pml; + /* And now install the interception layer */ + mca_pml_base_selected_component = mca_pml_monitoring_component; + mca_pml = mca_pml_monitoring; + mca_pml.pml_progress = pml_selected_module.pml_progress; + /* Bump my ref count up to avoid getting released too early */ + mca_base_component_repository_retain_component(mca_pml_monitoring_component.pmlm_version.mca_type_name, + mca_pml_monitoring_component.pmlm_version.mca_component_name); + mca_pml_monitoring_active = 1; + } + } + return OMPI_SUCCESS; +} + +static mca_pml_base_module_t* +mca_pml_monitoring_component_init(int* priority, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + if( mca_pml_monitoring_enabled ) { + *priority = 0; /* I'm up but don't select me */ + return &mca_pml_monitoring; + } + return NULL; +} + +static int mca_pml_monitoring_component_finish(void) +{ + if( mca_pml_monitoring_enabled && mca_pml_monitoring_active ) { + /* It is over... 
Output what has been monitored*/ + output_monitoring(); + /* Free internal data structure */ + finalize_monitoring(); + /* Call the original PML and then close */ + mca_pml_monitoring_active = 0; + mca_pml_monitoring_enabled = 0; + /* Restore the original PML */ + mca_pml_base_selected_component = pml_selected_component; + mca_pml = pml_selected_module; + /* Redirect the close call to the original PML */ + pml_selected_component.pmlm_finalize(); + /** + * We should never release the last ref on the current component or face forever punishement. + */ + /* mca_base_component_repository_release(&mca_pml_monitoring_component.pmlm_version); */ + } + return OMPI_SUCCESS; +} + +static int mca_pml_monitoring_component_register(void) +{ + (void)mca_base_component_var_register(&mca_pml_monitoring_component.pmlm_version, "enable", + "Enable the monitoring at the PML level. This value should be different than 0 in order for the monitoring to be enabled (default disable)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_pml_monitoring_enabled); + return OMPI_SUCCESS; +} + +mca_pml_base_component_2_0_0_t mca_pml_monitoring_component = { + + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + { + MCA_PML_BASE_VERSION_2_0_0, + + "monitoring", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_pml_monitoring_component_open, /* component open */ + mca_pml_monitoring_component_close, /* component close */ + NULL, + mca_pml_monitoring_component_register + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + mca_pml_monitoring_component_init, /* component init */ + mca_pml_monitoring_component_finish /* component finalize */ + +}; + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c 
b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c new file mode 100644 index 0000000000..ec34cb5d27 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + + +/* EJ: nothing to do here */ + +int mca_pml_monitoring_iprobe( int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + ompi_status_public_t* status ) +{ + return pml_selected_module.pml_iprobe(dst, tag, comm, + matched, status); +} + +int mca_pml_monitoring_probe( int dst, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ) +{ + return pml_selected_module.pml_probe(dst, tag, comm, status); +} + +int mca_pml_monitoring_improbe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + struct ompi_message_t **message, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_improbe(dst, tag, comm, + matched, message, status); +} + + +int mca_pml_monitoring_mprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_mprobe(dst, tag, comm, message, status); +} + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_irecv.c b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c new file mode 100644 index 0000000000..91b247c7c5 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + + +/* EJ: loging is done on the sender. Nothing to do here */ + +int mca_pml_monitoring_irecv_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_irecv_init(buf, count, datatype, + src, tag, comm, request); +} + + +int mca_pml_monitoring_irecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_irecv(buf, count, datatype, + src, tag, comm, request); +} + + +int mca_pml_monitoring_recv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status) +{ + return pml_selected_module.pml_recv(buf, count, datatype, + src, tag, comm, status); +} + + +int mca_pml_monitoring_imrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_imrecv(buf, count, datatype, + message, request); +} + + +int mca_pml_monitoring_mrecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status) + +{ + return pml_selected_module.pml_mrecv(buf, count, datatype, + message, status); +} + + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_isend.c b/ompi/mca/pml/monitoring/pml_monitoring_isend.c new file mode 100644 index 0000000000..b8fc4f18a8 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_isend.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +extern void monitor_send_data(int dest_rank, size_t data_size, int tag); +extern opal_hash_table_t *get_hashtable(void); +extern opal_hash_table_t *translation_ht; + +int mca_pml_monitoring_isend_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + return pml_selected_module.pml_isend_init(buf, count, datatype, + dst, tag, mode, comm, request); +} + +int mca_pml_monitoring_isend(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + + /* find the processor of teh destination */ + ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst); + int world_rank; + + /* find its name*/ + uint64_t key = *((uint64_t*)&(proc->super.proc_name)); + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + size_t type_size, data_size; + ompi_datatype_type_size(datatype, &type_size); + data_size = count*type_size; + monitor_send_data(world_rank, data_size, tag); + } + + return pml_selected_module.pml_isend(buf, count, datatype, + dst, tag, mode, comm, request); +} + +int mca_pml_monitoring_send(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm) +{ + + ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst); + int world_rank; + uint64_t key = *((uint64_t*) &(proc->super.proc_name)); + + /** + * If this fails the destination is not part of my MPI_COM_WORLD + */ + if(OPAL_SUCCESS == 
opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + size_t type_size, data_size; + ompi_datatype_type_size(datatype, &type_size); + data_size = count*type_size; + monitor_send_data(world_rank, data_size, tag); + } + + + return pml_selected_module.pml_send(buf, count, datatype, + dst, tag, mode, comm); +} + diff --git a/ompi/mca/pml/monitoring/pml_monitoring_start.c b/ompi/mca/pml/monitoring/pml_monitoring_start.c new file mode 100644 index 0000000000..5b503977e7 --- /dev/null +++ b/ompi/mca/pml/monitoring/pml_monitoring_start.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2015 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include + +extern void monitor_send_data(int dest_rank, size_t data_size, int tag); +extern opal_hash_table_t *translation_ht; + + +/* manage persistant requests*/ +int mca_pml_monitoring_start(size_t count, + ompi_request_t** requests) +{ + size_t i; + + for( i = 0; i < count; i++ ) { + mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i]; + ompi_proc_t *proc; + int world_rank; + + if(NULL == pml_request) { + continue; + } + if(OMPI_REQUEST_PML != requests[i]->req_type) { + continue; + } + if(MCA_PML_REQUEST_SEND != pml_request->req_type) { + continue; + } + + proc = ompi_group_get_proc_ptr(pml_request->req_comm->c_remote_group, pml_request->req_peer); + uint64_t key = *((uint64_t*) &(proc->super.proc_name)); + + + /** + * If this fails the destination is not part of my MPI_COM_WORLD + */ + if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + size_t type_size, data_size; + ompi_datatype_type_size(pml_request->req_datatype, &type_size); + data_size = pml_request->req_count * type_size; + monitor_send_data(world_rank, 
data_size, 1); + } + } + return pml_selected_module.pml_start(count, requests); +} + diff --git a/test/Makefile.am b/test/Makefile.am index 268a9f100f..5252fd5f53 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University +# Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, @@ -19,5 +19,5 @@ # # support needs to be first for dependencies -SUBDIRS = support asm class threads datatype util +SUBDIRS = support asm class threads datatype util monitoring DIST_SUBDIRS = event $(SUBDIRS) diff --git a/test/monitoring/Makefile.am b/test/monitoring/Makefile.am new file mode 100644 index 0000000000..db85187bf9 --- /dev/null +++ b/test/monitoring/Makefile.am @@ -0,0 +1,21 @@ +# +# Copyright (c) 2013-2015 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2013-2015 Inria. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This test requires multiple processes to run. 
Don't run it as part
+# of 'make check'
+if PROJECT_OMPI
+    noinst_PROGRAMS = monitoring_test
+
+    monitoring_test_SOURCES = monitoring_test.c
+    monitoring_test_LDFLAGS = $(WRAPPER_EXTRA_LDFLAGS)
+    monitoring_test_LDADD = $(top_builddir)/ompi/libmpi.la $(top_builddir)/opal/libopen-pal.la
+endif
diff --git a/test/monitoring/aggregate_profile.pl b/test/monitoring/aggregate_profile.pl new file mode 100644 index 0000000000..da6d3780b0 --- /dev/null +++ b/test/monitoring/aggregate_profile.pl @@ -0,0 +1,71 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright (c) 2013-2015 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2013-2015 Inria. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+#
+# Author Emmanuel Jeannot
+#
+# This script aggregates the profiles generated by the flush_monitoring function.
+# The files need to be in the given format: name_<phaseid>_<processid>
+# They are then aggregated by phase.
+# If one needs the profile of all the phases, one can concatenate the different files,
+# or use the output of the monitoring system done at MPI_Finalize.
+# In the example it should be called as:
+# ./aggregate_profile.pl prof/phase to generate
+# prof/phase_1.prof
+# prof/phase_2.prof
+#
+# Ensure that this script has the execute right: chmod +x ...
+#
+
+# Abort unless exactly one argument (the profile base name) is given.
+die "$0 <name>\n\tProfile files should be of the form \"name_phaseid_processesid.prof\"\n\tFor instance if you saved the monitoring into phase_0_0.prof, phase_0_1.prof, ..., phase_1_0.prof etc you should call: $0 phase\n" if ($#ARGV!=0);
+
+my $name = $ARGV[0];
+
+my @files = glob($name."*");
+
+my %phaseid = ();
+
+
+# Detect the different phases from file names of the form <name>_<phase>_<rank>
+foreach my $file (@files) {
+    my ($id) = ($file =~ m/$name\_(\d+)_\d+/);
+    # 'defined' (not plain truth) so that phase 0 is not dropped: 0 is false in Perl.
+    $phaseid{$id} = 1 if (defined $id);
+}
+
+# For each phase, aggregate the per-process files into <name>_<phase>.prof
+foreach my $id (sort {$a <=> $b} keys %phaseid) {
+    aggregate($name."_".$id);
+}
+
+
+# Concatenate every per-process file of a given phase into <phase>.prof
+sub aggregate{
+    my $phase = $_[0];
+
+    print "Building $phase.prof\n";
+
+    open my $out, '>', "$phase.prof" or die "cannot open $phase.prof: $!";
+
+    # Glob with the trailing '_' so we match only <phase>_<rank> inputs and
+    # never the "$phase.prof" output we just created (nor e.g. phase_10 files
+    # when aggregating phase_1).
+    my @phase_files = glob($phase."_*");
+
+    foreach my $file (@phase_files) {
+        open my $in, '<', $file or die "cannot open $file: $!";
+        while (<$in>) {
+            print {$out} $_;
+        }
+        close $in;
+    }
+    close $out or die "cannot close $phase.prof: $!";
+}
diff --git a/test/monitoring/monitoring_test.c b/test/monitoring/monitoring_test.c new file mode 100644 index 0000000000..6e85c7491b --- /dev/null +++ b/test/monitoring/monitoring_test.c @@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2013-2015 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2013-2015 Inria. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/*
+pml monitoring tester.
+
+Designed by George Bosilca and Emmanuel Jeannot
+Contact the authors for questions.
+ +To be run as: + +mpirun -np 4 --mca pml_monitoring_enable 2 ./monitoring_test +pm +Then, the output should be: + +flushing to ./prof/phase_1_2.prof +flushing to ./prof/phase_1_0.prof +flushing to ./prof/phase_1_3.prof +flushing to ./prof/phase_2_1.prof +flushing to ./prof/phase_2_3.prof +flushing to ./prof/phase_2_0.prof +flushing to ./prof/phase_2_2.prof +I 0 1 108 bytes 27 msgs sent +E 0 1 1012 bytes 30 msgs sent +E 0 2 23052 bytes 61 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 208 bytes 52 msgs sent +E 1 0 860 bytes 24 msgs sent +E 1 3 2552 bytes 56 msgs sent +I 2 3 104 bytes 26 msgs sent +E 2 0 22804 bytes 49 msgs sent +E 2 3 860 bytes 24 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 204 bytes 51 msgs sent +E 3 1 2304 bytes 44 msgs sent +E 3 2 860 bytes 24 msgs sent + +or as + +mpirun -np 4 --mca pml_monitoring_enable 1 ./monitoring_test + +for an output as: + +flushing to ./prof/phase_1_1.prof +flushing to ./prof/phase_1_0.prof +flushing to ./prof/phase_1_2.prof +flushing to ./prof/phase_1_3.prof +flushing to ./prof/phase_2_1.prof +flushing to ./prof/phase_2_3.prof +flushing to ./prof/phase_2_2.prof +flushing to ./prof/phase_2_0.prof +I 0 1 1120 bytes 57 msgs sent +I 0 2 23052 bytes 61 msgs sent +I 1 0 860 bytes 24 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 2760 bytes 108 msgs sent +I 2 0 22804 bytes 49 msgs sent +I 2 3 964 bytes 50 msgs sent +I 3 0 104 bytes 26 msgs sent +I 3 1 2508 bytes 95 msgs sent +I 3 2 860 bytes 24 msgs sent +*/ + + + +#include +#include "mpi.h" + +/* opal mca header taken from opal/mca/base/mca_base_var.h + Required to flush monitoring phases +*/ +int mca_base_var_find_by_name (const char *full_name, int *vari); +int mca_base_var_get_value (int vari, const void *value, + void *source, /* should be mca_base_var_source_t *source, + but we do not need it + and we do not know what is mca_base_var_source_t */ + const char **source_file); + + +int main(argc, argv) + int argc; + char **argv; +{ + int rank, size, n, to, from, tagno; 
+    MPI_Status status;
+    MPI_Comm newcomm;
+    MPI_Request request;
+    char filename[1024];
+
+
+    /* first phase : make a token circulate in MPI_COMM_WORLD */
+    n = -1;
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    to = (rank + 1) % size;
+    from = (rank - 1) % size; /* NOTE(review): -1 for rank 0; (rank + size - 1) % size would be safer */
+    tagno = 201;
+    if (rank == 0){
+        n=25;
+        MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request);
+    }
+    while (1){
+        MPI_Irecv(&n,1,MPI_INT,from,tagno,MPI_COMM_WORLD, &request);
+        MPI_Wait(&request,&status);
+        if (rank == 0) {n--;tagno++;}
+        MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD, &request);
+        if (rank != 0) {n--;tagno++;}
+        if (n<0){
+            break;
+        }
+    }
+
+
+    /* flush the monitoring of the first phase */
+    int fctidx;
+    void* fct;
+    int (*flush_monitoring)(char*) = NULL;
+    /*
+      Get the function pointer of the flushing function of the monitoring.
+      This uses the Opal low-level interface.
+    */
+    mca_base_var_find_by_name( "pml_monitoring_flush", &fctidx);
+    if(fctidx){
+        mca_base_var_get_value(fctidx, &fct, NULL, NULL);
+        flush_monitoring = *(unsigned long*)fct;
+    }
+    /* Build one file per process.
+       Everything that has been monitored by each
+       process since the last flush will be output in filename */
+
+    /*
+      Requires the directory prof to be created.
+      The filename format should display the phase number
+      and the process rank for ease of parsing with the
+      aggregate_profile.pl script
+    */
+    sprintf(filename,"./prof/phase_1_%d.prof",rank);
+    if(flush_monitoring){
+        int r = flush_monitoring(filename);
+        if(r == -1){
+            fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename);
+        }
+    }
+
+    /*
+      Second phase. Work with different communicators:
+      odd ranks will circulate a token
+      while even ranks will perform an all_to_all
+    */
+    MPI_Comm_split(MPI_COMM_WORLD,rank%2,rank,&newcomm);
+
+    /* the filename for flushing monitoring now uses 2 as phase number!
*/ + sprintf(filename,"./prof/phase_2_%d.prof",rank); + + + if(rank%2){ /*even ranks (in COMM_WORD) circulate a token*/ + int old_rank=rank; + MPI_Comm_rank(newcomm,&rank); + MPI_Comm_size(newcomm,&size); + if( size > 1 ) { + to = (rank + 1) % size;; + from = (rank - 1) % size ; + tagno = 201; + if (rank == 0){ + n=50; + MPI_Send(&n,1,MPI_INT,to,tagno,newcomm); + } + while (1){ + MPI_Recv(&n,1,MPI_INT,from,tagno,newcomm, &status); + if (rank == 0) {n--;tagno++;} + MPI_Send(&n,1,MPI_INT,to,tagno,newcomm); + if (rank != 0) {n--;tagno++;} + if (n<0){ + if(flush_monitoring){ + int r = flush_monitoring(filename); + if(r == -1){ + fprintf(stderr, "Process %d cannot save monitoring in %s\n", old_rank, filename); + } + } + break; + } + } + } + }else{ /*odd ranks (in COMM_WORD) will perform a all_to_all and a barrier*/ + int send_buff[10240]; + int recv_buff[10240]; + MPI_Comm_rank(newcomm,&rank); + MPI_Comm_size(newcomm,&size); + MPI_Alltoall(send_buff,10240/size, MPI_INT,recv_buff,10240/size,MPI_INT,newcomm); + MPI_Comm_split(newcomm,rank%2,rank,&newcomm); + MPI_Barrier(newcomm); + if(flush_monitoring){ + int r = flush_monitoring(filename); + if(r == -1){ + fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename); + } + } + } + + /* Now, in MPI_Finalize(), the pml_monitoring library outputs, in STDERR, the aggregated recorded monitoring of all the phases*/ + MPI_Finalize(); + return 0; +} diff --git a/test/monitoring/prof/phase_1_0.prof b/test/monitoring/prof/phase_1_0.prof new file mode 100644 index 0000000000..500401685b --- /dev/null +++ b/test/monitoring/prof/phase_1_0.prof @@ -0,0 +1 @@ +I 0 1 108 bytes 27 msgs sent diff --git a/test/monitoring/prof/phase_1_1.prof b/test/monitoring/prof/phase_1_1.prof new file mode 100644 index 0000000000..1a314c842c --- /dev/null +++ b/test/monitoring/prof/phase_1_1.prof @@ -0,0 +1 @@ +I 1 2 104 bytes 26 msgs sent diff --git a/test/monitoring/prof/phase_1_2.prof b/test/monitoring/prof/phase_1_2.prof new file mode 
100644 index 0000000000..da71c785a6 --- /dev/null +++ b/test/monitoring/prof/phase_1_2.prof @@ -0,0 +1 @@ +I 2 3 104 bytes 26 msgs sent diff --git a/test/monitoring/prof/phase_1_3.prof b/test/monitoring/prof/phase_1_3.prof new file mode 100644 index 0000000000..c2da632079 --- /dev/null +++ b/test/monitoring/prof/phase_1_3.prof @@ -0,0 +1 @@ +I 3 0 104 bytes 26 msgs sent diff --git a/test/monitoring/prof/phase_2_0.prof b/test/monitoring/prof/phase_2_0.prof new file mode 100644 index 0000000000..a74fe68244 --- /dev/null +++ b/test/monitoring/prof/phase_2_0.prof @@ -0,0 +1,2 @@ +I 0 1 20 bytes 4 msgs sent +I 0 2 20528 bytes 9 msgs sent diff --git a/test/monitoring/prof/phase_2_1.prof b/test/monitoring/prof/phase_2_1.prof new file mode 100644 index 0000000000..43328d3520 --- /dev/null +++ b/test/monitoring/prof/phase_2_1.prof @@ -0,0 +1,2 @@ +I 1 0 20 bytes 4 msgs sent +I 1 3 236 bytes 56 msgs sent diff --git a/test/monitoring/prof/phase_2_2.prof b/test/monitoring/prof/phase_2_2.prof new file mode 100644 index 0000000000..5dc7d16ada --- /dev/null +++ b/test/monitoring/prof/phase_2_2.prof @@ -0,0 +1,2 @@ +I 2 0 20528 bytes 9 msgs sent +I 2 3 20 bytes 4 msgs sent diff --git a/test/monitoring/prof/phase_2_3.prof b/test/monitoring/prof/phase_2_3.prof new file mode 100644 index 0000000000..bc2c365b29 --- /dev/null +++ b/test/monitoring/prof/phase_2_3.prof @@ -0,0 +1,2 @@ +I 3 1 232 bytes 55 msgs sent +I 3 2 20 bytes 4 msgs sent diff --git a/test/monitoring/profile2mat.pl b/test/monitoring/profile2mat.pl new file mode 100644 index 0000000000..a6ea6a52bb --- /dev/null +++ b/test/monitoring/profile2mat.pl @@ -0,0 +1,123 @@ +#!/usr/bin/perl -w + +# +# Copyright (c) 2013-2015 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2013-2015 Inria. All rights reserved. 
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+#
+# Author Emmanuel Jeannot
+#
+# Take a profile file and aggregate all the recorded communication into matrices.
+# It generates a matrix for the number of messages (msg),
+# one for the total bytes transmitted (size) and
+# one for the average number of bytes per message (avg).
+#
+# The output matrix is symmetric.
+#
+# If possible it creates files with "internal" tags (collective and meta data),
+# "external" tags (point-to-point messages) and "all" (every message).
+#
+# Ensure that this script has the execute right: chmod +x ...
+#
+
+
+if($#ARGV < 0){
+    die("Usage: $0 <filename.prof>\n");
+}else{
+    $filename=$ARGV[0];
+}
+
+# Always build the "all" matrices; build the "external" and "internal" ones
+# only when the profile actually contains external ("E") records.
+profile($filename,"I|E","all");
+if ( profile($filename,"E","external") ){
+    profile($filename,"I","internal");
+}
+
+# Parse $filename keeping only the records whose tag matches $filter
+# ("I", "E" or "I|E") and dump the size/msg/avg matrices, tagging the
+# output file names with $suffix. Returns 1 if at least one record
+# matched, 0 otherwise.
+sub profile{
+    my $filename= $_[0];
+    my $filter= $_[1];
+    my $suffix= $_[2];
+    my $done = 0;
+
+    my $outfile=$filename;
+    $outfile=~s/\.prof$/_size_$suffix\.mat/;
+
+    open my $in, '<', $filename or die "cannot open $filename: $!";
+    my $n=0;
+    my @size_mat=();
+    my @msg_mat=();
+    my @avg_mat=();
+    while (<$in>) {
+        # Records look like: "I 0 1 108 bytes 27 msgs sent"
+        if (my ($tag,$p1,$p2,$bytes,$msgs)=/^($filter)\s+(\d+)\s+(\d+)\s+(\d+)\D+(\d+)/){
+            $done = 1;
+            # Accumulate symmetrically; each entry is halved afterwards so
+            # the diagonal-free total stays correct.
+            $size_mat[$p1][$p2]+=$bytes;
+            $size_mat[$p2][$p1]+=$bytes;
+            $msg_mat[$p1][$p2]+=$msgs;
+            $msg_mat[$p2][$p1]+=$msgs;
+            $n=$p1 if ($p1>$n);
+            $n=$p2 if ($p2>$n);
+        }
+    }
+    close $in;
+
+    foreach my $i (0..$n) {
+        foreach my $j (0..$n) {
+            # Force undefined cells to 0, then undo the double accumulation.
+            $size_mat[$i][$j]+=0;
+            $msg_mat[$i][$j]+=0;
+            $size_mat[$i][$j]/=2;
+            $msg_mat[$i][$j]/=2;
+            if ($msg_mat[$i][$j]){
+                $avg_mat[$i][$j]=$size_mat[$i][$j]/$msg_mat[$i][$j];
+            }else{
+                $avg_mat[$i][$j]=0;
+            }
+        }
+    }
+
+    if ($done) {
+        print "$filename -> $suffix\n";
+        save_file($outfile,$n,\@size_mat);
+        $outfile=~s/_size/_msg/;
+        save_file($outfile,$n,\@msg_mat);
+        $outfile=~s/_msg/_avg/;
+        save_file($outfile,$n,\@avg_mat);
+        print"\n";
+    }
+    return $done;
+}
+
+
+# Write the (n+1)x(n+1) matrix referenced by $_[2] into the file $_[0],
+# one row per line, entries rounded to the nearest integer.
+sub save_file{
+    my $outfile=$_[0];
+    my $n=$_[1];
+    my @mat=@{$_[2]};
+
+    print "$outfile\n";
+    open my $out, '>', $outfile or die "cannot open $outfile: $!";
+    foreach my $i (0..$n) {
+        foreach my $j (0..$n) {
+            printf {$out} "%.0f ",$mat[$i][$j];
+        }
+        print {$out} "\n";
+    }
+    close $out or die "cannot close $outfile: $!";
+}