SPC: allow counters to be attached solely through MPI_T and reduce overhead
- only make MCA parameters available if SPC is enabled
- do not compile SPC code if SPC is disabled
- move includes into ompi_spc.c
- allow counters to be enabled through MPI_T without setting the MCA parameter
- inline counter update calls that are likely in the critical path
- fix test to succeed even if encountering invalid pvars
- move timer_[start|stop] to header and move attachment info into ompi_spc_t
  There is no need to store the name in the ompi_spc_t struct too; we can use
  that space for the attachment info instead to avoid accessing another cache line.
- make timer/watermark flags a property of the SPC description
  This is meant to make adding counters easier in the future by centralizing the
  necessary information. Storing a copy of these flags in the ompi_spc_t structure
  (without adding to its size) reduces cache pollution for timer/watermark events.
- allocate ompi_spc_t objects with cache-alignment
  This prevents objects from spanning multiple cache lines and thus ensures that
  only one cache line is loaded per update.
- fix handling of timer and timer conversion
- only call opal_timer_base_get_cycles if necessary to reduce overhead
- remove use of OPAL_UNLIKELY to improve code generated by GCC
  It appears that GCC makes less effort in optimizing the unlikely path and
  generates bloated code.
- allocate ompi_spc_events statically to reduce loads in the critical path
- duplicate comm_world only when dumping is requested

Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
This commit is contained in:
parent 30831fb7f0
commit d11f625ed5
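
The headline change, attaching counters solely through MPI_T without setting an MCA parameter, can be exercised with a small tool-side program along the lines of the sketch below; it mirrors the test shipped with this commit. This is an illustration only: the counter is located by matching the name OMPI_SPC_SEND as a substring, since the fully qualified pvar name string is not assumed here.

    #include <mpi.h>
    #include <stdio.h>
    #include <string.h>

    /* Minimal sketch: attach one SPC counter purely through MPI_T (no MCA
     * parameter set).  The pvar is located by searching for the counter name
     * OMPI_SPC_SEND inside the registered pvar names; the exact full name is
     * not assumed.  MPI_T_pvar_start() works here because the counters are no
     * longer registered with the CONTINUOUS flag (see the pvar registration
     * change below). */
    int main(int argc, char **argv)
    {
        int provided, num_pvars, name_len, desc_len, verbosity, var_class;
        int binding, readonly, continuous, atomic, count, target = -1;
        char name[256], desc[256];
        MPI_Datatype datatype;
        MPI_T_enum enumtype;

        MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
        MPI_Init(&argc, &argv);

        MPI_T_pvar_get_num(&num_pvars);
        for (int i = 0; i < num_pvars; i++) {
            name_len = desc_len = sizeof(name);
            if (MPI_SUCCESS != MPI_T_pvar_get_info(i, name, &name_len, &verbosity,
                                                   &var_class, &datatype, &enumtype,
                                                   desc, &desc_len, &binding,
                                                   &readonly, &continuous, &atomic)) {
                continue;   /* skip pvars that cannot be queried */
            }
            if (NULL != strstr(name, "OMPI_SPC_SEND")) {
                target = i;
                break;
            }
        }

        if (target >= 0) {
            MPI_T_pvar_session session;
            MPI_T_pvar_handle  handle;
            unsigned long long value = 0;

            MPI_T_pvar_session_create(&session);
            MPI_T_pvar_handle_alloc(session, target, NULL, &handle, &count);
            MPI_T_pvar_start(session, handle);    /* attaches the counter */

            /* ... application communication to be counted goes here ... */

            MPI_T_pvar_read(session, handle, &value);
            printf("OMPI_SPC_SEND = %llu\n", value);

            MPI_T_pvar_stop(session, handle);
            MPI_T_pvar_handle_free(session, &handle);
            MPI_T_pvar_session_free(&session);
        }

        MPI_Finalize();
        MPI_T_finalize();
        return 0;
    }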
@@ -334,9 +334,10 @@ int ompi_mpi_register_params(void)
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &ompi_mpi_compat_mpi3);

#if SPC_ENABLE == 1
    ompi_mpi_spc_attach_string = NULL;
    (void) mca_base_var_register("ompi", "mpi", NULL, "spc_attach",
                                 "A comma delimeted string listing the software-based performance counters (SPCs) to enable.",
                                 "A comma-delimeted list of software-based performance counters (SPCs) to enable (\"all\" enables all counters).",
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                 OPAL_INFO_LVL_4,
                                 MCA_BASE_VAR_SCOPE_READONLY,
@@ -344,11 +345,12 @@ int ompi_mpi_register_params(void)

    ompi_mpi_spc_dump_enabled = false;
    (void) mca_base_var_register("ompi", "mpi", NULL, "spc_dump_enabled",
                                 "A boolean value for whether (true) or not (false) to enable dumping SPC counters in MPI_Finalize.",
                                 "A boolean value for whether (true) or not (false) to enable dumping enabled SPC counters in MPI_Finalize.",
                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                 OPAL_INFO_LVL_4,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &ompi_mpi_spc_dump_enabled);
#endif // SPC_ENABLE

    return OMPI_SUCCESS;
}
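
When SPC support is compiled in, counters can also still be selected at launch time through the MCA parameters registered above, for example (the counter selection and application name are illustrative): mpirun -np 2 --mca mpi_spc_attach OMPI_SPC_SEND,OMPI_SPC_BYTES_SENT_USER --mca mpi_spc_dump_enabled true ./app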
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2018-2019 The University of Tennessee and The University
 * Copyright (c) 2018-2020 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 *
@@ -16,11 +16,30 @@
 * $HEADER$
 */

#include "ompi_spc.h"
#include "ompi_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>

#include "ompi/runtime/ompi_spc.h"
#include "ompi/runtime/params.h"

#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/mca/timer/timer.h"
#include "opal/mca/base/mca_base_pvar.h"
#include "opal/util/argv.h"
#include "opal/util/show_help.h"
#include "opal/util/output.h"

#if SPC_ENABLE == 1

static opal_timer_t sys_clock_freq_mhz = 0;

static void ompi_spc_dump(void);
static ompi_spc_value_t ompi_spc_cycles_to_usecs_internal(opal_timer_t cycles);

/* Array for converting from SPC indices to MPI_T indices */
static bool mpi_t_enabled = false;
@@ -29,154 +48,133 @@ static ompi_communicator_t *ompi_spc_comm = NULL;
typedef struct ompi_spc_event_t {
    const char* counter_name;
    const char* counter_description;
    bool is_high_watermark;
    bool is_timer_event;
} ompi_spc_event_t;

#define SET_COUNTER_ARRAY(NAME, DESC) [NAME] = { .counter_name = #NAME, .counter_description = DESC }
#define SET_COUNTER_ARRAY(NAME, DESC, HWM, ITE) [NAME] = { .counter_name = #NAME, .counter_description = DESC, \
                                                           .is_high_watermark = HWM, .is_timer_event = ITE }

static ompi_spc_event_t ompi_spc_events_names[OMPI_SPC_NUM_COUNTERS] = {
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SEND, "The number of times MPI_Send was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BSEND, "The number of times MPI_Bsend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RSEND, "The number of times MPI_Rsend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SSEND, "The number of times MPI_Ssend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RECV, "The number of times MPI_Recv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MRECV, "The number of times MPI_Mrecv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISEND, "The number of times MPI_Isend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBSEND, "The number of times MPI_Ibsend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IRSEND, "The number of times MPI_Irsend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISSEND, "The number of times MPI_Issend was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IRECV, "The number of times MPI_Irecv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV, "The number of times MPI_Sendrecv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV_REPLACE, "The number of times MPI_Sendrecv_replace was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_PUT, "The number of times MPI_Put was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BCAST, "The number of times MPI_Bcast was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBCAST, "The number of times MPI_Ibcast was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BCAST_INIT, "The number of times MPIX_Bcast_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE, "The number of times MPI_Reduce was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER, "The number of times MPI_Reduce_scatter was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK, "The number of times MPI_Reduce_scatter_block was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE, "The number of times MPI_Ireduce was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER, "The number of times MPI_Ireduce_scatter was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER_BLOCK, "The number of times MPI_Ireduce_scatter_block was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_INIT, "The number of times MPIX_Reduce_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_INIT, "The number of times MPIX_Reduce_scatter_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK_INIT, "The number of times MPIX_Reduce_scatter_block_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE, "The number of times MPI_Allreduce was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLREDUCE, "The number of times MPI_Iallreduce was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE_INIT, "The number of times MPIX_Allreduce_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCAN, "The number of times MPI_Scan was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN, "The number of times MPI_Exscan was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCAN, "The number of times MPI_Iscan was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IEXSCAN, "The number of times MPI_Iexscan was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCAN_INIT, "The number of times MPIX_Scan_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN_INIT, "The number of times MPIX_Exscan_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTER, "The number of times MPI_Scatter was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV, "The number of times MPI_Scatterv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCATTER, "The number of times MPI_Iscatter was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCATTERV, "The number of times MPI_Iscatterv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTER_INIT, "The number of times MPIX_Scatter_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV_INIT, "The number of times MPIX_Scatterv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHER, "The number of times MPI_Gather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHERV, "The number of times MPI_Gatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IGATHER, "The number of times MPI_Igather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IGATHERV, "The number of times MPI_Igatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHER_INIT, "The number of times MPIX_Gather_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHERV_INIT, "The number of times MPIX_Gatherv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL, "The number of times MPI_Alltoall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV, "The number of times MPI_Alltoallv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW, "The number of times MPI_Alltoallw was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALL, "The number of times MPI_Ialltoall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLV, "The number of times MPI_Ialltoallv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLW, "The number of times MPI_Ialltoallw was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL_INIT, "The number of times MPIX_Alltoall_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV_INIT, "The number of times MPIX_Alltoallv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW_INIT, "The number of times MPIX_Alltoallw_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL, "The number of times MPI_Neighbor_alltoall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV, "The number of times MPI_Neighbor_alltoallv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW, "The number of times MPI_Neighbor_alltoallw was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALL, "The number of times MPI_Ineighbor_alltoall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLV, "The number of times MPI_Ineighbor_alltoallv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLW, "The number of times MPI_Ineighbor_alltoallw was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL_INIT, "The number of times MPIX_Neighbor_alltoall_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV_INIT, "The number of times MPIX_Neighbor_alltoallv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW_INIT, "The number of times MPIX_Neighbor_alltoallw_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER, "The number of times MPI_Allgather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV, "The number of times MPI_Allgatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHER, "The number of times MPI_Iallgather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHERV, "The number of times MPI_Iallgatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER_INIT, "The number of times MPIX_Allgather_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV_INIT, "The number of times MPIX_Allgatherv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER, "The number of times MPI_Neighbor_allgather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV, "The number of times MPI_Neighbor_allgatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHER, "The number of times MPI_Ineighbor_allgather was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHERV, "The number of times MPI_Ineighbor_allgatherv was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER_INIT, "The number of times MPIX_Neighbor_allgather_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV_INIT, "The number of times MPIX_Neighbor_allgatherv_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TEST, "The number of times MPI_Test was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTALL, "The number of times MPI_Testall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTANY, "The number of times MPI_Testany was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTSOME, "The number of times MPI_Testsome was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAIT, "The number of times MPI_Wait was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITALL, "The number of times MPI_Waitall was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITANY, "The number of times MPI_Waitany was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITSOME, "The number of times MPI_Waitsome was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BARRIER, "The number of times MPI_Barrier was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBARRIER, "The number of times MPI_Ibarrier was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BARRIER_INIT, "The number of times MPIX_Barrier_init was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WTIME, "The number of times MPI_Wtime was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_CANCEL, "The number of times MPI_Cancel was called."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_USER, "The number of bytes received by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_MPI, "The number of bytes received by MPI through collective, control, or other internal communications."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_USER, "The number of bytes sent by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_MPI, "The number of bytes sent by MPI through collective, control, or other internal communications."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_PUT, "The number of bytes sent/received using RMA Put operations both through user-level Put functions and internal Put functions."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_GET, "The number of bytes sent/received using RMA Get operations both through user-level Get functions and internal Get functions."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED, "The number of messages that arrived as unexpected messages."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_OUT_OF_SEQUENCE, "The number of messages that arrived out of the proper sequence."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MATCH_TIME, "The number of microseconds spent matching unexpected messages. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. On such a system, this counter could be inaccurate since we assume a fixed clock rate."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED_IN_QUEUE, "The number of messages that are currently in the unexpected message queue(s) of an MPI process."),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_OOS_IN_QUEUE, "The number of messages that are currently in the out of sequence message queue(s) of an MPI process."),
|
||||
static const ompi_spc_event_t ompi_spc_events_desc[OMPI_SPC_NUM_COUNTERS] = {
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SEND, "The number of times MPI_Send was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BSEND, "The number of times MPI_Bsend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RSEND, "The number of times MPI_Rsend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SSEND, "The number of times MPI_Ssend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RECV, "The number of times MPI_Recv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MRECV, "The number of times MPI_Mrecv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISEND, "The number of times MPI_Isend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBSEND, "The number of times MPI_Ibsend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IRSEND, "The number of times MPI_Irsend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISSEND, "The number of times MPI_Issend was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IRECV, "The number of times MPI_Irecv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV, "The number of times MPI_Sendrecv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV_REPLACE, "The number of times MPI_Sendrecv_replace was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_PUT, "The number of times MPI_Put was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BCAST, "The number of times MPI_Bcast was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBCAST, "The number of times MPI_Ibcast was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BCAST_INIT, "The number of times MPIX_Bcast_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE, "The number of times MPI_Reduce was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER, "The number of times MPI_Reduce_scatter was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK, "The number of times MPI_Reduce_scatter_block was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE, "The number of times MPI_Ireduce was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER, "The number of times MPI_Ireduce_scatter was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER_BLOCK, "The number of times MPI_Ireduce_scatter_block was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_INIT, "The number of times MPIX_Reduce_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_INIT, "The number of times MPIX_Reduce_scatter_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK_INIT, "The number of times MPIX_Reduce_scatter_block_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE, "The number of times MPI_Allreduce was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLREDUCE, "The number of times MPI_Iallreduce was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE_INIT, "The number of times MPIX_Allreduce_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCAN, "The number of times MPI_Scan was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN, "The number of times MPI_Exscan was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCAN, "The number of times MPI_Iscan was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IEXSCAN, "The number of times MPI_Iexscan was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCAN_INIT, "The number of times MPIX_Scan_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN_INIT, "The number of times MPIX_Exscan_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTER, "The number of times MPI_Scatter was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV, "The number of times MPI_Scatterv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCATTER, "The number of times MPI_Iscatter was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ISCATTERV, "The number of times MPI_Iscatterv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTER_INIT, "The number of times MPIX_Scatter_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV_INIT, "The number of times MPIX_Scatterv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHER, "The number of times MPI_Gather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHERV, "The number of times MPI_Gatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IGATHER, "The number of times MPI_Igather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IGATHERV, "The number of times MPI_Igatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHER_INIT, "The number of times MPIX_Gather_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_GATHERV_INIT, "The number of times MPIX_Gatherv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL, "The number of times MPI_Alltoall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV, "The number of times MPI_Alltoallv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW, "The number of times MPI_Alltoallw was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALL, "The number of times MPI_Ialltoall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLV, "The number of times MPI_Ialltoallv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLW, "The number of times MPI_Ialltoallw was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL_INIT, "The number of times MPIX_Alltoall_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV_INIT, "The number of times MPIX_Alltoallv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW_INIT, "The number of times MPIX_Alltoallw_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL, "The number of times MPI_Neighbor_alltoall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV, "The number of times MPI_Neighbor_alltoallv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW, "The number of times MPI_Neighbor_alltoallw was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALL, "The number of times MPI_Ineighbor_alltoall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLV, "The number of times MPI_Ineighbor_alltoallv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLW, "The number of times MPI_Ineighbor_alltoallw was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL_INIT, "The number of times MPIX_Neighbor_alltoall_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV_INIT, "The number of times MPIX_Neighbor_alltoallv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW_INIT, "The number of times MPIX_Neighbor_alltoallw_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER, "The number of times MPI_Allgather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV, "The number of times MPI_Allgatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHER, "The number of times MPI_Iallgather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHERV, "The number of times MPI_Iallgatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER_INIT, "The number of times MPIX_Allgather_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV_INIT, "The number of times MPIX_Allgatherv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER, "The number of times MPI_Neighbor_allgather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV, "The number of times MPI_Neighbor_allgatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHER, "The number of times MPI_Ineighbor_allgather was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHERV, "The number of times MPI_Ineighbor_allgatherv was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER_INIT, "The number of times MPIX_Neighbor_allgather_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV_INIT, "The number of times MPIX_Neighbor_allgatherv_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TEST, "The number of times MPI_Test was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTALL, "The number of times MPI_Testall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTANY, "The number of times MPI_Testany was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_TESTSOME, "The number of times MPI_Testsome was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAIT, "The number of times MPI_Wait was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITALL, "The number of times MPI_Waitall was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITANY, "The number of times MPI_Waitany was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WAITSOME, "The number of times MPI_Waitsome was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BARRIER, "The number of times MPI_Barrier was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_IBARRIER, "The number of times MPI_Ibarrier was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BARRIER_INIT, "The number of times MPIX_Barrier_init was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_WTIME, "The number of times MPI_Wtime was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_CANCEL, "The number of times MPI_Cancel was called.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_USER, "The number of bytes received by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_MPI, "The number of bytes received by MPI through collective, control, or other internal communications.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_USER, "The number of bytes sent by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_MPI, "The number of bytes sent by MPI through collective, control, or other internal communications.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_PUT, "The number of bytes sent/received using RMA Put operations both through user-level Put functions and internal Put functions.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_BYTES_GET, "The number of bytes sent/received using RMA Get operations both through user-level Get functions and internal Get functions.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED, "The number of messages that arrived as unexpected messages.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_OUT_OF_SEQUENCE, "The number of messages that arrived out of the proper sequence.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MATCH_TIME, "The number of microseconds spent matching unexpected messages. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. On such a system, this counter could be inaccurate since we assume a fixed clock rate.", false, true),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED_IN_QUEUE, "The number of messages that are currently in the unexpected message queue(s) of an MPI process.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_OOS_IN_QUEUE, "The number of messages that are currently in the out of sequence message queue(s) of an MPI process.", false, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MAX_UNEXPECTED_IN_QUEUE, "The maximum number of messages that the unexpected message queue(s) within an MPI process "
|
||||
"contained at once since the last reset of this counter. Note: This counter is reset each time it is read."),
|
||||
"contained at once since the last reset of this counter. Note: This counter is reset each time it is read.", true, false),
|
||||
SET_COUNTER_ARRAY(OMPI_SPC_MAX_OOS_IN_QUEUE, "The maximum number of messages that the out of sequence message queue(s) within an MPI process "
|
||||
"contained at once since the last reset of this counter. Note: This counter is reset each time it is read.")
|
||||
"contained at once since the last reset of this counter. Note: This counter is reset each time it is read.", true, false)
|
||||
};
|
||||
|
||||
/* An array of integer values to denote whether an event is activated (1) or not (0) */
|
||||
static uint32_t ompi_spc_attached_event[OMPI_SPC_NUM_COUNTERS / sizeof(uint32_t)] = { 0 };
|
||||
/* An array of integer values to denote whether an event is timer-based (1) or not (0) */
|
||||
static uint32_t ompi_spc_timer_event[OMPI_SPC_NUM_COUNTERS / sizeof(uint32_t)] = { 0 };
|
||||
/* An array of event structures to store the event data (name and value) */
|
||||
static ompi_spc_t *ompi_spc_events = NULL;
|
||||
|
||||
static inline void SET_SPC_BIT(uint32_t* array, int32_t pos)
|
||||
{
|
||||
assert(pos < OMPI_SPC_NUM_COUNTERS);
|
||||
array[pos / (8 * sizeof(uint32_t))] |= (1U << (pos % (8 * sizeof(uint32_t))));
|
||||
}
|
||||
|
||||
static inline bool IS_SPC_BIT_SET(uint32_t* array, int32_t pos)
|
||||
{
|
||||
assert(pos < OMPI_SPC_NUM_COUNTERS);
|
||||
return !!(array[pos / (8 * sizeof(uint32_t))] & (1U << (pos % (8 * sizeof(uint32_t)))));
|
||||
}
|
||||
|
||||
static inline void CLEAR_SPC_BIT(uint32_t* array, int32_t pos)
|
||||
{
|
||||
assert(pos < OMPI_SPC_NUM_COUNTERS);
|
||||
array[pos / (8 * sizeof(uint32_t))] &= ~(1U << (pos % (8 * sizeof(uint32_t))));
|
||||
}
|
||||
/* An array of event structures to store the event data (value, attachments, flags) */
|
||||
ompi_spc_t ompi_spc_events[OMPI_SPC_NUM_COUNTERS];
|
||||
|
||||
/* ##############################################################
|
||||
* ################# Begin MPI_T Functions ######################
|
||||
* ##############################################################
|
||||
*/
|
||||
static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count)
|
||||
__opal_attribute_unused__;
|
||||
|
||||
static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count)
|
||||
{
|
||||
@ -197,11 +195,11 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
|
||||
}
|
||||
/* For this event, we need to turn on the counter */
|
||||
else if(MCA_BASE_PVAR_HANDLE_START == event) {
|
||||
SET_SPC_BIT(ompi_spc_attached_event, index);
|
||||
opal_atomic_fetch_add_32(&ompi_spc_events[index].num_attached, 1);
|
||||
}
|
||||
/* For this event, we need to turn off the counter */
|
||||
else if(MCA_BASE_PVAR_HANDLE_STOP == event) {
|
||||
CLEAR_SPC_BIT(ompi_spc_attached_event, index);
|
||||
opal_atomic_fetch_add_32(&ompi_spc_events[index].num_attached, -1);
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
@ -217,65 +215,61 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
|
||||
* so we need to convert from MPI_T index to SPC index and then set the 'value' argument
|
||||
* to the correct value for this pvar.
|
||||
*/
|
||||
static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle)
|
||||
__opal_attribute_unused__;
|
||||
|
||||
static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle)
|
||||
{
|
||||
long long *counter_value = (long long*)value;
|
||||
long long *counter_value_ptr = (long long*)value;
|
||||
long long counter_value;
|
||||
|
||||
if(OPAL_LIKELY(!mpi_t_enabled)) {
|
||||
*counter_value = 0;
|
||||
*counter_value_ptr = 0;
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Convert from MPI_T pvar index to SPC index */
|
||||
int index = (int)(uintptr_t)pvar->ctx;
|
||||
/* Set the counter value to the current SPC value */
|
||||
*counter_value = (long long)ompi_spc_events[index].value;
|
||||
counter_value = (long long)ompi_spc_events[index].value;
|
||||
/* If this is a timer-based counter, convert from cycles to microseconds */
|
||||
if( IS_SPC_BIT_SET(ompi_spc_timer_event, index) ) {
|
||||
*counter_value /= sys_clock_freq_mhz;
|
||||
if( ompi_spc_events[index].is_timer_event ) {
|
||||
counter_value /= sys_clock_freq_mhz;
|
||||
}
|
||||
/* If this is a high watermark counter, reset it after it has been read */
|
||||
if(index == OMPI_SPC_MAX_UNEXPECTED_IN_QUEUE || index == OMPI_SPC_MAX_OOS_IN_QUEUE) {
|
||||
if(ompi_spc_events[index].is_high_watermark) {
|
||||
ompi_spc_events[index].value = 0;
|
||||
}
|
||||
|
||||
*counter_value_ptr = counter_value;
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initializes the events data structure and allocates memory for it if needed. */
|
||||
void ompi_spc_events_init(void)
|
||||
/* Allocate and initializes the events data structure. */
|
||||
static void ompi_spc_events_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* If the events data structure hasn't been allocated yet, allocate memory for it */
|
||||
if(NULL == ompi_spc_events) {
|
||||
ompi_spc_events = (ompi_spc_t*)malloc(OMPI_SPC_NUM_COUNTERS * sizeof(ompi_spc_t));
|
||||
if(ompi_spc_events == NULL) {
|
||||
opal_show_help("help-mpi-runtime.txt", "lib-call-fail", true,
|
||||
"malloc", __FILE__, __LINE__);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* The data structure has been allocated, so we simply initialize all of the counters
|
||||
* with their names and an initial count of 0.
|
||||
/* Initialize all of the counters with an initial count of 0.
|
||||
* Also copy over the flags for faster access later.
|
||||
*/
|
||||
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
|
||||
ompi_spc_events[i].name = (char*)ompi_spc_events_names[i].counter_name;
|
||||
ompi_spc_events[i].value = 0;
|
||||
ompi_spc_events[i].num_attached = 0;
|
||||
ompi_spc_events[i].is_high_watermark = ompi_spc_events_desc[i].is_high_watermark;
|
||||
ompi_spc_events[i].is_timer_event = ompi_spc_events_desc[i].is_timer_event;
|
||||
}
|
||||
|
||||
ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm);
|
||||
if (ompi_mpi_spc_dump_enabled) {
|
||||
ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm);
|
||||
}
|
||||
}
|
||||
|
||||
/* Initializes the SPC data structures and registers all counters as MPI_T pvars.
|
||||
* Turns on only the counters that were specified in the mpi_spc_attach MCA parameter.
|
||||
/*
|
||||
* Initializes the SPC events infrastructure.
|
||||
* Registers all counters requested through the MCA parameter mpi_spc_attach as MPI_T pvars.
|
||||
*/
|
||||
void ompi_spc_init(void)
|
||||
{
|
||||
int i, j, ret, found = 0, all_on = 0, matched = 0;
|
||||
int i, j, ret, all_on = 0, matched = 0;
|
||||
|
||||
/* Initialize the clock frequency variable as the CPU's frequency in MHz */
|
||||
sys_clock_freq_mhz = opal_timer_base_get_freq() / 1000000;
|
||||
@ -295,15 +289,16 @@ void ompi_spc_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* enable mpi_t and only disable if something goes wrong */
|
||||
mpi_t_enabled = true;
|
||||
|
||||
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
|
||||
/* Reset all timer-based counters */
|
||||
CLEAR_SPC_BIT(ompi_spc_timer_event, i);
|
||||
matched = all_on;
|
||||
|
||||
if( !matched ) {
|
||||
/* Turn on only the counters that were specified in the MCA parameter */
|
||||
for(j = 0; j < num_args; j++) {
|
||||
if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) {
|
||||
if( 0 == strcmp(ompi_spc_events_desc[i].counter_name, arg_strings[j]) ) {
|
||||
matched = 1;
|
||||
break;
|
||||
}
|
||||
@ -311,16 +306,14 @@ void ompi_spc_init(void)
|
||||
}
|
||||
|
||||
if (matched) {
|
||||
SET_SPC_BIT(ompi_spc_attached_event, i);
|
||||
mpi_t_enabled = true;
|
||||
found++;
|
||||
opal_atomic_fetch_add_32(&ompi_spc_events[i].num_attached, 1);
|
||||
}
|
||||
|
||||
/* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */
|
||||
ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description,
|
||||
ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_desc[i].counter_name, ompi_spc_events_desc[i].counter_description,
|
||||
OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE,
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT,
|
||||
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
|
||||
MCA_BASE_PVAR_FLAG_READONLY,
|
||||
ompi_spc_get_count, NULL, ompi_spc_notify, (void*)(uintptr_t)i);
|
||||
if( ret < 0 ) {
|
||||
mpi_t_enabled = false;
|
||||
@ -329,9 +322,6 @@ void ompi_spc_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* If this is a timer event, set the corresponding timer_event entry */
|
||||
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
|
||||
|
||||
opal_argv_free(arg_strings);
|
||||
}
|
||||
|
||||
@ -348,8 +338,8 @@ static void ompi_spc_dump(void)
|
||||
|
||||
/* Convert from cycles to usecs before sending */
|
||||
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
|
||||
if( IS_SPC_BIT_SET(ompi_spc_timer_event, i) ) {
|
||||
SPC_CYCLES_TO_USECS(&ompi_spc_events[i].value);
|
||||
if( ompi_spc_events[i].is_timer_event ) {
|
||||
ompi_spc_events[i].value = ompi_spc_cycles_to_usecs_internal(ompi_spc_events[i].value);
|
||||
}
|
||||
}
|
||||
|
||||
@ -383,11 +373,10 @@ static void ompi_spc_dump(void)
|
||||
for(j = 0; j < world_size; j++) {
|
||||
opal_output(0, "MPI_COMM_WORLD Rank %d:\n", j);
|
||||
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
|
||||
/* If this is a timer-based counter, we need to covert from cycles to usecs */
|
||||
if( 0 == recv_buffer[offset+i] ) {
|
||||
continue;
|
||||
}
|
||||
opal_output(0, "%s -> %lld\n", ompi_spc_events[i].name, recv_buffer[offset+i]);
|
||||
opal_output(0, "%s -> %lld\n", ompi_spc_events_desc[i].counter_name, recv_buffer[offset+i]);
|
||||
}
|
||||
opal_output(0, "\n");
|
||||
offset += OMPI_SPC_NUM_COUNTERS;
|
||||
@ -406,79 +395,26 @@ static void ompi_spc_dump(void)
|
||||
/* Frees any dynamically alocated OMPI SPC data structures */
|
||||
void ompi_spc_fini(void)
|
||||
{
|
||||
if (SPC_ENABLE == 1 && ompi_mpi_spc_dump_enabled) {
|
||||
if (ompi_mpi_spc_dump_enabled) {
|
||||
ompi_spc_dump();
|
||||
}
|
||||
|
||||
free(ompi_spc_events); ompi_spc_events = NULL;
|
||||
ompi_comm_free(&ompi_spc_comm);
|
||||
}
|
||||
|
||||
/* Records an update to a counter using an atomic add operation. */
|
||||
void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value)
|
||||
{
|
||||
/* Denoted unlikely because counters will often be turned off. */
|
||||
if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id)) ) {
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&(ompi_spc_events[event_id].value), value);
|
||||
}
|
||||
}
|
||||
|
||||
/* Starts cycle-precision timer and stores the start value in the 'cycles' argument.
|
||||
* Note: This assumes that the 'cycles' argument is initialized to 0 if the timer
|
||||
* hasn't been started yet.
|
||||
*/
|
||||
void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles)
|
||||
{
|
||||
/* Check whether cycles == 0.0 to make sure the timer hasn't started yet.
|
||||
* This is denoted unlikely because the counters will often be turned off.
|
||||
*/
|
||||
if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id) && *cycles == 0) ) {
|
||||
*cycles = opal_timer_base_get_cycles();
|
||||
}
|
||||
}
|
||||
|
||||
/* Stops a cycle-precision timer and calculates the total elapsed time
|
||||
* based on the starting time in 'cycles' and stores the result in the
|
||||
* 'cycles' argument.
|
||||
*/
|
||||
void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles)
|
||||
{
|
||||
/* This is denoted unlikely because the counters will often be turned off. */
|
||||
if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id)) ) {
|
||||
*cycles = opal_timer_base_get_cycles() - *cycles;
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&ompi_spc_events[event_id].value, (size_t) *cycles);
|
||||
}
|
||||
}
|
||||
|
||||
/* Checks a tag, and records the user version of the counter if it's greater
|
||||
* than or equal to 0 and records the mpi version of the counter otherwise.
|
||||
*/
|
||||
void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum)
|
||||
{
|
||||
SPC_RECORD( (tag >= 0 ? user_enum : mpi_enum), value);
|
||||
}
|
||||
|
||||
/* Checks whether the counter denoted by value_enum exceeds the current value of the
|
||||
* counter denoted by watermark_enum, and if so sets the watermark_enum counter to the
|
||||
* value of the value_enum counter.
|
||||
*/
|
||||
void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum)
|
||||
{
|
||||
/* Denoted unlikely because counters will often be turned off. */
|
||||
if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, watermark_enum) &&
|
||||
IS_SPC_BIT_SET(ompi_spc_attached_event, value_enum)) ) {
|
||||
/* WARNING: This assumes that this function was called while a lock has already been taken.
|
||||
* This function is NOT thread safe otherwise!
|
||||
*/
|
||||
if(ompi_spc_events[value_enum].value > ompi_spc_events[watermark_enum].value) {
|
||||
ompi_spc_events[watermark_enum].value = ompi_spc_events[value_enum].value;
|
||||
}
|
||||
ompi_comm_free(&ompi_spc_comm);
|
||||
}
|
||||
}
|
||||
|
||||
/* Converts a counter value that is in cycles to microseconds.
|
||||
* Internal helper function that can be inlined.
|
||||
*/
|
||||
void ompi_spc_cycles_to_usecs(ompi_spc_value_t *cycles)
|
||||
static inline
|
||||
ompi_spc_value_t ompi_spc_cycles_to_usecs_internal(opal_timer_t cycles)
|
||||
{
|
||||
*cycles = *cycles / sys_clock_freq_mhz;
|
||||
return (cycles / sys_clock_freq_mhz);
|
||||
}
|
||||
|
||||
/* Converts a counter value that is in cycles to microseconds.
|
||||
*/
|
||||
void ompi_spc_cycles_to_usecs(opal_timer_t *cycles)
|
||||
{
|
||||
*cycles = ompi_spc_cycles_to_usecs_internal(*cycles);
|
||||
}
|
||||
|
||||
#endif // SPC_ENABLE
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The University of Tennessee and The University
|
||||
* Copyright (c) 2018-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
@ -15,19 +15,11 @@
|
||||
#ifndef OMPI_SPC
|
||||
#define OMPI_SPC
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <dlfcn.h>
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "opal/mca/timer/timer.h"
|
||||
#include "opal/mca/base/mca_base_pvar.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/include/opal/prefetch.h"
|
||||
#include "opal/mca/threads/thread_usage.h"
|
||||
|
||||
#include MCA_timer_IMPLEMENTATION_HEADER
|
||||
|
||||
@@ -166,31 +158,31 @@ typedef enum ompi_spc_counters {
/* There is currently no support for atomics on long long values so we will default to
 * size_t for now until support for such atomics is implemented.
 */
typedef opal_atomic_size_t ompi_spc_value_t;
typedef long long ompi_spc_value_t;

/* A structure for storing the event data */
typedef struct ompi_spc_s{
    char *name;
    ompi_spc_value_t value;
    opal_atomic_int64_t value;
    opal_atomic_int32_t num_attached;
    bool is_high_watermark;
    bool is_timer_event;
} ompi_spc_t;

/* Events data structure initialization function */
void ompi_spc_events_init(void);
/* Definitions for using the SPC utility functions throughout the codebase.
|
||||
* If SPC_ENABLE is not 1, the macros become no-ops.
|
||||
*/
|
||||
#if SPC_ENABLE == 1
|
||||
|
||||
/* OMPI SPC utility functions */
|
||||
void ompi_spc_init(void);
|
||||
void ompi_spc_fini(void);
|
||||
void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value);
|
||||
void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles);
|
||||
void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles);
|
||||
void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum);
|
||||
void ompi_spc_cycles_to_usecs(ompi_spc_value_t *cycles);
|
||||
void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum);
|
||||
void ompi_spc_cycles_to_usecs(opal_timer_t *cycles);
|
||||
|
||||
/* Macros for using the SPC utility functions throughout the codebase.
|
||||
* If SPC_ENABLE is not 1, the macros become no-ops.
|
||||
/* An array of event structures to store the event data value, attachments, flags)
|
||||
* The memory is statically allocated to reduce the number of loads required.
|
||||
*/
|
||||
#if SPC_ENABLE == 1
|
||||
OPAL_DECLSPEC extern
|
||||
ompi_spc_t ompi_spc_events[OMPI_SPC_NUM_COUNTERS] __opal_attribute_aligned__(sizeof(ompi_spc_t));
|
||||
|
||||
#define SPC_INIT() \
|
||||
ompi_spc_init()
|
||||
@ -216,6 +208,79 @@ void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_e
|
||||
#define SPC_UPDATE_WATERMARK(watermark_enum, value_enum) \
|
||||
ompi_spc_update_watermark(watermark_enum, value_enum)
|
||||
|
||||
|
||||
/* Records an update to a counter using an atomic add operation. */
|
||||
static inline
|
||||
void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value)
|
||||
{
|
||||
/* Denoted unlikely because counters will often be turned off. */
|
||||
if( ompi_spc_events[event_id].num_attached > 0 ) {
|
||||
OPAL_THREAD_ADD_FETCH64(&(ompi_spc_events[event_id].value), value);
|
||||
}
|
||||
}
|
||||
|
||||
/* Checks a tag, and records the user version of the counter if it's greater
|
||||
* than or equal to 0 and records the mpi version of the counter otherwise.
|
||||
*/
|
||||
static inline
|
||||
void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum)
|
||||
{
|
||||
ompi_spc_record( (tag >= 0 ? user_enum : mpi_enum), value);
|
||||
}
|
||||
|
||||
/* Checks whether the counter denoted by value_enum exceeds the current value of the
 * counter denoted by watermark_enum, and if so sets the watermark_enum counter to the
 * value of the value_enum counter.
 */
static inline
void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum)
{
    ompi_spc_t *watermark_event = &ompi_spc_events[watermark_enum];
    ompi_spc_t *value_event = &ompi_spc_events[value_enum];
    /* Denoted unlikely because counters will often be turned off. */
    if( watermark_event->num_attached &&
        value_event->num_attached ) {
        int64_t watermark = watermark_event->value;
        int64_t value = value_event->value;
        /* Try to atomically replace the watermark while the value is larger
         * (i.e., while no thread has replaced it with a larger value, including this thread) */
        while (value > watermark &&
               !OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64(&watermark_event->value,
                                                       &watermark, value))
        { }
    }
}

/*
|
||||
* Starts cycle-precision timer and stores the start value in the 'cycles' argument.
|
||||
* The value is always stored in 'cycles' to avoid race conditions with other threads
|
||||
* activating a previously inactive timer counter in between start and stop.
|
||||
*/
|
||||
static inline
|
||||
void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles)
|
||||
{
|
||||
(void)event_id; /* unused */
|
||||
*cycles = 0;
|
||||
|
||||
if( (ompi_spc_events[event_id].num_attached > 0) ) {
|
||||
*cycles = opal_timer_base_get_cycles();
|
||||
}
|
||||
}
|
||||
|
||||
/* Stops a cycle-precision timer and calculates the total elapsed time
|
||||
* based on the starting time in 'cycles' and stores the result in the
|
||||
* 'cycles' argument.
|
||||
*/
|
||||
static inline
|
||||
void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles)
|
||||
{
|
||||
if( ompi_spc_events[event_id].num_attached > 0 && *cycles > 0 ) {
|
||||
*cycles = opal_timer_base_get_cycles() - *cycles;
|
||||
OPAL_THREAD_ADD_FETCH64(&ompi_spc_events[event_id].value, *cycles);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else /* SPCs are not enabled */
|
||||
|
||||
#define SPC_INIT() \
|
||||
|
@@ -74,6 +74,10 @@ int main(int argc, char **argv)
        MPI_result = MPI_T_pvar_get_info(i, name, &name_len, &verbosity,
                                         &var_class, &datatype, &enumtype, description, &desc_len, &bind,
                                         &readonly, &continuous, &atomic);
        if (MPI_result == MPI_T_ERR_INVALID) {
            // skip invalidated MPI_T pvars
            continue;
        }
        if(MPI_result != MPI_SUCCESS || MPI_result == MPI_T_ERR_PVAR_NO_STARTSTOP) {
            fprintf(stderr, "Failed to get pvar info.\n");
            MPI_Abort(MPI_COMM_WORLD, MPI_result);
@@ -82,6 +86,7 @@ int main(int argc, char **argv)
        if(strcmp(name, counter_names[rank]) == 0) {
            index = i;
            printf("[%d] %s -> %s\n", rank, name, description);
            break;
        }
    }