1
1
Use the PVAR ctx to store the SPC index, so that neither a lookup nor a
restriction on the position of the SPC vars is required.
Make sure the PVARs are always registered.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2019-05-17 15:32:17 -04:00
родитель cadf315ca9
Коммит dbf89404d7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1

Просмотреть файл

@ -1,11 +1,13 @@
/*
* Copyright (c) 2018 The University of Tennessee and The University
* Copyright (c) 2018-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -20,10 +22,8 @@ opal_timer_t sys_clock_freq_mhz = 0;
static void ompi_spc_dump(void);
/* Array for converting from SPC indices to MPI_T indices */
OMPI_DECLSPEC int mpi_t_offset = -1;
OMPI_DECLSPEC bool mpi_t_enabled = false;
OPAL_DECLSPEC ompi_communicator_t *comm = NULL;
static bool mpi_t_enabled = false;
static ompi_communicator_t *ompi_spc_comm = NULL;
typedef struct ompi_spc_event_t {
const char* counter_name;
@ -185,6 +185,8 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
return MPI_SUCCESS;
}
index = (int)(uintptr_t)pvar->ctx; /* Convert from MPI_T pvar index to SPC index */
/* For this event, we need to set count to the number of long long type
* values for this counter. All SPC counters are one long long, so we
* always set count to 1.
@ -194,14 +196,10 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
}
/* For this event, we need to turn on the counter */
else if(MCA_BASE_PVAR_HANDLE_START == event) {
/* Convert from MPI_T pvar index to SPC index */
index = pvar->pvar_index - mpi_t_offset;
SET_SPC_BIT(ompi_spc_attached_event, index);
}
/* For this event, we need to turn off the counter */
else if(MCA_BASE_PVAR_HANDLE_STOP == event) {
/* Convert from MPI_T pvar index to SPC index */
index = pvar->pvar_index - mpi_t_offset;
CLEAR_SPC_BIT(ompi_spc_attached_event, index);
}
@ -231,7 +229,7 @@ static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, v
}
/* Convert from MPI_T pvar index to SPC index */
int index = pvar->pvar_index - mpi_t_offset;
int index = (int)(uintptr_t)pvar->ctx;
/* Set the counter value to the current SPC value */
*counter_value = (long long)ompi_spc_events[index].value;
/* If this is a timer-based counter, convert from cycles to microseconds */
@ -268,7 +266,7 @@ void ompi_spc_events_init(void)
ompi_spc_events[i].value = 0;
}
ompi_comm_dup(&ompi_mpi_comm_world.comm, &comm);
ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm);
}
/* Initializes the SPC data structures and registers all counters as MPI_T pvars.
@ -287,14 +285,6 @@ void ompi_spc_init(void)
char **arg_strings = opal_argv_split(ompi_mpi_spc_attach_string, ',');
int num_args = opal_argv_count(arg_strings);
/* Reset all timer-based counters */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
CLEAR_SPC_BIT(ompi_spc_timer_event, i);
}
/* If this is a timer event, set the corresponding timer_event entry */
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
/* If there is only one argument and it is 'all', then all counters
* should be turned on. If the size is 0, then no counters will be enabled.
*/
@ -304,47 +294,43 @@ void ompi_spc_init(void)
}
}
/* Turn on only the counters that were specified in the MCA parameter */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
if(all_on) {
found++;
} else {
matched = 0;
/* Note: If no arguments were given, this will be skipped */
/* Reset all timer-based counters */
CLEAR_SPC_BIT(ompi_spc_timer_event, i);
matched = all_on;
if( !matched ) {
/* Turn on only the counters that were specified in the MCA parameter */
for(j = 0; j < num_args; j++) {
if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) {
found++;
matched = 1;
break;
}
}
}
if (all_on || matched) {
if (matched) {
SET_SPC_BIT(ompi_spc_attached_event, i);
mpi_t_enabled = true;
found++;
}
/* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */
ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description,
OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
ompi_spc_get_count, NULL, ompi_spc_notify, NULL);
/* Check to make sure that ret is a valid index and not an error code */
if( ret >= 0 ) {
if( mpi_t_offset == -1 ) {
mpi_t_offset = ret;
}
}
if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) {
mpi_t_enabled = false;
opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true);
break;
}
/* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */
ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description,
OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
ompi_spc_get_count, NULL, ompi_spc_notify, (void*)(uintptr_t)i);
if( ret < 0 ) {
mpi_t_enabled = false;
opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true);
break;
}
}
/* If this is a timer event, set the corresponding timer_event entry */
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
opal_argv_free(arg_strings);
}
@ -356,8 +342,8 @@ static void ompi_spc_dump(void)
int i, j, world_size, offset;
long long *recv_buffer = NULL, *send_buffer;
int rank = ompi_comm_rank(comm);
world_size = ompi_comm_size(comm);
int rank = ompi_comm_rank(ompi_spc_comm);
world_size = ompi_comm_size(ompi_spc_comm);
/* Convert from cycles to usecs before sending */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
@ -384,10 +370,10 @@ static void ompi_spc_dump(void)
return;
}
}
(void)comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG,
(void)ompi_spc_comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG,
recv_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG,
0, comm,
comm->c_coll->coll_gather_module);
0, ompi_spc_comm,
ompi_spc_comm->c_coll->coll_gather_module);
/* Once rank 0 has all of the information, print the aggregated counter values for each rank in order */
if(rank == 0) {
@ -413,7 +399,7 @@ static void ompi_spc_dump(void)
}
free(send_buffer);
comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module);
ompi_spc_comm->c_coll->coll_barrier(ompi_spc_comm, ompi_spc_comm->c_coll->coll_barrier_module);
}
/* Frees any dynamically allocated OMPI SPC data structures */
@ -424,7 +410,7 @@ void ompi_spc_fini(void)
}
free(ompi_spc_events); ompi_spc_events = NULL;
ompi_comm_free(&comm);
ompi_comm_free(&ompi_spc_comm);
}
/* Records an update to a counter using an atomic add operation. */