1
1
Use the PVAR ctx to store the SPC index, so that neither a lookup nor a
restriction on the position of the SPC vars is needed.
Make sure the PVARs are always registered.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2019-05-17 15:32:17 -04:00
родитель cadf315ca9
Коммит dbf89404d7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1

Просмотреть файл

@ -1,11 +1,13 @@
/* /*
* Copyright (c) 2018 The University of Tennessee and The University * Copyright (c) 2018-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* *
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2018 Research Organization for Information Science * Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -20,10 +22,8 @@ opal_timer_t sys_clock_freq_mhz = 0;
static void ompi_spc_dump(void); static void ompi_spc_dump(void);
/* Array for converting from SPC indices to MPI_T indices */ /* Array for converting from SPC indices to MPI_T indices */
OMPI_DECLSPEC int mpi_t_offset = -1; static bool mpi_t_enabled = false;
OMPI_DECLSPEC bool mpi_t_enabled = false; static ompi_communicator_t *ompi_spc_comm = NULL;
OPAL_DECLSPEC ompi_communicator_t *comm = NULL;
typedef struct ompi_spc_event_t { typedef struct ompi_spc_event_t {
const char* counter_name; const char* counter_name;
@ -185,6 +185,8 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
return MPI_SUCCESS; return MPI_SUCCESS;
} }
index = (int)(uintptr_t)pvar->ctx; /* Convert from MPI_T pvar index to SPC index */
/* For this event, we need to set count to the number of long long type /* For this event, we need to set count to the number of long long type
* values for this counter. All SPC counters are one long long, so we * values for this counter. All SPC counters are one long long, so we
* always set count to 1. * always set count to 1.
@ -194,14 +196,10 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v
} }
/* For this event, we need to turn on the counter */ /* For this event, we need to turn on the counter */
else if(MCA_BASE_PVAR_HANDLE_START == event) { else if(MCA_BASE_PVAR_HANDLE_START == event) {
/* Convert from MPI_T pvar index to SPC index */
index = pvar->pvar_index - mpi_t_offset;
SET_SPC_BIT(ompi_spc_attached_event, index); SET_SPC_BIT(ompi_spc_attached_event, index);
} }
/* For this event, we need to turn off the counter */ /* For this event, we need to turn off the counter */
else if(MCA_BASE_PVAR_HANDLE_STOP == event) { else if(MCA_BASE_PVAR_HANDLE_STOP == event) {
/* Convert from MPI_T pvar index to SPC index */
index = pvar->pvar_index - mpi_t_offset;
CLEAR_SPC_BIT(ompi_spc_attached_event, index); CLEAR_SPC_BIT(ompi_spc_attached_event, index);
} }
@ -231,7 +229,7 @@ static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, v
} }
/* Convert from MPI_T pvar index to SPC index */ /* Convert from MPI_T pvar index to SPC index */
int index = pvar->pvar_index - mpi_t_offset; int index = (int)(uintptr_t)pvar->ctx;
/* Set the counter value to the current SPC value */ /* Set the counter value to the current SPC value */
*counter_value = (long long)ompi_spc_events[index].value; *counter_value = (long long)ompi_spc_events[index].value;
/* If this is a timer-based counter, convert from cycles to microseconds */ /* If this is a timer-based counter, convert from cycles to microseconds */
@ -268,7 +266,7 @@ void ompi_spc_events_init(void)
ompi_spc_events[i].value = 0; ompi_spc_events[i].value = 0;
} }
ompi_comm_dup(&ompi_mpi_comm_world.comm, &comm); ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm);
} }
/* Initializes the SPC data structures and registers all counters as MPI_T pvars. /* Initializes the SPC data structures and registers all counters as MPI_T pvars.
@ -287,14 +285,6 @@ void ompi_spc_init(void)
char **arg_strings = opal_argv_split(ompi_mpi_spc_attach_string, ','); char **arg_strings = opal_argv_split(ompi_mpi_spc_attach_string, ',');
int num_args = opal_argv_count(arg_strings); int num_args = opal_argv_count(arg_strings);
/* Reset all timer-based counters */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
CLEAR_SPC_BIT(ompi_spc_timer_event, i);
}
/* If this is a timer event, set the corresponding timer_event entry */
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
/* If there is only one argument and it is 'all', then all counters /* If there is only one argument and it is 'all', then all counters
* should be turned on. If the size is 0, then no counters will be enabled. * should be turned on. If the size is 0, then no counters will be enabled.
*/ */
@ -304,47 +294,43 @@ void ompi_spc_init(void)
} }
} }
/* Turn on only the counters that were specified in the MCA parameter */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
if(all_on) { /* Reset all timer-based counters */
found++; CLEAR_SPC_BIT(ompi_spc_timer_event, i);
} else { matched = all_on;
matched = 0;
/* Note: If no arguments were given, this will be skipped */ if( !matched ) {
/* Turn on only the counters that were specified in the MCA parameter */
for(j = 0; j < num_args; j++) { for(j = 0; j < num_args; j++) {
if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) { if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) {
found++;
matched = 1; matched = 1;
break; break;
} }
} }
} }
if (all_on || matched) { if (matched) {
SET_SPC_BIT(ompi_spc_attached_event, i); SET_SPC_BIT(ompi_spc_attached_event, i);
mpi_t_enabled = true; mpi_t_enabled = true;
found++;
}
/* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */
ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description,
OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
ompi_spc_get_count, NULL, ompi_spc_notify, NULL); ompi_spc_get_count, NULL, ompi_spc_notify, (void*)(uintptr_t)i);
if( ret < 0 ) {
/* Check to make sure that ret is a valid index and not an error code */ mpi_t_enabled = false;
if( ret >= 0 ) { opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true);
if( mpi_t_offset == -1 ) { break;
mpi_t_offset = ret;
}
}
if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) {
mpi_t_enabled = false;
opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true);
break;
}
} }
} }
/* If this is a timer event, set the corresponding timer_event entry */
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
opal_argv_free(arg_strings); opal_argv_free(arg_strings);
} }
@ -356,8 +342,8 @@ static void ompi_spc_dump(void)
int i, j, world_size, offset; int i, j, world_size, offset;
long long *recv_buffer = NULL, *send_buffer; long long *recv_buffer = NULL, *send_buffer;
int rank = ompi_comm_rank(comm); int rank = ompi_comm_rank(ompi_spc_comm);
world_size = ompi_comm_size(comm); world_size = ompi_comm_size(ompi_spc_comm);
/* Convert from cycles to usecs before sending */ /* Convert from cycles to usecs before sending */
for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
@ -384,10 +370,10 @@ static void ompi_spc_dump(void)
return; return;
} }
} }
(void)comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, (void)ompi_spc_comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG,
recv_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG,
0, comm, 0, ompi_spc_comm,
comm->c_coll->coll_gather_module); ompi_spc_comm->c_coll->coll_gather_module);
/* Once rank 0 has all of the information, print the aggregated counter values for each rank in order */ /* Once rank 0 has all of the information, print the aggregated counter values for each rank in order */
if(rank == 0) { if(rank == 0) {
@ -413,7 +399,7 @@ static void ompi_spc_dump(void)
} }
free(send_buffer); free(send_buffer);
comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module); ompi_spc_comm->c_coll->coll_barrier(ompi_spc_comm, ompi_spc_comm->c_coll->coll_barrier_module);
} }
/* Frees any dynamically allocated OMPI SPC data structures */ /* Frees any dynamically allocated OMPI SPC data structures */
@ -424,7 +410,7 @@ void ompi_spc_fini(void)
} }
free(ompi_spc_events); ompi_spc_events = NULL; free(ompi_spc_events); ompi_spc_events = NULL;
ompi_comm_free(&comm); ompi_comm_free(&ompi_spc_comm);
} }
/* Records an update to a counter using an atomic add operation. */ /* Records an update to a counter using an atomic add operation. */