6569019b06
This commit moves all the module stats into their own struct so that the stats only need to appear as a single line in the module_t definition, and then moves all the logic for reporting the stats into btl_usnic_stats.c|h. Further, the stats are now exported as MPI_T_BIND_NO_OBJECT entities (i.e., not bound to any particular MPI handle), and are marked as READONLY and CONTINUOUS. They currently all default to verbose level 5 ("Application tuner / detailed", according to https://svn.open-mpi.org/trac/ompi/wiki/MCAParamLevels). Most of the statistics are counters, but a small number are high watermark values. Due to how counters are reported via MPI_T, none of the counters are exported through MPI_T if the MCA param btl_usnic_stats_relative=1 (i.e., the module resets the stats back to zero at a given frequency). When MPI_T_pvar_handle_alloc() is invoked on any of these pvars, it will return a count that is equal to the number of active usnic BTL modules. The values returned for any given pvar (e.g., num_total_sends) are an array containing one value for each active usnic BTL module. The ordering of values in the array is both consistent across all usnic pvars and stable throughout a single job: array slot 0 corresponds to module X, array slot 1 corresponds to module Y, etc. Mapping which array slot corresponds to which underlying Linux usnic_X device works as follows: * The btl_usnic_devices MPI_T state pvar is associated with a btl_usnic_device MPI_T enum, and can be obtained via MPI_T_pvar_get_info(). * If all usNIC pvars are of length N, the values [0,N) in the btl_usnic_device enum are associated with strings of the corresponding underlying Linux device. For example, to look up which Linux device is reported in all usNIC pvars' array slot 1, look up the int value 1 in the btl_usnic_devices enum. Its corresponding string value is the underlying Linux device name (e.g., "usnic_1"). cmr=v1.7.4:subject="usnic BTL MPI_T pvars" This commit was SVN r29545.
82 строки
1.8 KiB
C
82 строки
1.8 KiB
C
/*
|
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
* Statistics for the usnic BTL component.
|
|
*/
|
|
|
|
#ifndef OMPI_BTL_USNIC_STATS_H
|
|
#define OMPI_BTL_USNIC_STATS_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include "opal/mca/event/event.h"
|
|
|
|
|
|
/**
|
|
* Struct containing all the statistics that are tracked
|
|
*/
|
|
typedef struct ompi_btl_usnic_module_stats_t {
|
|
bool final_stats;
|
|
uint64_t report_num;
|
|
|
|
uint64_t num_total_sends;
|
|
uint64_t num_resends;
|
|
uint64_t num_timeout_retrans;
|
|
uint64_t num_fast_retrans;
|
|
uint64_t num_chunk_sends;
|
|
uint64_t num_frag_sends;
|
|
uint64_t num_ack_sends;
|
|
|
|
uint64_t num_total_recvs;
|
|
uint64_t num_unk_recvs;
|
|
uint64_t num_dup_recvs;
|
|
uint64_t num_oow_low_recvs;
|
|
uint64_t num_oow_high_recvs;
|
|
uint64_t num_frag_recvs;
|
|
uint64_t num_chunk_recvs;
|
|
uint64_t num_badfrag_recvs;
|
|
uint64_t num_ack_recvs;
|
|
uint64_t num_old_dup_acks;
|
|
uint64_t num_dup_acks;
|
|
uint64_t num_recv_reposts;
|
|
uint64_t num_crc_errors;
|
|
|
|
uint64_t max_sent_window_size;
|
|
uint64_t max_rcvd_window_size;
|
|
|
|
uint64_t pml_module_sends;
|
|
uint64_t pml_send_callbacks;
|
|
|
|
opal_event_t timer_event;
|
|
struct timeval timeout;
|
|
} ompi_btl_usnic_module_stats_t;
|
|
|
|
|
|
/*
 * File-scope forward declaration of the module struct tag.  Without
 * it, a "struct ompi_btl_usnic_module_t" that first appears inside a
 * prototype's parameter list only has prototype scope, which makes the
 * prototype incompatible with the real module type unless some other
 * header happened to declare the tag earlier.  Repeating an incomplete
 * struct declaration is harmless if it was already declared.
 */
struct ompi_btl_usnic_module_t;

/**
 * Initialize the stats on a module.
 *
 * The parameter is spelled "struct ompi_btl_usnic_module_t *" so that
 * only the struct tag is needed here, which avoids an #include cycle.
 *
 * @param module  module whose stats are to be initialized
 *
 * @return an int status code; the specific values come from the
 *         implementation, which is not visible in this header
 *         (presumably OMPI_SUCCESS on success -- confirm)
 */
int ompi_btl_usnic_stats_init(struct ompi_btl_usnic_module_t *module);
|
|
|
|
/**
 * Finalize the stats on a module.
 *
 * The parameter is spelled "struct ompi_btl_usnic_module_t *" so that
 * only the struct tag is needed here, which avoids an #include cycle.
 *
 * @param module  module whose stats are to be finalized
 *
 * @return an int status code; the specific values come from the
 *         implementation, which is not visible in this header
 *         (presumably OMPI_SUCCESS on success -- confirm)
 */
int ompi_btl_usnic_stats_finalize(struct ompi_btl_usnic_module_t *module);
|
|
|
|
/**
 * Initialize the MPI_T performance variables (pvars).  This is done
 * once for all modules, not per module, which is why the function
 * takes no arguments.
 *
 * @return an int status code; the specific values come from the
 *         implementation, which is not visible in this header
 *         (presumably OMPI_SUCCESS on success -- confirm)
 */
int ompi_btl_usnic_setup_mpit_pvars(void);
|
|
|
|
#endif
|