diff --git a/ompi/mca/hook/comm_method/Makefile.am b/ompi/mca/hook/comm_method/Makefile.am new file mode 100644 index 0000000000..3cc2f3e993 --- /dev/null +++ b/ompi/mca/hook/comm_method/Makefile.am @@ -0,0 +1,20 @@ +# +# Copyright (c) 2018 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + hook_comm_method.h \ + hook_comm_method_component.c \ + hook_comm_method_fns.c + +# This component will only ever be built statically -- never as a DSO. + +noinst_LTLIBRARIES = libmca_hook_comm_method.la + +libmca_hook_comm_method_la_SOURCES = $(sources) +libmca_hook_comm_method_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/hook/comm_method/configure.m4 b/ompi/mca/hook/comm_method/configure.m4 new file mode 100644 index 0000000000..d3dd70973a --- /dev/null +++ b/ompi/mca/hook/comm_method/configure.m4 @@ -0,0 +1,25 @@ +# +# Copyright (c) 2018 IBM Corporation. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make this a static component +AC_DEFUN([MCA_ompi_hook_comm_method_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# MCA_hook_comm_method_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_hook_comm_method_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/hook/comm_method/Makefile]) + + $1 +]) diff --git a/ompi/mca/hook/comm_method/hook_comm_method.h b/ompi/mca/hook/comm_method/hook_comm_method.h new file mode 100644 index 0000000000..e5251f25c5 --- /dev/null +++ b/ompi/mca/hook/comm_method/hook_comm_method.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef MCA_HOOK_COMM_METHOD_H +#define MCA_HOOK_COMM_METHOD_H + +#include "ompi_config.h" + +#include "ompi/constants.h" + +#include "ompi/mca/hook/hook.h" +#include "ompi/mca/hook/base/base.h" + +BEGIN_C_DECLS + +OMPI_MODULE_DECLSPEC extern const ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component; + +extern int mca_hook_comm_method_verbose; +extern int mca_hook_comm_method_output; +extern bool hook_comm_method_enable_mpi_init; +extern bool hook_comm_method_enable_mpi_finalize; +extern int hook_comm_method_max; +extern int hook_comm_method_brief; +extern char *hook_comm_method_fakefile; + +void ompi_hook_comm_method_mpi_init_bottom(int argc, char **argv, int requested, int *provided); + +void ompi_hook_comm_method_mpi_finalize_top(void); + +END_C_DECLS + +#endif /* MCA_HOOK_COMM_METHOD_H */ diff --git a/ompi/mca/hook/comm_method/hook_comm_method_component.c b/ompi/mca/hook/comm_method/hook_comm_method_component.c new file mode 100644 index 0000000000..e39fe78b3f --- /dev/null +++ b/ompi/mca/hook/comm_method/hook_comm_method_component.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "hook_comm_method.h"
+
+static int ompi_hook_comm_method_component_open(void);
+static int ompi_hook_comm_method_component_close(void);
+static int ompi_hook_comm_method_component_register(void);
+
+/*
+ * Public string showing the component version number
+ */
+const char *mca_hook_comm_method_component_version_string =
+    "Open MPI 'comm_method' hook MCA component version " OMPI_VERSION;
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+const ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component = {
+
+    /* First, the mca_component_t struct containing meta information
+     * about the component itself */
+    .hookm_version = {
+        OMPI_HOOK_BASE_VERSION_1_0_0,
+
+        /* Component name and version */
+        .mca_component_name = "comm_method",
+        MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
+                              OMPI_RELEASE_VERSION),
+
+        /* Component open and close functions */
+        .mca_open_component = ompi_hook_comm_method_component_open,
+        .mca_close_component = ompi_hook_comm_method_component_close,
+        .mca_register_component_params = ompi_hook_comm_method_component_register,
+
+        // Force this component to always be considered - component must be static
+        //.mca_component_flags = MCA_BASE_COMPONENT_FLAG_ALWAYS_CONSIDER,
+    },
+    .hookm_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+
+    /* Component functions: only the bottom of MPI_Init and the top of
+     * MPI_Finalize are hooked, every other hook point is unused. */
+    .hookm_mpi_initialized_top = NULL,
+    .hookm_mpi_initialized_bottom = NULL,
+
+    .hookm_mpi_finalized_top = NULL,
+    .hookm_mpi_finalized_bottom = NULL,
+
+    .hookm_mpi_init_top = NULL,
+    .hookm_mpi_init_top_post_opal = NULL,
+    .hookm_mpi_init_bottom = ompi_hook_comm_method_mpi_init_bottom,
+    .hookm_mpi_init_error = NULL,
+
+    .hookm_mpi_finalize_top = ompi_hook_comm_method_mpi_finalize_top,
+    .hookm_mpi_finalize_bottom = NULL,
+};
+
+/* Storage for the MCA variables registered below (declared in hook_comm_method.h) */
+int mca_hook_comm_method_verbose = 0;
+int mca_hook_comm_method_output  = -1;
+bool hook_comm_method_enable_mpi_init = false;
+bool hook_comm_method_enable_mpi_finalize = false;
+int hook_comm_method_max = 12;
+int hook_comm_method_brief = 0;
+char *hook_comm_method_fakefile = NULL;
+
+/* Component open callback: no per-component state to set up. */
+static int ompi_hook_comm_method_component_open(void)
+{
+    // Nothing to do
+    return OMPI_SUCCESS;
+}
+
+/* Component close callback: nothing was acquired in open, so nothing to release. */
+static int ompi_hook_comm_method_component_close(void)
+{
+    // Nothing to do
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Register every MCA variable this component understands and derive the
+ * enable_mpi_init / enable_mpi_finalize flags from them.
+ *
+ * Always returns OMPI_SUCCESS; the return values of the individual
+ * registration calls are deliberately discarded.
+ */
+static int ompi_hook_comm_method_component_register(void)
+{
+    // Backing storage for the "ompi_comm_method" variable.  The address is
+    // handed to mca_base_var_register(), so it must not live on this
+    // function's stack frame: it has to stay valid after we return.
+    // NOTE(review): assumes the MCA var system keeps the storage pointer for
+    // as long as the variable is registered - confirm against mca_base_var.h.
+    static int hook_comm_method = -1;
+
+    /*
+     * Component verbosity level
+     */
+    // Inherit the verbosity of the base framework, but also allow this to be overridden
+    if( ompi_hook_base_framework.framework_verbose > MCA_BASE_VERBOSE_NONE ) {
+        mca_hook_comm_method_verbose = ompi_hook_base_framework.framework_verbose;
+    }
+    else {
+        mca_hook_comm_method_verbose = MCA_BASE_VERBOSE_NONE;
+    }
+    (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "verbose",
+                                           NULL,
+                                           MCA_BASE_VAR_TYPE_INT, NULL,
+                                           0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &mca_hook_comm_method_verbose);
+
+    mca_hook_comm_method_output = opal_output_open(NULL);
+    opal_output_set_verbosity(mca_hook_comm_method_output, mca_hook_comm_method_verbose);
+
+    /*
+     * If the component is active for mpi_init / mpi_finalize
+     */
+    hook_comm_method_enable_mpi_init = false;
+    (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "enable_mpi_init",
+                                           "Enable comm_method behavior on mpi_init",
+                                           MCA_BASE_VAR_TYPE_BOOL, NULL,
+                                           0, 0,
+                                           OPAL_INFO_LVL_3,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &hook_comm_method_enable_mpi_init);
+
+    hook_comm_method_enable_mpi_finalize = false;
+    (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "enable_mpi_finalize",
+                                           "Enable comm_method behavior on mpi_finalize",
+                                           MCA_BASE_VAR_TYPE_BOOL, NULL,
+                                           0, 0,
+                                           OPAL_INFO_LVL_3,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &hook_comm_method_enable_mpi_finalize);
+
+    // User can set the comm_method mca variable too
+    hook_comm_method = -1;
+    (void) mca_base_var_register("ompi", NULL, NULL, "comm_method",
+                                 "Enable comm_method behavior (1) mpi_init or (2) mpi_finalize",
+                                 MCA_BASE_VAR_TYPE_INT, NULL,
+                                 0, 0,
+                                 OPAL_INFO_LVL_3,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &hook_comm_method);
+
+    // The shorthand variable only ever switches a hook on; it never
+    // clears a flag that was set through the enable_* variables above.
+    if( 1 == hook_comm_method ) {
+        hook_comm_method_enable_mpi_init = true;
+    }
+    else if( 2 == hook_comm_method ) {
+        hook_comm_method_enable_mpi_finalize = true;
+    }
+
+    // comm_method_max
+    (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_max",
+                                 "Number of hosts for which to print unabbreviated 2d table of comm methods.",
+                                 MCA_BASE_VAR_TYPE_INT, NULL,
+                                 0, 0,
+                                 OPAL_INFO_LVL_3,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &hook_comm_method_max);
+    // comm_method_brief
+    (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_brief",
+                                 "Only print the comm method summary, skip the 2d table.",
+                                 MCA_BASE_VAR_TYPE_INT, NULL,
+                                 0, 0,
+                                 OPAL_INFO_LVL_3,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &hook_comm_method_brief);
+
+    // comm_method_fakefile is just for debugging, allows complete override of all the
+    // comm method in the table
+    (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_fakefile",
+                                 "For debugging only: read comm methods from a file",
+                                 MCA_BASE_VAR_TYPE_STRING, NULL,
+                                 0, 0,
+                                 OPAL_INFO_LVL_3,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &hook_comm_method_fakefile);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/hook/comm_method/hook_comm_method_fns.c b/ompi/mca/hook/comm_method/hook_comm_method_fns.c
new file mode 100644
index 0000000000..3266c54d2a
--- /dev/null
+++ b/ompi/mca/hook/comm_method/hook_comm_method_fns.c
@@ -0,0 +1,882 @@
+/*
+ * Copyright (c) 2016-2018 IBM Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "hook_comm_method.h"
+
+#ifdef HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include "ompi/communicator/communicator.h"
+#include "ompi/mca/pml/pml.h"
+#include "opal/mca/btl/btl.h"
+#include "ompi/mca/pml/base/base.h"
+#include "ompi/mca/bml/base/base.h"
+#include "ompi/mca/mtl/base/base.h"
+
+// In regular strncpy up to n bytes are copied, so if the 'to' buffer
+// was char string[16] and you called strncpy(string, , 16) you could
+// get 16 bytes of chars without a null.  My preferred API is to let
+// n be the size of the buffer, and to let n-1 chars be copied, and
+// to guarantee null termination.
+//
+// to   : destination buffer of at least n bytes
+// from : source string; NULL is treated as the empty string
+// n    : size of the destination buffer; nothing is written if n <= 0
+static void
+mystrncpy(char *to, const char *from, int n) {
+    // With n <= 0 there is no room even for the terminator (and
+    // to[n-1] would be out of bounds), so leave the buffer alone.
+    if (!to || n <= 0) { return; }
+    // Several callers pass the result of a lookup that can be NULL;
+    // handing NULL to strncpy is undefined behavior.
+    if (!from) { to[0] = 0; return; }
+    strncpy(to, from, n-1);
+    to[n-1] = 0;
+}
+
+// For converting comm_method strings to comm_method id# and back.
+// This starts as our local set of strings, but gets Allreduced into
+// a global mapping so all the strings at all the ranks are represented.
+// If an MCA's name is more than 15 chars it gets truncated.
+#define COMM_METHOD_STRING_SIZE 16
+#define MAX_COMM_METHODS 50
+// Table of known comm method names: n valid entries in str[],
+// each a NUL-terminated name of at most COMM_METHOD_STRING_SIZE-1 chars.
+typedef struct {
+    int n;
+    char str[MAX_COMM_METHODS][COMM_METHOD_STRING_SIZE];
+} comm_method_string_conversion_t;
+
+static comm_method_string_conversion_t comm_method_string_conversion;
+
+#define MODE_IS_PML 1
+#define MODE_IS_MTL 2
+#define MODE_IS_BTL 3
+
+// ----------------------------------------------------------------------------
+
+// Return the mca_component_name of the selected PML component.
+// The string belongs to the component; the caller must not free it.
+static char*
+lookup_pml_name(void)
+{
+    return (char*) mca_pml_base_selected_component.pmlm_version.mca_component_name;
+}
+// Return the mca_component_name of the selected MTL component,
+// or NULL when no MTL component is selected.
+static char*
+lookup_mtl_name(void)
+{
+    if (!ompi_mtl_base_selected_component) { return NULL; }
+    return (char*) ompi_mtl_base_selected_component->mtl_version.mca_component_name;
+}
+// Find the mca_component_name of the first send BTL for the incoming
+// comm,rank.  Returns NULL when no send BTL can be determined for the peer.
+static char*
+lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
+    ompi_proc_t *dst_proc = ompi_group_peer_lookup_existing(comm->c_remote_group, rank);
+    mca_bml_base_endpoint_t* endpoint;
+
+    // NOTE(review): the "_existing" lookup presumably returns NULL for a peer
+    // whose proc has not been instantiated yet, and the endpoint getter
+    // dereferences the proc - confirm against ompi/group/group.h and bml.h.
+    if (!dst_proc) { return NULL; }
+
+    endpoint = mca_bml_base_get_endpoint(dst_proc);
+    if (endpoint &&
+        endpoint->btl_send.bml_btls &&
+        endpoint->btl_send.bml_btls[0].btl)
+    {
+        return (char*)
+            endpoint->btl_send.bml_btls[0].btl->btl_component->btl_version.mca_component_name;
+    }
+    return NULL;
+}
+
+// Use the above to lookup the mca_component_name for the rank's
+// sending BTL/MTL/PML.  The extra 3rd argument returns whether
+// MODE_IS_BTL / MTL / PML (can pass NULL if you don't want that info).
+//
+// And this one puts the result into malloced mem of size
+// COMM_METHOD_STRING_SIZE
+// that the caller has to free.
+static char *
+comm_method_string(MPI_Comm comm, int rank, int *comm_mode) {
+    char *pml_name;
+    char *name;
+    char *string = malloc(COMM_METHOD_STRING_SIZE);
+
+    if (!string) { return NULL; }
+
+    pml_name = lookup_pml_name();
+    if (pml_name && 0==strncmp("ob1", pml_name, 4)) { // BTL
+        if (comm_mode) { *comm_mode = MODE_IS_BTL; }
+        name = lookup_btl_name_for_send(comm, rank);
+    }
+    else if (pml_name && 0==strncmp("cm", pml_name, 3)) { // MTL
+        if (comm_mode) { *comm_mode = MODE_IS_MTL; }
+        name = lookup_mtl_name();
+    } else { // PML
+        if (comm_mode) { *comm_mode = MODE_IS_PML; }
+        name = pml_name;
+    }
+    // Any of the lookups can come back NULL (no endpoint yet, no MTL
+    // selected, ...).  Report those as "n/a", which is entry 0 of the
+    // conversion table, instead of passing NULL on to the string copy.
+    mystrncpy(string, name ? name : "n/a", COMM_METHOD_STRING_SIZE);
+    return string;
+}
+
+// ----------------------------------------------------------------------------
+
+// Managing the comm_method_string_conversion structure
+// and using it to convert strings to/from id numbers:
+
+// The data should be
+//     string 0 == "n/a" for unconnected / unknown
+//     string 1,2,... == "tcp" "self", etc, sorted
+// self is important enough we want to make sure it's always in the list
+static void
+init_string_to_conversion_struct(comm_method_string_conversion_t *data)
+{
+    data->n = 0;
+    strcpy(data->str[data->n], "n/a");
+    ++(data->n);
+    strcpy(data->str[data->n], "self");
+    ++(data->n);
+}
+
+// Return the index of 'string' in data->str[], or 0 if it is not there.
+// Index 0 is the "n/a" entry, so "not found" and "n/a" are deliberately
+// indistinguishable to the caller.
+static int
+lookup_string_in_conversion_struct(comm_method_string_conversion_t *data, char *string)
+{
+    int i;
+    for (i=0; i<data->n; ++i) {
+        if (0==strncmp(data->str[i], string, COMM_METHOD_STRING_SIZE)) {
+            return i;
+        }
+    }
+    return 0;
+}
+
+// For qsort of the str[] array in a comm_method_string_conversion_t
+static int mycompar(const void *a, const void *b) {
+    return strcmp(a, b);
+}
+
+// Add 'string' to the table if it is not already present, keeping
+// entries 1..n-1 sorted.  NULL, empty and "n/a" strings are ignored,
+// and so is any new string once the table holds MAX_COMM_METHODS entries.
+static void
+add_string_to_conversion_struct(comm_method_string_conversion_t *data, char *string)
+{
+    int i;
+    // comm_method_string() returns NULL when its malloc fails, and callers
+    // pass that result straight in here; strcmp on NULL would crash.
+    if (!string || !string[0]) { return; }
+    if (0 == strcmp(string, "n/a")) { return; }
+
+    i = lookup_string_in_conversion_struct(data, string);
+    if (i == 0) { // didn't find string in list, so add it
+        if (data->n < MAX_COMM_METHODS) {
+            mystrncpy(data->str[data->n], string, COMM_METHOD_STRING_SIZE);
+            ++(data->n);
+        }
+    }
+    // entry 0 ("n/a") stays pinned in place, only the rest is sorted
+    qsort(&data->str[1], data->n - 1, COMM_METHOD_STRING_SIZE, &mycompar);
+}
+
+// For MPI_Allreduce of a comm_method_string_conversion_t:
+// merges every string of each invec element into the matching
+// inoutvec element (a set union of the two name tables).
+static void myfn(void* invec, void* inoutvec, int *len, MPI_Datatype *dt) {
+    comm_method_string_conversion_t *a, *b;
+    int i, j;
+
+    for (i=0; i<*len; ++i) {
+        b = &((comm_method_string_conversion_t*)invec)[i];
+        a = &((comm_method_string_conversion_t*)inoutvec)[i];
+        for (j=0; j<b->n; ++j) { // for each entry j in 'b', add it to 'a'
+            add_string_to_conversion_struct(a, b->str[j]);
+        }
+        qsort(&a->str[1], a->n - 1, COMM_METHOD_STRING_SIZE, &mycompar);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+// Use the static global comm_method_string_conversion to convert
+// between comm_method string and id number
+
+// This might convert "pami" for example to 1, "yalla" to 2, etc.
+static int
+string_to_comm_method(char *str) {
+    // default to "n/a" for any bad or unrecognized inputs
+    if (!str || !str[0]) { return 0; }
+
+    return lookup_string_in_conversion_struct(&comm_method_string_conversion, str);
+}
+
+// Convert an id number back to its name.  Ids outside the table
+// map to entry 0 rather than indexing out of bounds.
+static char *
+comm_method_to_string(int id) {
+    if (id < 0 || id >= comm_method_string_conversion.n) { id = 0; }
+    return comm_method_string_conversion.str[id];
+}
+
+// Id number of the method used to send to 'rank' of 'comm'
+// (0, i.e. "n/a", when it cannot be determined).
+static int
+comm_method(MPI_Comm comm, int rank) {
+    char *p = comm_method_string(comm, rank, NULL);
+    int id = string_to_comm_method(p);
+    free(p);
+    return id;
+}
+
+#define COMM_METHOD_SELF (string_to_comm_method("self"))
+#define NUM_COMM_METHODS (comm_method_string_conversion.n)
+
+// ----------------------------------------------------------------------------
+
+typedef void (*VoidFuncPtr)(void); // a function pointer to a function that takes no arguments and returns void.
+static char* comm_method_string(MPI_Comm comm, int rank, int *comm_mode); +static int comm_method(MPI_Comm comm, int rank); +static char* comm_method_to_string(int method); +static int icompar(const void *a, const void *b); +static void abbreviate_list_into_string(char *str, int max, int *list, int nlist); +static void ompi_report_comm_methods(int called_from_location); + +void ompi_hook_comm_method_mpi_init_bottom(int argc, char **argv, int requested, int *provided) +{ + if( hook_comm_method_enable_mpi_init ) { + ompi_report_comm_methods( 1 ); + } +} + +void ompi_hook_comm_method_mpi_finalize_top(void) +{ + if( hook_comm_method_enable_mpi_finalize ) { + ompi_report_comm_methods( 2 ); + } +} + +// ---------------------------------------------------------------------------- + +static int +icompar(const void *a, const void *b) { + if (*(int*)a < *(int*)b) { return -1; } + if (*(int*)a > *(int*)b) { return 1; } + return 0; +} + +// Input list[] is expected to be sorted +static void +abbreviate_list_into_string(char *str, int max, int *list, int nlist) +{ + int lo, hi; + int i; + int per, tmp; + +/* + * How much space do we need in strings to store rank numbers. + * A 10000 rank run needs more digits to write the rank numbers in than + * a 4 rank job. + */ + per = 1; + tmp = list[nlist-1]; + while (tmp >= 10) { ++per; tmp /= 10; } + + str[0] = 0; + lo = hi = -1; + for (i=0; i hi) { + if (strlen(str)==0 || str[strlen(str)-1] != '.') { + if (strlen(str) != 0) { + strcpy(&str[strlen(str)], ", "); + } + if (lo != hi) { + sprintf(&str[strlen(str)], "%d - %d", lo, hi); + } else { + sprintf(&str[strlen(str)], "%d", lo); + } + } +/* + * If we've almost written to the end of the string, and we haven't + * already written ".." to indicate we're not writing amy more, then + * add the "..". Also set hi=lo=i since the data we just wrote is + * for the previous contiguous chunk, and the current i is the start + * of the next chunk. 
+ */ + if (((int)strlen(str)) >= max - 5 - 2*per + && + (strlen(str) == 0 || str[strlen(str)-1] != '.')) + { + strcpy(&str[strlen(str)], ", .."); + break; + } + hi = lo = list[i]; + } + } + if (strlen(str)==0 || str[strlen(str)-1] != '.') { + if (strlen(str)!=0) { + strcpy(&str[strlen(str)], ", "); + } + if (lo != hi) { + sprintf(&str[strlen(str)], "%d - %d", lo, hi); + } else { + sprintf(&str[strlen(str)], "%d", lo); + } + } +} + +// Input argument tells where we're being called from: +// 1 for init, 2 for finalize. +// The other implicit input is an environment variable we look at. +// When activated from init: we establish connections before printing. +// When activated from finalize: we just print whatever info is available. +static void +ompi_report_comm_methods(int called_from_location) // 1 = from init, 2 = from finalize +{ + int numhosts, i, j, k; + int max2Dprottable = 12; + int max2D1Cprottable = 36; + int hpmp_myrank; + int mylocalrank, nlocalranks, myleaderrank, nleaderranks; + int ret; + ompi_communicator_t *local_comm, *leader_comm; + int *method; + char *hoststring; + char **allhoststrings; + int comm_mode; // MODE_IS_BTL / MTL / PML + +// early return in the case of spawn + // PMPI_Comm_get_parent(&parent); + if (ompi_mpi_comm_parent != MPI_COMM_NULL) { return; } + + hpmp_myrank = ompi_comm_rank(MPI_COMM_WORLD); + // hpmp_nprocs = ompi_comm_size(MPI_COMM_WORLD); + + max2Dprottable = hook_comm_method_max; + max2D1Cprottable = 3 * max2Dprottable; + if (hook_comm_method_brief) { + // force only the short summary output to be printed with no 2d table: + max2Dprottable = 0; + max2D1Cprottable = 0; + } + +// Gathering layout data the same way osc_rdma_component.c does + ret = ompi_comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, NULL, + &local_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return; + } + mylocalrank = ompi_comm_rank(local_comm); + nlocalranks = ompi_comm_size(local_comm); + + ret = ompi_comm_split(MPI_COMM_WORLD, + (0 == 
mylocalrank) ? 0 : MPI_UNDEFINED, + hpmp_myrank, &leader_comm, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ompi_comm_free(&local_comm); + return; + } + +// Non-host-leaders return early. + if (mylocalrank != 0) { + ompi_comm_free(&local_comm); + return; + } +// ------------------------------------------------- +// Only host-leaders exist from this point on. +// ------------------------------------------------- + myleaderrank = ompi_comm_rank(leader_comm); + nleaderranks = numhosts = ompi_comm_size(leader_comm); + +/* + * Allocate space for each rank to store its communication method + * on a per-host basis. But rank 0 gets enough space to store the + * data for all pairs of hosts. + */ + method = malloc(numhosts * sizeof(int) * (hpmp_myrank?1:numhosts)); + if (!method) { + ompi_comm_free(&local_comm); + ompi_comm_free(&leader_comm); + return; + } + +// Each host leader figures out a string of basic info for its host +// in hoststring. (allocated at all host leaders, can be different sizes) + + { + int len; + int *ranklist; // comm-world ranks contained in local_comm + // sorted into comm-world order (although + // local_comm should already be constructed + // in that way) + int *ranklist_in; + + ompi_group_t *local_group, *world_group; + ompi_comm_group(local_comm, &local_group); + ompi_comm_group(MPI_COMM_WORLD, &world_group); + ranklist = malloc(nlocalranks * sizeof(int) * 2); + ranklist_in = ranklist + nlocalranks; + for (i=0; i 1) { + char *p = comm_method_string(local_comm, 1, NULL); + add_string_to_conversion_struct(&comm_method_string_conversion, p); + free(p); + } + + MPI_Datatype mydt; + MPI_Op myop; + MPI_Type_contiguous(sizeof(comm_method_string_conversion_t), MPI_BYTE, &mydt); + MPI_Type_commit(&mydt); + MPI_Op_create(myfn, 1, &myop); + leader_comm->c_coll->coll_allreduce( + MPI_IN_PLACE, (void*)&comm_method_string_conversion, 1, mydt, myop, leader_comm, + leader_comm->c_coll->coll_allreduce_module); + MPI_Op_free(&myop); + 
MPI_Type_free(&mydt); + +// Each host leader fills in a "numhosts" sized array method[] of +// how it communicates with each peer. + for (i=0; i 1) { + method[i] = comm_method(local_comm, 1); + } + } + } + +// Gather the strings and the methods at rank 0. +// The gatherv of the strings takes a few steps since we have to get +// the sizes first and allocate the receiving string. + { + int len, *lens, *disps; + + len = strlen(hoststring) + 1; + if (myleaderrank == 0) { + lens = malloc(nleaderranks * sizeof(int)); + disps = malloc(nleaderranks * sizeof(int)); + } else { + lens = disps = NULL; + } + leader_comm->c_coll->coll_gather( + &len, 1, MPI_INT, + lens, 1, MPI_INT, + 0, leader_comm, leader_comm->c_coll->coll_gather_module); + if (myleaderrank == 0) { + int tlen = 0; + char *p; + for (i=0; ic_coll->coll_gatherv( + hoststring, strlen(hoststring) + 1, MPI_CHAR, + &allhoststrings[0][0], lens, disps, MPI_CHAR, + 0, leader_comm, leader_comm->c_coll->coll_gatherv_module); + } else { + // matching above call from rank 0, just &allhoststrings[0][0] + // isn't legal here, and those args aren't used at non-root anyway + leader_comm->c_coll->coll_gatherv( + hoststring, strlen(hoststring) + 1, MPI_CHAR, + NULL, NULL, NULL, MPI_CHAR, + 0, leader_comm, leader_comm->c_coll->coll_gatherv_module); + } + if (myleaderrank == 0) { + free(lens); + free(disps); + } +// and a simpler gather for the methods + leader_comm->c_coll->coll_gather( + method, nleaderranks, MPI_INT, + method, nleaderranks, MPI_INT, + 0, leader_comm, leader_comm->c_coll->coll_gather_module); + } + ompi_comm_free(&local_comm); + ompi_comm_free(&leader_comm); + +// Interception for testing purposes. Let rank-0 meddle with all its method[] +// settings, this is only for testing, eg to make sure the printing comes out +// right. 
+ if (myleaderrank == 0) { + if (hook_comm_method_fakefile) { + FILE *fp; + int setting; + fp = fopen(hook_comm_method_fakefile, "r"); + for (i=0; i= 10) { ++per; tmp /= 10; } + for (i=0; i per) { per = tmp+1; } + } + } + + str = malloc(nleaderranks * per + 1); + p = str; + for (i=0; i=str && ((*p)==' ')) { *(p--)=0; } + printf(" host | %s\n", str); + memset(str, (int)'=', tmp); + str[tmp] = 0; + printf("======|=%s\n", str); + + for (i=0; istr && *p==' ') { *(p--)=0; } + printf("%5d : %s\n", i, str); + } + printf("\n"); + free(str); + } + else if (nleaderranks <= max2D1Cprottable) { + char *str, *p; + int tmp, per, done; + char char_code[NUM_COMM_METHODS], next_char; + int method_count[NUM_COMM_METHODS]; + + // characters for the number column in the 2d table, + // must be large enough for the digits needed for host numbers + per = 2; + tmp = nleaderranks; + while (tmp >= 10) { ++per; tmp /= 10; } + + // pick a character code for each comm method based on + // how many times it's in the table, use 'A' for the least common + for (i=0; i=str && ((*p)==' ')) { *(p--)=0; } + tmp = (int)strlen(str) + 2; + printf(" host | %s\n", str); + memset(str, (int)'=', tmp); + str[tmp] = 0; + printf("======|=%s\n", str); + + for (i=0; istr && *p==' ') { *(p--)=0; } + printf("%5d : %s\n", i, str); + } + free(str); + for (i=0; i 0 && + majority_method_onhost == COMM_METHOD_SELF) + { + majority_method_onhost = i; + } + if (method_count[i] > method_count[majority_method_onhost]) { + if (i != COMM_METHOD_SELF) { + majority_method_onhost = i; + } + } + } + if (method_count[COMM_METHOD_SELF] > 0) { any_self = 1; } + + majority_method_offhost = -1; + uniformity_offhost = 1; + for (i=0; i 0 && majority_method_offhost == 0) { + majority_method_offhost = i; + } + if (method_count[i] > method_count[majority_method_offhost]) { + majority_method_offhost = i; + } + } + + char *all_or_most = "all"; + char *or_self = ""; + if (!uniformity_onhost) { + all_or_most = "most"; + } + if 
((majority_method_onhost != COMM_METHOD_SELF) && any_self) { + or_self = " or self"; + } + printf(" on-host: %s connections are %s%s\n", all_or_most, + comm_method_to_string(majority_method_onhost), or_self); + + all_or_most = "all"; + if (!uniformity_offhost) { + all_or_most = "most"; + } + printf(" off-host: %s connections are %s\n", all_or_most, + comm_method_to_string(majority_method_offhost)); + + if (!uniformity_onhost || !uniformity_offhost) { + printf("Exceptions:\n"); + for (i=0; i 0) { +// if (!first) { +// strcat(str, " /"); +// } + sprintf(&str[strlen(str)], + " [%dx %s]", + method_count[k], + comm_method_to_string(k)); +// first = 0; + } + } + printf("%s\n", str); + free(str); + } + } + } + printf("\n"); + } + } + + if (myleaderrank == 0) { + free(allhoststrings); + } + free(method); +} diff --git a/ompi/mca/hook/comm_method/owner.txt b/ompi/mca/hook/comm_method/owner.txt new file mode 100644 index 0000000000..2fd247dddb --- /dev/null +++ b/ompi/mca/hook/comm_method/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: IBM +status: active diff --git a/ompi/mca/pml/base/pml_base_frame.c b/ompi/mca/pml/base/pml_base_frame.c index bf35186ef7..dd5cdc5009 100644 --- a/ompi/mca/pml/base/pml_base_frame.c +++ b/ompi/mca/pml/base/pml_base_frame.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +79,8 @@ mca_pml_base_module_t mca_pml = { NULL, /* pml_dump */ NULL, /* pml_ft_event */ 0, /* pml_max_contextid */ - 0 /* pml_max_tag */ + 0, /* pml_max_tag */ + 0 /* pml_flags */ }; mca_pml_base_component_t mca_pml_base_selected_component = {{0}}; diff --git a/ompi/mca/pml/cm/pml_cm.c b/ompi/mca/pml/cm/pml_cm.c index a7322e4c33..54b691053f 100644 --- a/ompi/mca/pml/cm/pml_cm.c +++ b/ompi/mca/pml/cm/pml_cm.c @@ -11,6 +11,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +29,7 @@ #include "pml_cm.h" #include "pml_cm_sendreq.h" #include "pml_cm_recvreq.h" +#include "pml_cm_component.h" ompi_pml_cm_t ompi_pml_cm = { { @@ -53,7 +55,8 @@ ompi_pml_cm_t ompi_pml_cm = { mca_pml_cm_dump, NULL, 0, - 0 + 0, + 0 /* flags */ } }; diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c index c5982c5538..aa287fddbf 100644 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ b/ompi/mca/pml/crcpw/pml_crcpw_module.c @@ -13,6 +13,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,8 @@ mca_pml_crcpw_module_t mca_pml_crcpw_module = { mca_pml_crcpw_ft_event, 32768, - INT_MAX + INT_MAX, + 0 /* flags */ } }; diff --git a/ompi/mca/pml/example/pml_example.c b/ompi/mca/pml/example/pml_example.c index 799e3abe45..146f3f0e99 100644 --- a/ompi/mca/pml/example/pml_example.c +++ b/ompi/mca/pml/example/pml_example.c @@ -6,6 +6,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. 
+ * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,8 @@ mca_pml_example_t mca_pml_example = { mca_pml_example_ft_event, 32768, - (0x7fffffff) + (0x7fffffff), + 0 /* flags */ } }; diff --git a/ompi/mca/pml/monitoring/pml_monitoring_component.c b/ompi/mca/pml/monitoring/pml_monitoring_component.c index 44aa555bca..31de1f98ec 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_component.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_component.c @@ -6,6 +6,7 @@ * Copyright (c) 2015 Bull SAS. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +49,8 @@ mca_pml_monitoring_module_t mca_pml_monitoring_module = { mca_pml_monitoring_dump, NULL, 65535, - INT_MAX + INT_MAX, + 0 /* flags */ }; /** diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 703c918dc6..cc633e3a57 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -20,6 +20,7 @@ * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -79,7 +80,8 @@ mca_pml_ob1_t mca_pml_ob1 = { mca_pml_ob1_dump, mca_pml_ob1_ft_event, 65535, - INT_MAX + INT_MAX, + 0 /* flags */ } }; @@ -1057,4 +1059,3 @@ int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2) return 0; } - diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index cd5aa32810..5228f4e9ab 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +74,8 @@ mca_pml_ucx_module_t ompi_pml_ucx = { .pml_dump = mca_pml_ucx_dump, .pml_ft_event = NULL, .pml_max_contextid = (1ul << (PML_UCX_CONTEXT_BITS)) - 1, - .pml_max_tag = (1ul << (PML_UCX_TAG_BITS - 1)) - 1 + .pml_max_tag = (1ul << (PML_UCX_TAG_BITS - 1)) - 1, + 0 /* flags */ }, .ucp_context = NULL, .ucp_worker = NULL diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index 03bb65d420..99ccb50fbb 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -3,6 +3,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +52,7 @@ mca_pml_yalla_module_t ompi_pml_yalla = { NULL, /* FT */ 1ul << ((sizeof(mxm_ctxid_t)*8) - 1), 1ul << ((sizeof(mxm_tag_t)*8 - 1) - 1), + 0 /* flags */ }, NULL, NULL,