
This is closely related to Platform-MPI's old -prot feature. The long-format of the tables it prints could look like this: > Host 0 [myhost001] ranks 0 - 1 > Host 1 [myhost002] ranks 2 - 3 > Host 2 [myhost003] ranks 4 > Host 3 [myhost004] ranks 5 > Host 4 [myhost005] ranks 6 > Host 5 [myhost006] ranks 7 > Host 6 [myhost007] ranks 8 > Host 7 [myhost008] ranks 9 > Host 8 [myhost009] ranks 10 > > host | 0 1 2 3 4 5 6 7 8 > ======|============================================== > 0 : sm tcp tcp tcp tcp tcp tcp tcp tcp > 1 : tcp sm tcp tcp tcp tcp tcp tcp tcp > 2 : tcp tcp self tcp tcp tcp tcp tcp tcp > 3 : tcp tcp tcp self tcp tcp tcp tcp tcp > 4 : tcp tcp tcp tcp self tcp tcp tcp tcp > 5 : tcp tcp tcp tcp tcp self tcp tcp tcp > 6 : tcp tcp tcp tcp tcp tcp self tcp tcp > 7 : tcp tcp tcp tcp tcp tcp tcp self tcp > 8 : tcp tcp tcp tcp tcp tcp tcp tcp self > > Connection summary: > on-host: all connections are sm or self > off-host: all connections are tcp In this example hosts 0 and 1 had multiple ranks so "sm" was more meaningful than "self" to identify how the ranks on the host are talking to each other. While host 2..8 were one rank per host so "self" was more meaningful as their btl. 
Above a certain number of hosts (12 by default) the above table gets too big so we shrink to a more abbreviated looking table that has the same data: > host | 0 1 2 3 4 8 > ======|==================== > 0 : A C C C C C C C C > 1 : C A C C C C C C C > 2 : C C B C C C C C C > 3 : C C C B C C C C C > 4 : C C C C B C C C C > 5 : C C C C C B C C C > 6 : C C C C C C B C C > 7 : C C C C C C C B C > 8 : C C C C C C C C B > key: A == sm > key: B == self > key: C == tcp Then above 36 hosts we stop printing the 2d table entirely and just print the summary: > Connection summary: > on-host: all connections are sm or self > off-host: all connections are tcp The options to control it are -mca comm_method 1 : print the above table at the end of MPI_Init -mca comm_method 2 : print the above table at the beginning of MPI_Finalize -mca comm_method_max <n> : number of hosts <n> for which to print a full size 2d table -mca comm_method_brief 1 : only print summary output, no 2d table -mca comm_method_fakefile <filename> : for debugging only * printing at init vs finalize: The most important difference between these two is that when printing the table during MPI_Init(), we send extra messages to make sure all hosts are connected to each other. So the table ends up working against the idea of on-demand connections (although it's only forcing the n^2 connections in the number of hosts, not the total ranks). If printing at MPI_Finalize() we don't create any connections that aren't already connected, so the table is more likely to have "n/a" entries if some hosts never connected to each other. 
* how many hosts <n> for which to print a full size 2d table The option -mca comm_method_max <n> can be used to specify a number of hosts <n> (default 12) that controls at what host-count the unabbreviated / abbreviated 2d tables get printed: 1 - n : full size 2d table n+1 - 3n : shortened 2d table 3n+1 - inf : summary only, no 2d table * brief The option -mca comm_method_brief 1 can be used to skip the printing of the 2d table and only show the short summary. * fakefile This is a debugging option that allows easier testing of all the printout routines by letting all the detected communication methods between the hosts be overridden by fake data from a file. The source of the information used in the table is the .mca_component_name. In the case of BTLs, the module always had a .btl_component linking back to the component. The vars mca_pml_base_selected_component and ompi_mtl_base_selected_component offer similar functionality for pml/mtl. So with the ability to identify the component, we can then access the component name with code like this: mca_pml_base_selected_component.pmlm_version.mca_component_name. See the three lookup_{pml,mtl,btl}_name() functions in hook_comm_method_fns.c, and their use in comm_method() to parse the strings and produce an integer to represent the connection type being used. Signed-off-by: Mark Allen <markalle@us.ibm.com>
158 строки
4.6 KiB
C
158 строки
4.6 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2006-2007 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2018 IBM Corporation. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
#include "ompi/mca/pml/base/pml_base_request.h"
|
|
#include "ompi/mca/pml/base/pml_base_bsend.h"
|
|
#include "ompi/mca/pml/base/base.h"
|
|
|
|
#include "pml_cm.h"
|
|
#include "pml_cm_sendreq.h"
|
|
#include "pml_cm_recvreq.h"
|
|
#include "pml_cm_component.h"
|
|
|
|
ompi_pml_cm_t ompi_pml_cm = {
|
|
{
|
|
mca_pml_cm_add_procs,
|
|
mca_pml_cm_del_procs,
|
|
mca_pml_cm_enable,
|
|
NULL, /* No progress function. The MTL register their own */
|
|
mca_pml_cm_add_comm,
|
|
mca_pml_cm_del_comm,
|
|
mca_pml_cm_irecv_init,
|
|
mca_pml_cm_irecv,
|
|
mca_pml_cm_recv,
|
|
mca_pml_cm_isend_init,
|
|
mca_pml_cm_isend,
|
|
mca_pml_cm_send,
|
|
mca_pml_cm_iprobe,
|
|
mca_pml_cm_probe,
|
|
mca_pml_cm_start,
|
|
mca_pml_cm_improbe,
|
|
mca_pml_cm_mprobe,
|
|
mca_pml_cm_imrecv,
|
|
mca_pml_cm_mrecv,
|
|
mca_pml_cm_dump,
|
|
NULL,
|
|
0,
|
|
0,
|
|
0 /* flags */
|
|
}
|
|
};
|
|
|
|
|
|
int
|
|
mca_pml_cm_enable(bool enable)
|
|
{
|
|
/* BWB - FIX ME - need to have this actually do something,
|
|
maybe? */
|
|
opal_free_list_init (&mca_pml_base_send_requests,
|
|
sizeof(mca_pml_cm_hvy_send_request_t) + ompi_mtl->mtl_request_size,
|
|
opal_cache_line_size,
|
|
OBJ_CLASS(mca_pml_cm_hvy_send_request_t),
|
|
0,opal_cache_line_size,
|
|
ompi_pml_cm.free_list_num,
|
|
ompi_pml_cm.free_list_max,
|
|
ompi_pml_cm.free_list_inc,
|
|
NULL, 0, NULL, NULL, NULL);
|
|
|
|
opal_free_list_init (&mca_pml_base_recv_requests,
|
|
sizeof(mca_pml_cm_hvy_recv_request_t) + ompi_mtl->mtl_request_size,
|
|
opal_cache_line_size,
|
|
OBJ_CLASS(mca_pml_cm_hvy_recv_request_t),
|
|
0,opal_cache_line_size,
|
|
ompi_pml_cm.free_list_num,
|
|
ompi_pml_cm.free_list_max,
|
|
ompi_pml_cm.free_list_inc,
|
|
NULL, 0, NULL, NULL, NULL);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_cm_add_comm(ompi_communicator_t* comm)
|
|
{
|
|
/* should never happen, but it was, so check */
|
|
if (comm->c_contextid > ompi_pml_cm.super.pml_max_contextid) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
/* initialize per-communicator data. MTLs may override this. */
|
|
comm->c_pml_comm = NULL;
|
|
|
|
/* notify the MTL about the added communicator */
|
|
return OMPI_MTL_CALL(add_comm(ompi_mtl, comm));
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_cm_del_comm(ompi_communicator_t* comm)
|
|
{
|
|
/* notify the MTL about the deleted communicator */
|
|
return OMPI_MTL_CALL(del_comm(ompi_mtl, comm));
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_cm_add_procs(struct ompi_proc_t** procs, size_t nprocs)
|
|
{
|
|
int ret;
|
|
|
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
|
for (size_t i = 0 ; i < nprocs ; ++i) {
|
|
if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* make sure remote procs are using the same PML as us */
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("cm",
|
|
procs,
|
|
nprocs))) {
|
|
return ret;
|
|
}
|
|
|
|
ret = OMPI_MTL_CALL(add_procs(ompi_mtl, nprocs, procs));
|
|
return ret;
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_cm_del_procs(struct ompi_proc_t** procs, size_t nprocs)
|
|
{
|
|
int ret;
|
|
|
|
ret = OMPI_MTL_CALL(del_procs(ompi_mtl, nprocs, procs));
|
|
return ret;
|
|
}
|
|
|
|
|
|
/* print any available useful information from this communicator */
|
|
int
|
|
mca_pml_cm_dump(struct ompi_communicator_t* comm, int verbose)
|
|
{
|
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
|
}
|