a0d4122606
for dynamic selection of cpc methods based on what is available. It also allows for inclusion/exclusion of methods. It even further allows for modifying the priorities of certain cpc methods to better determine the optimal cpc method. This patch also contains XRC compile time disablement (per Jeff's patch). At a high level, the cpc selection works by walking through each cpc and allowing it to test to see if it is permissible to run on this mpirun. It returns a priority if it is permissible or a -1 if not. All of the cpc names and priorities are rolled into a string. This string is then encapsulated in a message and passed around all the ompi processes. Once received and unpacked, the list received is compared to a local copy of the list. The connection method is chosen by comparing the lists passed around to all nodes via modex with the list generated locally. Any non-negative number is a potentially valid connection method. The method below of determining the optimal connection method is to take the cross-section of the two lists. The highest single value (and the other side being non-negative) is selected as the cpc method. svn merge -r 16948:17128 https://svn.open-mpi.org/svn/ompi/tmp-public/openib-cpc/ . This commit was SVN r17138.
129 строки
4.2 KiB
C
129 строки
4.2 KiB
C
/*
|
|
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
|
*
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
* This interface is designed to hide the back-end details of how IB
|
|
* RC connections are made from the rest of the openib BTL. There are
|
|
* module-like instances of the implemented functionality (dlopen and
|
|
* friends are not used, but all the functionality is accessed through
|
|
* struct's of function pointers, so you can swap between multiple
|
|
* different implementations at run time, just like real components).
|
|
*
|
|
* Currently, the connect functions are referenced by their names
|
|
* (e.g., "oob", "rdma_cm"). The decision which to use is made during
|
|
* the openib BTL init() function call.
|
|
*
|
|
* Note that the openib BTL's open() function calls the
|
|
* connect_base_open() function, which registers an MCA parameter, and
|
|
* scans all the connect modules to see if they have open() functions.
|
|
* If they do, they are called. In this way, the connect modules can
|
|
* register MCA parameters that show up in ompi_info output.
|
|
*
|
|
* There are four main functions to this interface (plus a per-HCA query function, described at its typedef below):
|
|
*
|
|
* - open: as described above, used to register MCA params for connect
|
|
* modules
|
|
*
|
|
* - init: to select a connect module. The module is responsible for
|
|
* setting itself up for asynchronous operation for incoming
|
|
* connection requests (e.g., putting fd's in the progress engine,
|
|
* posting non-blocking RML requests, spawning a background thread,
|
|
* etc.).
|
|
*
|
|
* - start_connect: initiate a connection to a remote peer. Similar
|
|
* to init, the module is responsible for setting itself up for
|
|
* asynchronous operation for progressing the outgoing connection
|
|
* request.
|
|
*
|
|
* - finalize: shut down all asynchronous handling. No need to clean
|
|
* up the connections that were made; that's the responsibility of the
|
|
* main openib BTL.
|
|
*
|
|
* There are two functions in the main openib BTL that the module will
|
|
* call:
|
|
*
|
|
* - ompi_btl_openib_post_recvs(endpoint): once a QP is locally
|
|
* connected to the remote side (but we don't know if the remote side
|
|
* is connected to us yet), this function is invoked to post buffers
|
|
* on the QP, setup credits for the endpoint, etc.
|
|
*
|
|
* - ompi_btl_openib_connected(endpoint): once we know that a QP is
|
|
* connected on *both* sides, this function is invoked to tell the
|
|
* main openib BTL "ok, you can use this connection now." (e.g., the
|
|
* main openib BTL will start sending out fragments that were queued
|
|
* while the connection was being established, etc.).
|
|
*/
|
|
#ifndef BTL_OPENIB_CONNECT_H
|
|
#define BTL_OPENIB_CONNECT_H
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/** Size, in bytes, of the bcf_name buffer in ompi_btl_openib_connect_base_funcs_t. */
#define BCF_MAX_NAME 64
|
|
|
|
/**
 * Forward declaration of mca_btl_openib_hca_t.  The full definition
 * lives in btl_openib.h, but that header includes this one, so only
 * the incomplete type can be named here.  Pointers to it are all the
 * declarations in this file need.
 */
struct mca_btl_openib_hca_t;
|
|
|
|
/**
 * Hook through which a connect module registers its MCA parameters.
 *
 * Takes no arguments and returns nothing.  Per the file-level notes
 * at the top of this header, connect_base_open() scans the connect
 * modules and calls this hook on every module that provides one.
 */
typedef void (*ompi_btl_openib_connect_base_func_open_t)(void);
|
|
|
|
/**
 * Function to initialize the connection functions (i.e., this module
 * has been selected, so do whatever setup is necessary).
 *
 * @return int status.  The value convention is not visible in this
 *         header; presumably an OMPI status code -- confirm against
 *         the module implementations.
 */
typedef int (*ompi_btl_openib_connect_base_func_init_t)(void);
|
|
|
|
/**
 * Forward declaration so that the endpoint type named in the
 * prototype below refers to the file-scope struct tag rather than to
 * a new type whose scope ends with the parameter list.  Repeating
 * the declaration is harmless if the including file has already
 * declared the tag.
 */
struct mca_btl_base_endpoint_t;

/**
 * Function to initiate a connection to a remote process.
 *
 * @param e  endpoint for which the connection is started
 *           (NOTE(review): presumably identifies the remote peer --
 *           confirm against the module implementations).
 * @return   int status.  The value convention is not visible in this
 *           header; presumably an OMPI status code -- confirm.
 */
typedef int (*ompi_btl_openib_connect_base_func_start_connect_t)
    (struct mca_btl_base_endpoint_t *e);
|
|
|
|
/**
 * Query the CPC to see if it wants to run on a specific HCA.
 *
 * @param hca  the HCA being offered to this CPC.
 * @return     int.  NOTE(review): the commit description for this
 *             interface says a CPC returns its priority when it can
 *             run and -1 when it cannot -- confirm against the
 *             module implementations.
 */
typedef int (*ompi_btl_openib_connect_base_func_query_t)
    (struct mca_btl_openib_hca_t *hca);
|
|
|
|
/**
 * Function to finalize the connection functions.
 *
 * Per the file-level notes at the top of this header, this shuts
 * down the module's asynchronous handling; connections that were
 * already made are cleaned up by the main openib BTL, not here.
 *
 * @return int status.  The value convention is not visible in this
 *         header; presumably an OMPI status code -- confirm.
 */
typedef int (*ompi_btl_openib_connect_base_func_finalize_t)(void);
|
|
|
|
struct ompi_btl_openib_connect_base_funcs_t {
|
|
/** Name of this set of connection functions */
|
|
char bcf_name[BCF_MAX_NAME];
|
|
|
|
/** Open function */
|
|
ompi_btl_openib_connect_base_func_open_t bcf_open;
|
|
|
|
/** Init function */
|
|
ompi_btl_openib_connect_base_func_init_t bcf_init;
|
|
|
|
/** Connect function */
|
|
ompi_btl_openib_connect_base_func_start_connect_t bcf_start_connect;
|
|
|
|
/** Query function */
|
|
ompi_btl_openib_connect_base_func_query_t bcf_query;
|
|
|
|
/** Finalize function */
|
|
ompi_btl_openib_connect_base_func_finalize_t bcf_finalize;
|
|
};
|
|
typedef struct ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_base_funcs_t;
|
|
|
|
END_C_DECLS
|
|
|
|
#endif
|