/* * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * This interface is designed to hide the back-end details of how IB * RC connections are made from the rest of the openib BTL. There are * module-like instances of the implemented functionality (dlopen and * friends are not used, but all the functionality is accessed through * struct's of function pointers, so you can swap between multiple * different implementations at run time, just like real components). * Hence, these entities are referred to as "Connect * Pseudo-Components" (CPCs). * * The CPCs are referenced by their names (e.g., "oob", "rdma_cm"). * * CPCs are split into components and modules, similar to all other * MCA frameworks in this code base. * * Before diving into the CPC interface, let's discuss some * terminology and mappings of data structures: * * - a BTL module represents a network port (in the case of the openib * BTL, a LID) * - a CPC module represents one way to make connections to a BTL module * - hence, a BTL module has potentially multiple CPC modules * associated with it * - an endpoint represnts a connection between a local BTL module and * a remote BTL module (in the openib BTL, because of BSRQ, an * endpoint can contain multiple QPs) * - when an endpoint is created, one of the CPC modules associated * with the local BTL is selected and associated with the endpoint * (obviously, it is a CPC module that is common between the local * and remote BTL modules) * - endpoints may be created and destroyed during the MPI job * - endpoints are created lazily, during the first communication * between two peers * - endpoints are destroyed when two MPI processes become * disconnected (e.g., MPI-2 dynamics or MPI_FINALIZE) * - hence, BTL modules and CPC modules outlive endpoints. * Specifically, BTL modules and CPC modules live from MPI_INIT to * MPI_FINALIZE. endpoints come and go as MPI semantics demand it. * - therefore, CPC modules need to cache information on endpoints that * are specific to that connection. * * Component interface: * * - component_register(): The openib BTL's component_open() function * calls the connect_base_register() function, which scans all * compiled-in CPC's. If they have component_register() functions, * they are called (component_register() functions are only allowed to * register MCA parameters). * * NOTE: The connect_base_register() function will process the * btl_openib_cpc_include and btl_openib_cpc_exclude MCA parameters * and automatically include/exclude CPCs as relevant. If a CPC is * excluded, none of its other interface functions will be invoked for * the duration of the process. * * - component_init(): The openib BTL's component_init() function * calls connect_base_init(), which will invoke this query function on * each CPC to see if it wants to run at all. CPCs can gracefully * remove themselves from consideration in this process by returning * OMPI_ERR_NOT_SUPPORTED. * * - component_query(): The openib BTL's init_one_port() calls the * connect_base_select_for_local_port() function, which, for each LID * on that port, calls the component_query() function on every * available CPC on that LID. This function is intended to see if a * CPC can run on a sepcific openib BTL module (i.e., LID). If it * can, the CPC is supposed to create a CPC module that is specific to * that BTL/LID and return it. If it cannot, it should return * OMPI_ERR_NOT_SUPPORTED and be gracefully skipped for this * OpenFabrics port. * * component_finalize(): The openib BTL's component_close() function * calls connect_base_finalize(), which, in turn, calls the * component_finalize() function on all available CPCs. Note that all * CPC modules will have been finalized by this point; the CPC * component_finalize() function is a chance for the CPC to clean up * any component-specific resources. * * Module interface: * * cbm_component member: A pointer pointing to the single, global * instance of the CPC component. This member is used for creating a * unique index representing the modules' component so that it can be * shared with remote peer processes. * * cbm_priority member: An integer between 0 and 100, inclusive, * representing the priority of this CPC. * * cbm_modex_message member: A pointer to a blob buffer that will be * included in the modex message for this port for this CPC (it is * assumed that this blob is a) only understandable by the * corresponding CPC in the peer process, and b) contains specific * addressing/contact information for *this* port's CPC module). * * cbm_modex_message_len member: The length of the cbm_modex_message * blob, in bytes. * * cbm_endpoint_init(): Called during endpoint creation, allowing a * CPC module to cache information on the endpoint. A pointer to the * endpoint's CPC module is already cached on the endpoint. * * cbm_start_connect(): initiate a connection to a remote peer. The * CPC is responsible for setting itself up for asyncronous operation * for progressing the outgoing connection request. * * cbm_endpoint_finalize(): Called during the endpoint destrouction, * allowing the CPC module to destroy anything that it cached on the * endpoint. * * cbm_finalize(): shut down all asynchronous handling and clean up * any state that was setup for this CPC module/BTL. Some CPCs setup * asynchronous support on a per-HCA/NIC basis (vs. per-port/LID). It * is the reponsibility of the CPC to figure out such issues (e.g., * via reference counting) -- there is no notification from the * upper-level BTL about when an entire HCA/NIC is no longer being * used. There is only this function, which tells when a specific * CPC/BTL module is no longer being used. * * cbm_uses_cts: a bool that indicates whether the CPC will use the * CTS protocol or not. * - if true: the CPC will post the fragment on * endpoint->endpoint_cts_frag as a receive buffer and will *not* * call ompi_btl_openib_post_recvs(). * - if false: the CPC will call ompi_btl_openib_post_recvs() before * calling ompi_btl_openib_cpc_complete(). * * There are two functions in the main openib BTL that the CPC may * call: * * - ompi_btl_openib_post_recvs(endpoint): once a QP is locally * connected to the remote side (but we don't know if the remote side * is connected to us yet), this function is invoked to post buffers * on the QP, setup credits for the endpoint, etc. This function is * *only* invoked if the CPC's cbm_uses_cts is false. * * - ompi_btl_openib_cpc_complete(endpoint): once that a CPC knows * that a QP is connected on *both* sides, this function is invoked to * tell the main openib BTL "ok, you can use this connection now." * (e.g., the main openib BTL will either invoke the CTS protocol or * start sending out fragments that were queued while the connection * was establishing, etc.). */ #ifndef BTL_OPENIB_CONNECT_H #define BTL_OPENIB_CONNECT_H BEGIN_C_DECLS #define BCF_MAX_NAME 64 /** * Must forward declare these structs to avoid include file loops. */ struct mca_btl_openib_hca_t; struct mca_btl_openib_module_t; struct mca_btl_base_endpoint_t; /** * This is struct is defined below */ struct ompi_btl_openib_connect_base_module_t; /************************************************************************/ /** * Function to register MCA params in the connect functions. It * returns no value, so it cannot fail. */ typedef void (*ompi_btl_openib_connect_base_component_register_fn_t)(void); /** * This function is invoked once by the openib BTL component during * startup. It is intended to have CPC component-wide startup. * * Return value: * * - OMPI_SUCCESS: this CPC component will be used in selection during * this process. * * - OMPI_ERR_NOT_SUPPORTED: this CPC component will be silently * ignored in this process. * * - Other OMPI_ERR_* values: the error will be propagated upwards, * likely causing a fatal error (and/or the openib BTL component * being ignored). */ typedef int (*ompi_btl_openib_connect_base_component_init_fn_t)(void); /** * Query the CPC to see if it wants to run on a specific port (i.e., a * specific BTL module). If the component init function previously * returned OMPI_SUCCESS, this function is invoked once per BTL module * creation (i.e., for each port found by an MPI process). If this * CPC wants to be used on this BTL module, it returns a CPC module * that is specific to this BTL module. * * The BTL module in question is passed to the function; all of its * attributes can be used to query to see if it's eligible for this * CPC. * * If it is eligible, the CPC is responsible for creating a * corresponding CPC module, filling in all the relevant fields on the * modules, and for setting itself up to run (per above) and returning * a CPC module (this is effectively the "module_init" function). * Note that the module priority must be between 0 and 100 * (inclusive). When multiple CPCs are eligible for a single module, * the CPC with the highest priority will be used. * * Return value: * * - OMPI_SUCCESS if this CPC is eligible for and was able to be setup * for this BTL module. It is assumed that the CPC is now completely * setup to run on this openib module (per description above). * * - OMPI_ERR_NOT_SUPPORTED if this CPC cannot support this BTL * module. This is not an error; it's just the CPC saying "sorry, I * cannot support this BTL module." * * - Other OMPI_ERR_* code: an error occurred. */ typedef int (*ompi_btl_openib_connect_base_func_component_query_t) (struct mca_btl_openib_module_t *btl, struct ompi_btl_openib_connect_base_module_t **cpc); /** * This function is invoked once by the openib BTL component during * shutdown. It is intended to have CPC component-wide shutdown. */ typedef int (*ompi_btl_openib_connect_base_component_finalize_fn_t)(void); /** * CPC component struct */ struct ompi_btl_openib_connect_base_component_t { /** Name of this set of connection functions */ char cbc_name[BCF_MAX_NAME]; /** Register function. Can be NULL. */ ompi_btl_openib_connect_base_component_register_fn_t cbc_register; /** CPC component init function. Can be NULL. */ ompi_btl_openib_connect_base_component_init_fn_t cbc_init; /** Query the CPC component to get a CPC module corresponding to an openib BTL module. Cannot be NULL. */ ompi_btl_openib_connect_base_func_component_query_t cbc_query; /** CPC component finalize function. Can be NULL. */ ompi_btl_openib_connect_base_component_finalize_fn_t cbc_finalize; }; /** * Convenience typedef */ typedef struct ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_base_component_t; /************************************************************************/ /** * Function called when an endpoint has been created and has been * associated with a CPC. */ typedef int (*ompi_btl_openib_connect_base_module_endpoint_init_fn_t) (struct mca_btl_base_endpoint_t *endpoint); /** * Function to initiate a connection to a remote process. */ typedef int (*ompi_btl_openib_connect_base_module_start_connect_fn_t) (struct ompi_btl_openib_connect_base_module_t *cpc, struct mca_btl_base_endpoint_t *endpoint); /** * Function called when an endpoint is being destroyed. */ typedef int (*ompi_btl_openib_connect_base_module_endpoint_finalize_fn_t) (struct mca_btl_base_endpoint_t *endpoint); /** * Function to finalize the CPC module. It is called once when the * CPC module's corresponding openib BTL module is being finalized. */ typedef int (*ompi_btl_openib_connect_base_module_finalize_fn_t) (struct mca_btl_openib_module_t *btl, struct ompi_btl_openib_connect_base_module_t *cpc); /** * Meta data about a CPC module. This is in a standalone struct * because it is used in both the CPC module struct and the * openib_btl_proc_t struct to hold information received from the * modex. */ typedef struct ompi_btl_openib_connect_base_module_data_t { /** Pointer back to the component. Used by the base and openib btl to calculate this module's index for the modex. */ ompi_btl_openib_connect_base_component_t *cbm_component; /** Priority of the CPC module (must be >=0 and <=100) */ uint8_t cbm_priority; /** Blob that the CPC wants to include in the openib modex message for a specific port, or NULL if the CPC does not want to include a message in the modex. */ void *cbm_modex_message; /** Length of the cbm_modex_message blob (0 if cbm_modex_message==NULL). The message is intended to be short (because the size of the modex broadcast is a function of sum(cbm_modex_message_len[i]) for i=(0...total_num_ports_in_MPI_job) -- e.g., IBCM imposes its own [very short] limits (per IBTA volume 1, chapter 12). */ uint8_t cbm_modex_message_len; } ompi_btl_openib_connect_base_module_data_t; /** * Struct for holding CPC module and associated meta data */ typedef struct ompi_btl_openib_connect_base_module_t { /** Meta data about the module */ ompi_btl_openib_connect_base_module_data_t data; /** Endpoint initialization function */ ompi_btl_openib_connect_base_module_endpoint_init_fn_t cbm_endpoint_init; /** Connect function */ ompi_btl_openib_connect_base_module_start_connect_fn_t cbm_start_connect; /** Endpoint finalization function */ ompi_btl_openib_connect_base_module_endpoint_finalize_fn_t cbm_endpoint_finalize; /** Finalize the cpc module */ ompi_btl_openib_connect_base_module_finalize_fn_t cbm_finalize; /** Whether this module will use the CTS protocol or not. This directly states whether this module will call mca_btl_openib_endpoint_post_recvs() or not: true = this module will *not* call _post_recvs() and instead will post the receive buffer provided at endpoint->endpoint_cts_frag on qp 0. */ bool cbm_uses_cts; } ompi_btl_openib_connect_base_module_t; END_C_DECLS #endif