2015-02-17 22:15:31 +03:00
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2014-12-17 02:42:00 +03:00
/*
2015-04-18 01:27:17 +03:00
* Copyright ( c ) 2013 - 2015 Intel , Inc . All rights reserved
2014-12-17 02:42:00 +03:00
*
2015-02-14 01:46:05 +03:00
* Copyright ( c ) 2014 - 2015 Cisco Systems , Inc . All rights reserved .
2015-01-31 23:50:32 +03:00
* Copyright ( c ) 2015 Los Alamos National Security , LLC . All rights
* reserved .
2014-12-17 02:42:00 +03:00
* $ COPYRIGHT $
*
* Additional copyrights may follow
*
* $ HEADER $
*/
# include "mtl_ofi.h"
2015-09-17 20:05:05 +03:00
# include "opal/util/argv.h"
2014-12-17 02:42:00 +03:00
/*
 * Forward declarations of the component callbacks that are wired into
 * the mca_mtl_ofi_component descriptor defined later in this file.
 */
static int ompi_mtl_ofi_component_open ( void ) ;
2015-01-28 20:06:13 +03:00
static int ompi_mtl_ofi_component_query ( mca_base_module_t * * module , int * priority ) ;
2014-12-17 02:42:00 +03:00
static int ompi_mtl_ofi_component_close ( void ) ;
static int ompi_mtl_ofi_component_register ( void ) ;
static mca_mtl_base_module_t *
ompi_mtl_ofi_component_init ( bool enable_progress_threads ,
bool enable_mpi_threads ) ;
2015-01-28 20:06:13 +03:00
/* Storage for the "priority" variable; reported by the query callback. */
static int param_priority ;
2015-08-11 02:34:52 +03:00
/*
 * Storage for the "provider_include" / "provider_exclude" variables.
 * Both are comma-delimited provider name lists consumed by
 * select_ofi_provider().
 */
static char * prov_include ;
static char * prov_exclude ;
2014-12-17 02:42:00 +03:00
/*
 * Component descriptor for the OFI MTL.
 *
 * Hooks up the open / close / query / register callbacks and the init
 * function defined in this file.  The initializer mixes positional
 * version macros with designated fields, so the order of the entries
 * must not be changed.
 */
mca_mtl_ofi_component_t mca_mtl_ofi_component = {
{
/* First, the mca_base_component_t struct containing meta
* information about the component itself */
2015-04-18 18:36:05 +03:00
. mtl_version = {
2014-12-17 02:42:00 +03:00
MCA_MTL_BASE_VERSION_2_0_0 ,
2015-04-18 18:36:05 +03:00
. mca_component_name = " ofi " ,
2015-05-13 01:47:50 +03:00
OFI_COMPAT_MCA_VERSION ,
2015-04-18 18:36:05 +03:00
/* Component life-cycle callbacks (all defined in this file) */
. mca_open_component = ompi_mtl_ofi_component_open ,
. mca_close_component = ompi_mtl_ofi_component_close ,
. mca_query_component = ompi_mtl_ofi_component_query ,
. mca_register_component_params = ompi_mtl_ofi_component_register ,
2014-12-17 02:42:00 +03:00
} ,
2015-04-18 18:36:05 +03:00
. mtl_data = {
2014-12-17 02:42:00 +03:00
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
} ,
2015-04-18 18:36:05 +03:00
/* Module initialization entry point */
. mtl_init = ompi_mtl_ofi_component_init ,
2014-12-17 02:42:00 +03:00
}
} ;
static int
ompi_mtl_ofi_component_register ( void )
{
2015-10-19 19:42:22 +03:00
param_priority = 10 ; /* for now give a lower priority than the psm mtl and ob1 */
2015-08-11 02:34:52 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" priority " , " Priority of the OFI MTL component " ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_9 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& param_priority ) ;
prov_include = NULL ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" provider_include " ,
" Comma-delimited list of OFI providers that are considered for use (e.g., \" psm,sockets \" ; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude. " ,
MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
OPAL_INFO_LVL_1 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& prov_include ) ;
prov_exclude = " sockets,mxm " ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" provider_exclude " ,
" Comma-delimited list of OFI providers that are not considered for use (default: \" sockets,mxm \" ; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include. " ,
MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
OPAL_INFO_LVL_1 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& prov_exclude ) ;
2014-12-17 02:42:00 +03:00
return OMPI_SUCCESS ;
}
2015-01-28 20:06:13 +03:00
2014-12-17 02:42:00 +03:00
static int
ompi_mtl_ofi_component_open ( void )
{
ompi_mtl_ofi . base . mtl_request_size =
sizeof ( ompi_mtl_ofi_request_t ) - sizeof ( struct mca_mtl_request_t ) ;
ompi_mtl_ofi . domain = NULL ;
ompi_mtl_ofi . av = NULL ;
ompi_mtl_ofi . cq = NULL ;
ompi_mtl_ofi . ep = NULL ;
2015-08-11 02:34:52 +03:00
/**
* Sanity check : provider_include and provider_exclude must be mutually
* exclusive
*/
if ( OMPI_SUCCESS ! =
mca_base_var_check_exclusive ( " ompi " ,
mca_mtl_ofi_component . super . mtl_version . mca_type_name ,
mca_mtl_ofi_component . super . mtl_version . mca_component_name ,
" provider_include " ,
mca_mtl_ofi_component . super . mtl_version . mca_type_name ,
mca_mtl_ofi_component . super . mtl_version . mca_component_name ,
" provider_exclude " ) ) {
return OMPI_ERR_NOT_AVAILABLE ;
}
2014-12-17 02:42:00 +03:00
return OMPI_SUCCESS ;
}
2015-05-13 01:47:50 +03:00
static int
2015-01-28 20:06:13 +03:00
ompi_mtl_ofi_component_query ( mca_base_module_t * * module , int * priority )
{
* priority = param_priority ;
2015-02-04 23:35:05 +03:00
* module = ( mca_base_module_t * ) & ompi_mtl_ofi . base ;
2015-01-28 20:06:13 +03:00
return OMPI_SUCCESS ;
}
2014-12-17 02:42:00 +03:00
/**
 * Component close callback.
 *
 * Performs no work in this function.
 *
 * @return OMPI_SUCCESS, unconditionally.
 */
static int
ompi_mtl_ofi_component_close(void)
{
    return OMPI_SUCCESS;
}
2015-07-27 18:15:14 +03:00
/**
 * Out-of-line wrapper around ompi_mtl_ofi_progress().
 *
 * This is the function registered with opal_progress_register() by the
 * component init routine and unregistered again at finalize.
 * NOTE(review): presumably needed because ompi_mtl_ofi_progress() is an
 * inline function -- confirm against mtl_ofi.h.
 *
 * @return Whatever ompi_mtl_ofi_progress() returns.
 */
int
ompi_mtl_ofi_progress_no_inline(void)
{
    int rc = ompi_mtl_ofi_progress();

    return rc;
}
2014-12-17 02:42:00 +03:00
2015-08-11 02:34:52 +03:00
/**
 * Test whether a name appears in a NULL-terminated list of strings.
 *
 * The comparison is an exact, case-sensitive match of the whole name.
 * (Comparing only strlen(item) characters would make "psm" match a
 * list entry "psm2", and would make an empty item match every entry.)
 *
 * @param list  NULL-terminated array of C strings; may be NULL.
 * @param item  Name to look for; may be NULL.
 *
 * @return 1 if item equals one of the entries, 0 otherwise (including
 *         when list or item is NULL).
 */
static int
is_in_list(char **list, char *item)
{
    int i;

    if ((NULL == list) || (NULL == item)) {
        return 0;
    }

    for (i = 0; NULL != list[i]; i++) {
        if (0 == strcmp(item, list[i])) {
            return 1;
        }
    }

    return 0;
}
2015-08-08 02:09:51 +03:00
static struct fi_info *
select_ofi_provider ( struct fi_info * providers )
{
2015-08-11 02:34:52 +03:00
char * * include_list = NULL ;
char * * exclude_list = NULL ;
2015-08-08 02:09:51 +03:00
struct fi_info * prov = providers ;
2015-08-11 02:34:52 +03:00
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:provider_include = \" %s \" \n " ,
__FILE__ , __LINE__ , prov_include ) ;
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:provider_exclude = \" %s \" \n " ,
__FILE__ , __LINE__ , prov_exclude ) ;
if ( NULL ! = prov_include ) {
include_list = opal_argv_split ( prov_include , ' , ' ) ;
while ( ( NULL ! = prov ) & &
( ! is_in_list ( include_list , prov - > fabric_attr - > prov_name ) ) ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi: \" %s \" not in include list \n " ,
__FILE__ , __LINE__ ,
prov - > fabric_attr - > prov_name ) ;
prov = prov - > next ;
}
} else if ( NULL ! = prov_exclude ) {
exclude_list = opal_argv_split ( prov_exclude , ' , ' ) ;
while ( ( NULL ! = prov ) & &
( is_in_list ( exclude_list , prov - > fabric_attr - > prov_name ) ) ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi: \" %s \" in exclude list \n " ,
__FILE__ , __LINE__ ,
prov - > fabric_attr - > prov_name ) ;
prov = prov - > next ;
2015-08-08 02:09:51 +03:00
}
}
2015-08-11 02:34:52 +03:00
opal_argv_free ( include_list ) ;
opal_argv_free ( exclude_list ) ;
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:prov: %s \n " ,
__FILE__ , __LINE__ ,
( prov ? prov - > fabric_attr - > prov_name : " none " ) ) ;
2015-08-08 02:09:51 +03:00
return prov ;
}
2014-12-17 02:42:00 +03:00
static mca_mtl_base_module_t *
ompi_mtl_ofi_component_init ( bool enable_progress_threads ,
bool enable_mpi_threads )
{
int ret , fi_version ;
2015-03-07 01:38:19 +03:00
struct fi_info * hints ;
2014-12-17 02:42:00 +03:00
struct fi_info * providers = NULL , * prov = NULL ;
struct fi_cq_attr cq_attr = { 0 } ;
struct fi_av_attr av_attr = { 0 } ;
2015-02-04 19:08:12 +03:00
char ep_name [ FI_NAME_MAX ] = { 0 } ;
2014-12-17 02:42:00 +03:00
size_t namelen ;
/**
* Hints to filter providers
* See man fi_getinfo for a list of all filters
* mode : Select capabilities MTL is prepared to support .
* In this case , MTL will pass in context into communication calls
* ep_type : reliable datagram operation
2015-04-25 01:55:21 +03:00
* caps : Capabilities required from the provider .
* Tag matching is specified to implement MPI semantics .
2015-08-15 02:23:10 +03:00
* msg_order : Guarantee that messages with same tag are ordered .
2014-12-17 02:42:00 +03:00
*/
2015-03-07 01:38:19 +03:00
hints = fi_allocinfo ( ) ;
if ( ! hints ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: Could not allocate fi_info \n " ,
__FILE__ , __LINE__ ) ;
goto error ;
}
2015-08-15 02:23:10 +03:00
hints - > mode = FI_CONTEXT ;
hints - > ep_attr - > type = FI_EP_RDM ; /* Reliable datagram */
hints - > caps = FI_TAGGED ; /* Tag matching interface */
hints - > tx_attr - > msg_order = FI_ORDER_SAS ;
hints - > rx_attr - > msg_order = FI_ORDER_SAS ;
2014-12-17 02:42:00 +03:00
/**
* Refine filter for additional capabilities
* threading : Disable locking
* control_progress : enable async progress
*/
2015-03-07 01:38:19 +03:00
hints - > domain_attr - > threading = FI_THREAD_ENDPOINT ;
hints - > domain_attr - > control_progress = FI_PROGRESS_AUTO ;
2014-12-17 02:42:00 +03:00
/**
* FI_VERSION provides binary backward and forward compatibility support
* Specify the version of OFI is coded to , the provider will select struct
* layouts that are compatible with this version .
*/
fi_version = FI_VERSION ( 1 , 0 ) ;
/**
* fi_getinfo : returns information about fabric services for reaching a
* remote node or service . this does not necessarily allocate resources .
* Pass NULL for name / service because we want a list of providers supported .
*/
ret = fi_getinfo ( fi_version , /* OFI version requested */
NULL , /* Optional name or fabric to resolve */
NULL , /* Optional service name or port to request */
0ULL , /* Optional flag */
2015-03-07 01:38:19 +03:00
hints , /* In: Hints to filter providers */
2014-12-17 02:42:00 +03:00
& providers ) ; /* Out: List of matching providers */
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_getinfo failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
2015-08-08 02:09:51 +03:00
* Select a provider from the list returned by fi_getinfo ( ) .
2014-12-17 02:42:00 +03:00
*/
2015-08-08 02:09:51 +03:00
prov = select_ofi_provider ( providers ) ;
if ( ! prov ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: select_ofi_provider: no provider found \n " ,
__FILE__ , __LINE__ ) ;
goto error ;
}
2014-12-17 02:42:00 +03:00
/**
* Open fabric
* The getinfo struct returns a fabric attribute struct that can be used to
* instantiate the virtual or physical network . This opens a " fabric
* provider " . See man fi_fabric for details.
*/
ret = fi_fabric ( prov - > fabric_attr , /* In: Fabric attributes */
& ompi_mtl_ofi . fabric , /* Out: Fabric handle */
NULL ) ; /* Optional context for fabric events */
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_fabric failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
* Create the access domain , which is the physical or virtual network or
* hardware port / collection of ports . Returns a domain object that can be
* used to create endpoints . See man fi_domain for details .
*/
ret = fi_domain ( ompi_mtl_ofi . fabric , /* In: Fabric object */
prov , /* In: Provider */
& ompi_mtl_ofi . domain , /* Out: Domain oject */
NULL ) ; /* Optional context for domain events */
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_domain failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
* Create a transport level communication endpoint . To use the endpoint ,
* it must be bound to completion counters or event queues and enabled ,
* and the resources consumed by it , such as address vectors , counters ,
* completion queues , etc .
* see man fi_endpoint for more details .
*/
ret = fi_endpoint ( ompi_mtl_ofi . domain , /* In: Domain object */
prov , /* In: Provider */
& ompi_mtl_ofi . ep , /* Out: Endpoint object */
NULL ) ; /* Optional context */
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_endpoint failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
2015-04-20 20:54:50 +03:00
/**
* Save the maximum inject size .
*/
ompi_mtl_ofi . max_inject_size = prov - > tx_attr - > inject_size ;
2014-12-17 02:42:00 +03:00
/**
* Create the objects that will be bound to the endpoint .
* The objects include :
* - completion queue for events
* - address vector of other endpoint addresses
* - dynamic memory - spanning memory region
*/
cq_attr . format = FI_CQ_FORMAT_TAGGED ;
ret = fi_cq_open ( ompi_mtl_ofi . domain , & cq_attr , & ompi_mtl_ofi . cq , NULL ) ;
if ( ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_cq_open failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
* The remote fi_addr will be stored in the ofi_endpoint struct .
* So , we use the AV in " map " mode .
*/
av_attr . type = FI_AV_MAP ;
ret = fi_av_open ( ompi_mtl_ofi . domain , & av_attr , & ompi_mtl_ofi . av , NULL ) ;
if ( ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_av_open failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
2015-05-13 01:47:50 +03:00
2014-12-17 02:42:00 +03:00
/**
* Bind the CQ and AV to the endpoint object .
*/
2015-01-23 21:50:10 +03:00
ret = fi_ep_bind ( ompi_mtl_ofi . ep ,
( fid_t ) ompi_mtl_ofi . cq ,
FI_SEND | FI_RECV ) ;
2014-12-17 02:42:00 +03:00
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_bind CQ-EP failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
2015-01-23 21:50:10 +03:00
ret = fi_ep_bind ( ompi_mtl_ofi . ep ,
( fid_t ) ompi_mtl_ofi . av ,
0 ) ;
2014-12-17 02:42:00 +03:00
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_bind AV-EP failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
* Enable the endpoint for communication
* This commits the bind operations .
*/
ret = fi_enable ( ompi_mtl_ofi . ep ) ;
if ( 0 ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_enable failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
/**
* Free providers info since it ' s not needed anymore .
*/
2015-03-07 01:38:19 +03:00
fi_freeinfo ( hints ) ;
hints = NULL ;
2014-12-17 02:42:00 +03:00
fi_freeinfo ( providers ) ;
2015-01-06 04:00:01 +03:00
providers = NULL ;
2014-12-17 02:42:00 +03:00
/**
* Get our address and publish it with modex .
*/
namelen = sizeof ( ep_name ) ;
2015-02-04 19:08:12 +03:00
ret = fi_getname ( ( fid_t ) ompi_mtl_ofi . ep , & ep_name [ 0 ] , & namelen ) ;
2014-12-17 02:42:00 +03:00
if ( ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: fi_getname failed: %s \n " ,
__FILE__ , __LINE__ , fi_strerror ( - ret ) ) ;
goto error ;
}
2015-05-13 01:47:50 +03:00
OFI_COMPAT_MODEX_SEND ( ret ,
& mca_mtl_ofi_component . super . mtl_version ,
& ep_name ,
namelen ) ;
2014-12-17 02:42:00 +03:00
if ( OMPI_SUCCESS ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
2015-05-13 01:47:50 +03:00
" %s:%d: modex_send failed: %d \n " ,
2014-12-17 02:42:00 +03:00
__FILE__ , __LINE__ , ret ) ;
goto error ;
}
ompi_mtl_ofi . epnamelen = namelen ;
/**
2015-02-04 19:09:12 +03:00
* Set the ANY_SRC address .
2014-12-17 02:42:00 +03:00
*/
2015-02-04 19:09:12 +03:00
ompi_mtl_ofi . any_addr = FI_ADDR_UNSPEC ;
2014-12-17 02:42:00 +03:00
/**
* Activate progress callback .
*/
2015-07-27 18:15:14 +03:00
ret = opal_progress_register ( ompi_mtl_ofi_progress_no_inline ) ;
2014-12-17 02:42:00 +03:00
if ( OMPI_SUCCESS ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: opal_progress_register failed: %d \n " ,
__FILE__ , __LINE__ , ret ) ;
goto error ;
}
return & ompi_mtl_ofi . base ;
error :
if ( providers ) {
( void ) fi_freeinfo ( providers ) ;
}
2015-03-07 01:38:19 +03:00
if ( hints ) {
( void ) fi_freeinfo ( hints ) ;
}
2014-12-17 02:42:00 +03:00
if ( ompi_mtl_ofi . av ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . av ) ;
}
if ( ompi_mtl_ofi . cq ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . cq ) ;
}
if ( ompi_mtl_ofi . ep ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . ep ) ;
}
if ( ompi_mtl_ofi . domain ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . domain ) ;
}
if ( ompi_mtl_ofi . fabric ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . fabric ) ;
}
return NULL ;
}
2015-07-27 18:15:14 +03:00
/**
 * Finalize the OFI MTL module.
 *
 * Unregisters the progress callback and closes the OFI objects held in
 * ompi_mtl_ofi in the order endpoint, completion queue, address vector,
 * domain, fabric.  A failing fi_close() is reported and the process is
 * aborted.
 *
 * fi_close() reports failure through its return value (a negative
 * fabric error code), so the message is built with fi_strerror(-ret)
 * rather than from errno.
 *
 * @param mtl  Not used by this function.
 *
 * @return OMPI_SUCCESS when every object was closed.
 */
int
ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl)
{
    int ret;

    opal_progress_unregister(ompi_mtl_ofi_progress_no_inline);

    /**
     * Close all the OFI objects
     */
    ret = fi_close((fid_t)ompi_mtl_ofi.ep);
    if (0 != ret) {
        opal_output(ompi_mtl_base_framework.framework_output,
                    " fi_close failed: %s ", fi_strerror(-ret));
        abort();
    }

    ret = fi_close((fid_t)ompi_mtl_ofi.cq);
    if (0 != ret) {
        opal_output(ompi_mtl_base_framework.framework_output,
                    " fi_close failed: %s ", fi_strerror(-ret));
        abort();
    }

    ret = fi_close((fid_t)ompi_mtl_ofi.av);
    if (0 != ret) {
        opal_output(ompi_mtl_base_framework.framework_output,
                    " fi_close failed: %s ", fi_strerror(-ret));
        abort();
    }

    ret = fi_close((fid_t)ompi_mtl_ofi.domain);
    if (0 != ret) {
        opal_output(ompi_mtl_base_framework.framework_output,
                    " fi_close failed: %s ", fi_strerror(-ret));
        abort();
    }

    ret = fi_close((fid_t)ompi_mtl_ofi.fabric);
    if (0 != ret) {
        opal_output(ompi_mtl_base_framework.framework_output,
                    " fi_close failed: %s ", fi_strerror(-ret));
        abort();
    }

    return OMPI_SUCCESS;
}
2014-12-17 02:42:00 +03:00