/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2013-2018 Intel, Inc.  All rights reserved
 *
 * Copyright (c) 2014-2017 Cisco Systems, Inc.  All rights reserved
 * Copyright (c) 2015-2016 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "mtl_ofi.h"
#include "opal/util/argv.h"
#include "opal/util/printf.h"
/* Component lifecycle callbacks, wired into mca_mtl_ofi_component below. */
static int ompi_mtl_ofi_component_open(void);
static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_ofi_component_close(void);
static int ompi_mtl_ofi_component_register(void);

static mca_mtl_base_module_t *
ompi_mtl_ofi_component_init(bool enable_progress_threads,
                            bool enable_mpi_threads);

/* Backing storage for the component's MCA parameters
 * (registered in ompi_mtl_ofi_component_register). */
static int param_priority;      /* mtl_ofi_priority */
static char *prov_include;      /* mtl_ofi_provider_include */
static char *prov_exclude;      /* mtl_ofi_provider_exclude */
static int control_progress;    /* mtl_ofi_control_progress */
static int data_progress;       /* mtl_ofi_data_progress */
static int av_type;             /* mtl_ofi_av */
static int ofi_tag_mode;        /* mtl_ofi_tag_mode */

#if OPAL_HAVE_THREAD_LOCAL
/* Per-thread context index and completion-entry scratch buffer
 * (presumably used by the progress path when multiple OFI contexts /
 * thread grouping are enabled — the users are outside this file). */
opal_thread_local int per_thread_ctx;
opal_thread_local struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT];
#endif
2016-11-23 02:03:20 +03:00
/*
 * Enumerators
 */

/* Progress models selectable via mtl_ofi_control_progress /
 * mtl_ofi_data_progress. */
enum {
    MTL_OFI_PROG_AUTO = 1,
    MTL_OFI_PROG_MANUAL,
    MTL_OFI_PROG_UNSPEC,
};

/* String values accepted for the control-progress MCA parameter. */
mca_base_var_enum_value_t control_prog_type[] = {
    {MTL_OFI_PROG_AUTO, "auto"},
    {MTL_OFI_PROG_MANUAL, "manual"},
    {MTL_OFI_PROG_UNSPEC, "unspec"},
    {0, NULL}
};

/* String values accepted for the data-progress MCA parameter. */
mca_base_var_enum_value_t data_prog_type[] = {
    {MTL_OFI_PROG_AUTO, "auto"},
    {MTL_OFI_PROG_MANUAL, "manual"},
    {MTL_OFI_PROG_UNSPEC, "unspec"},
    {0, NULL}
};

/* Address-vector types selectable via mtl_ofi_av. */
enum {
    MTL_OFI_AV_MAP = 1,
    MTL_OFI_AV_TABLE,
    MTL_OFI_AV_UNKNOWN,
};

/* String values accepted for the AV-type MCA parameter. */
mca_base_var_enum_value_t av_table_type[] = {
    {MTL_OFI_AV_MAP, "map"},
    {MTL_OFI_AV_TABLE, "table"},
    {0, NULL}
};

/* OFI tag-layout modes selectable via mtl_ofi_tag_mode
 * (see ompi_mtl_ofi_define_tag_mode for what each layout sets). */
enum {
    MTL_OFI_TAG_AUTO = 1,
    MTL_OFI_TAG_1,
    MTL_OFI_TAG_2,
    MTL_OFI_TAG_FULL,
};

/* String values accepted for the tag-mode MCA parameter. */
mca_base_var_enum_value_t ofi_tag_mode_type[] = {
    {MTL_OFI_TAG_AUTO, "auto"},
    {MTL_OFI_TAG_1, "ofi_tag_1"},
    {MTL_OFI_TAG_2, "ofi_tag_2"},
    {MTL_OFI_TAG_FULL, "ofi_tag_full"},
    {0, NULL}
};
2016-11-23 02:03:20 +03:00
/* Public component descriptor: MCA meta information plus the MTL init
 * entry point the framework calls during selection. */
mca_mtl_ofi_component_t mca_mtl_ofi_component = {
    {
        /* First, the mca_base_component_t struct containing meta
         * information about the component itself */
        .mtl_version = {
            MCA_MTL_BASE_VERSION_2_0_0,
            .mca_component_name = "ofi",
            OFI_COMPAT_MCA_VERSION,
            .mca_open_component = ompi_mtl_ofi_component_open,
            .mca_close_component = ompi_mtl_ofi_component_close,
            .mca_query_component = ompi_mtl_ofi_component_query,
            .mca_register_component_params = ompi_mtl_ofi_component_register,
        },
        .mtl_data = {
            /* The component is not checkpoint ready */
            MCA_BASE_METADATA_PARAM_NONE
        },
        .mtl_init = ompi_mtl_ofi_component_init,
    }
};
static int
ompi_mtl_ofi_component_register ( void )
{
int ret ;
mca_base_var_enum_t * new_enum = NULL ;
2018-02-13 20:41:38 +03:00
char * desc ;
2016-11-23 02:03:20 +03:00
2018-12-01 00:44:39 +03:00
param_priority = 25 ; /* for now give a lower priority than the psm mtl */
2016-11-23 02:03:20 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" priority " , " Priority of the OFI MTL component " ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_9 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& param_priority ) ;
2018-09-25 22:42:34 +03:00
prov_include = NULL ;
2016-11-23 02:03:20 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" provider_include " ,
" Comma-delimited list of OFI providers that are considered for use (e.g., \" psm,psm2 \" ; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude. " ,
MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
OPAL_INFO_LVL_1 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& prov_include ) ;
2018-09-25 22:42:34 +03:00
prov_exclude = " shm,sockets,tcp,udp,rstream " ;
2016-11-23 02:03:20 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" provider_exclude " ,
" Comma-delimited list of OFI providers that are not considered for use (default: \" sockets,mxm \" ; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include. " ,
MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
OPAL_INFO_LVL_1 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& prov_exclude ) ;
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . ofi_progress_event_count = MTL_OFI_MAX_PROG_EVENT_COUNT ;
2018-10-07 02:58:16 +03:00
opal_asprintf ( & desc , " Max number of events to read each call to OFI progress (default: %d events will be read per OFI progress call) " , ompi_mtl_ofi . ofi_progress_event_count ) ;
2018-02-13 20:41:38 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" progress_event_cnt " ,
desc ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_6 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& ompi_mtl_ofi . ofi_progress_event_count ) ;
2018-04-03 02:40:47 +03:00
free ( desc ) ;
ret = mca_base_var_enum_create ( " ofi_tag_mode_type " , ofi_tag_mode_type , & new_enum ) ;
if ( OPAL_SUCCESS ! = ret ) {
return ret ;
}
ofi_tag_mode = MTL_OFI_TAG_AUTO ;
2018-10-07 02:58:16 +03:00
opal_asprintf ( & desc , " Mode specifying how many bits to use for various MPI values in OFI/Libfabric "
2018-04-03 02:40:47 +03:00
" communications. Some Libfabric provider network types can support most of Open MPI "
" needs; others can only supply a limited number of bits, which then must be split "
" across the MPI communicator ID, MPI source rank, and MPI tag. Three different "
" splitting schemes are available: ofi_tag_full (%d bits for the communicator, %d bits "
" for the source rank, and %d bits for the tag), ofi_tag_1 (%d bits for the communicator "
" , %d bits source rank, %d bits tag), ofi_tag_2 (%d bits for the communicator "
" , %d bits source rank, %d bits tag). By default, this MCA variable is set to \" auto \" , "
" which will first try to use ofi_tag_full, and if that fails, fall back to ofi_tag_1. " ,
MTL_OFI_CID_BIT_COUNT_DATA , 32 , MTL_OFI_TAG_BIT_COUNT_DATA ,
MTL_OFI_CID_BIT_COUNT_1 , MTL_OFI_SOURCE_BIT_COUNT_1 , MTL_OFI_TAG_BIT_COUNT_1 ,
MTL_OFI_CID_BIT_COUNT_2 , MTL_OFI_SOURCE_BIT_COUNT_2 , MTL_OFI_TAG_BIT_COUNT_2 ) ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" tag_mode " ,
desc ,
MCA_BASE_VAR_TYPE_INT , new_enum , 0 , 0 ,
OPAL_INFO_LVL_6 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& ofi_tag_mode ) ;
free ( desc ) ;
OBJ_RELEASE ( new_enum ) ;
2018-02-13 20:41:38 +03:00
2016-11-23 02:03:20 +03:00
ret = mca_base_var_enum_create ( " control_prog_type " , control_prog_type , & new_enum ) ;
if ( OPAL_SUCCESS ! = ret ) {
return ret ;
}
2017-11-08 18:58:24 +03:00
control_progress = MTL_OFI_PROG_UNSPEC ;
2016-11-23 02:03:20 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" control_progress " ,
2017-11-08 18:58:24 +03:00
" Specify control progress model (default: unspecificed, use provider's default). Set to auto or manual for auto or manual progress respectively. " ,
2016-11-23 02:03:20 +03:00
MCA_BASE_VAR_TYPE_INT , new_enum , 0 , 0 ,
OPAL_INFO_LVL_3 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& control_progress ) ;
OBJ_RELEASE ( new_enum ) ;
ret = mca_base_var_enum_create ( " data_prog_type " , data_prog_type , & new_enum ) ;
if ( OPAL_SUCCESS ! = ret ) {
return ret ;
}
2017-11-08 18:58:24 +03:00
data_progress = MTL_OFI_PROG_UNSPEC ;
2016-11-23 02:03:20 +03:00
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" data_progress " ,
2017-11-08 18:58:24 +03:00
" Specify data progress model (default: unspecified, use provider's default). Set to auto or manual for auto or manual progress respectively. " ,
2016-11-23 02:03:20 +03:00
MCA_BASE_VAR_TYPE_INT , new_enum , 0 , 0 ,
OPAL_INFO_LVL_3 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& data_progress ) ;
OBJ_RELEASE ( new_enum ) ;
ret = mca_base_var_enum_create ( " av_type " , av_table_type , & new_enum ) ;
if ( OPAL_SUCCESS ! = ret ) {
return ret ;
}
av_type = MTL_OFI_AV_MAP ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" av " ,
" Specify AV type to use (default: map). Set to table for FI_AV_TABLE AV type. " ,
MCA_BASE_VAR_TYPE_INT , new_enum , 0 , 0 ,
OPAL_INFO_LVL_3 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& av_type ) ;
OBJ_RELEASE ( new_enum ) ;
2018-12-26 22:29:10 +03:00
ompi_mtl_ofi . enable_sep = 0 ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" enable_sep " ,
" Enable SEP feature " ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_3 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& ompi_mtl_ofi . enable_sep ) ;
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . thread_grouping = 0 ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" thread_grouping " ,
" Enable/Disable Thread Grouping feature " ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_3 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& ompi_mtl_ofi . thread_grouping ) ;
2018-12-26 22:29:10 +03:00
/*
* Default Policy : Create 1 context and let user ask for more for
* multi - threaded workloads . User needs to ask for as many contexts as the
* number of threads that are anticipated to make MPI calls .
*/
ompi_mtl_ofi . num_ofi_contexts = 1 ;
mca_base_component_var_register ( & mca_mtl_ofi_component . super . mtl_version ,
" num_ctxts " ,
" Specify number of OFI contexts to create " ,
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
OPAL_INFO_LVL_4 ,
MCA_BASE_VAR_SCOPE_READONLY ,
& ompi_mtl_ofi . num_ofi_contexts ) ;
2016-11-23 02:03:20 +03:00
return OMPI_SUCCESS ;
}
static int
ompi_mtl_ofi_component_open ( void )
{
ompi_mtl_ofi . base . mtl_request_size =
sizeof ( ompi_mtl_ofi_request_t ) - sizeof ( struct mca_mtl_request_t ) ;
ompi_mtl_ofi . domain = NULL ;
ompi_mtl_ofi . av = NULL ;
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . sep = NULL ;
2016-11-23 02:03:20 +03:00
/**
* Sanity check : provider_include and provider_exclude must be mutually
* exclusive
*/
if ( OMPI_SUCCESS ! =
mca_base_var_check_exclusive ( " ompi " ,
mca_mtl_ofi_component . super . mtl_version . mca_type_name ,
mca_mtl_ofi_component . super . mtl_version . mca_component_name ,
" provider_include " ,
mca_mtl_ofi_component . super . mtl_version . mca_type_name ,
mca_mtl_ofi_component . super . mtl_version . mca_component_name ,
" provider_exclude " ) ) {
return OMPI_ERR_NOT_AVAILABLE ;
}
return OMPI_SUCCESS ;
}
/*
 * Component query: hand the framework our module pointer and the
 * configured selection priority.
 */
static int
ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *) &ompi_mtl_ofi.base;
    *priority = param_priority;
    return OMPI_SUCCESS;
}
/* Component close: no component-level resources to release here. */
static int
ompi_mtl_ofi_component_close(void)
{
    return OMPI_SUCCESS;
}
int
ompi_mtl_ofi_progress_no_inline ( void )
{
return ompi_mtl_ofi_progress ( ) ;
}
/*
 * Return 1 if item matches an entry of the NULL-terminated string
 * vector list, 0 otherwise (including when either argument is NULL).
 *
 * Note the comparison is a *prefix* match on the list entry: an entry
 * "psm" matches items "psm" and "psm2".
 */
static int
is_in_list(char **list, char *item)
{
    if (NULL == list || NULL == item) {
        return 0;
    }

    for (char **entry = list; NULL != *entry; ++entry) {
        if (0 == strncmp(item, *entry, strlen(*entry))) {
            return 1;
        }
    }

    return 0;
}
static struct fi_info *
select_ofi_provider ( struct fi_info * providers )
{
char * * include_list = NULL ;
char * * exclude_list = NULL ;
struct fi_info * prov = providers ;
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:provider_include = \" %s \" \n " ,
__FILE__ , __LINE__ , prov_include ) ;
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:provider_exclude = \" %s \" \n " ,
__FILE__ , __LINE__ , prov_exclude ) ;
if ( NULL ! = prov_include ) {
include_list = opal_argv_split ( prov_include , ' , ' ) ;
while ( ( NULL ! = prov ) & &
( ! is_in_list ( include_list , prov - > fabric_attr - > prov_name ) ) ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi: \" %s \" not in include list \n " ,
__FILE__ , __LINE__ ,
prov - > fabric_attr - > prov_name ) ;
prov = prov - > next ;
}
} else if ( NULL ! = prov_exclude ) {
exclude_list = opal_argv_split ( prov_exclude , ' , ' ) ;
while ( ( NULL ! = prov ) & &
( is_in_list ( exclude_list , prov - > fabric_attr - > prov_name ) ) ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi: \" %s \" in exclude list \n " ,
__FILE__ , __LINE__ ,
prov - > fabric_attr - > prov_name ) ;
prov = prov - > next ;
}
}
opal_argv_free ( include_list ) ;
opal_argv_free ( exclude_list ) ;
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: mtl:ofi:prov: %s \n " ,
__FILE__ , __LINE__ ,
( prov ? prov - > fabric_attr - > prov_name : " none " ) ) ;
return prov ;
}
2018-04-03 02:40:47 +03:00
/* Check if FI_REMOTE_CQ_DATA is supported, if so send the source rank there
* FI_DIRECTED_RECV is also needed so receives can discrimate the source
*/
static int
ompi_mtl_ofi_check_fi_remote_cq_data ( int fi_version ,
struct fi_info * hints ,
struct fi_info * provider ,
struct fi_info * * prov_cq_data )
{
int ret ;
char * provider_name ;
struct fi_info * hints_dup ;
hints_dup = fi_dupinfo ( hints ) ;
provider_name = strdup ( provider - > fabric_attr - > prov_name ) ;
hints_dup - > fabric_attr - > prov_name = provider_name ;
hints_dup - > caps | = FI_TAGGED | FI_DIRECTED_RECV ;
/* Ask for the size that OMPI uses for the source rank number */
hints_dup - > domain_attr - > cq_data_size = sizeof ( int ) ;
ret = fi_getinfo ( fi_version , NULL , NULL , 0ULL , hints_dup , prov_cq_data ) ;
if ( ( 0 ! = ret ) & & ( - FI_ENODATA ! = ret ) ) {
opal_show_help ( " help-mtl-ofi.txt " , " OFI call fail " , true ,
" fi_getinfo " ,
ompi_process_info . nodename , __FILE__ , __LINE__ ,
fi_strerror ( - ret ) , - ret ) ;
return ret ;
} else if ( - FI_ENODATA = = ret ) {
/* The provider does not support FI_REMOTE_CQ_DATA */
prov_cq_data = NULL ;
}
fi_freeinfo ( hints_dup ) ;
return OMPI_SUCCESS ;
}
/*
 * Configure the global ompi_mtl_ofi tag-layout fields for the requested
 * tag mode, and report through *bits_for_cid how many bits the layout
 * reserves for the communicator id.
 *
 * MTL_OFI_TAG_1 / MTL_OFI_TAG_2 pack cid, source rank, and tag into the
 * OFI tag bits; any other value selects the FI_REMOTE_CQ_DATA layout
 * (source rank carried in completion data, so no source fields are set).
 */
static void
ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
    switch (ofi_tag_mode) {
        case MTL_OFI_TAG_1:
            *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_1;
            /* Max MPI tag: tag bit count minus one reserved bit, all ones. */
            ompi_mtl_ofi.base.mtl_max_tag = (int) ((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1);
            ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_1;
            ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_1;
            ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_1;
            ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_1;
            ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_1;
            ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_1;
            ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_1;
            ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_1;
            break;
        case MTL_OFI_TAG_2:
            *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_2;
            ompi_mtl_ofi.base.mtl_max_tag = (int) ((1ULL << (MTL_OFI_TAG_BIT_COUNT_2 - 1)) - 1);
            ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_2;
            ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_2;
            ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_2;
            ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_2;
            ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_2;
            ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_2;
            ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_2;
            ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_2;
            break;
        default: /* use FI_REMOTE_CQ_DATA */
            *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_DATA;
            ompi_mtl_ofi.base.mtl_max_tag = (int) ((1ULL << (MTL_OFI_TAG_BIT_COUNT_DATA - 1)) - 1);
            ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_DATA;
            ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_DATA;
            ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_DATA;
            ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_DATA;
    }
}
2018-12-26 22:29:10 +03:00
/* Allocate ompi_mtl_ofi.comm_to_context (arr_size zeroed int slots).
 * On failure, logs and returns from the *caller*, which must have an
 * "int ret" in scope holding the error code to propagate.
 * Bug fix: dropped the trailing ';' after while(0) — it produced an
 * empty statement at every use site and breaks if/else bodies. */
#define MTL_OFI_ALLOC_COMM_TO_CONTEXT(arr_size)                               \
    do {                                                                      \
        ompi_mtl_ofi.comm_to_context = calloc(arr_size, sizeof(int));         \
        if (OPAL_UNLIKELY(!ompi_mtl_ofi.comm_to_context)) {                   \
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,  \
                                "%s:%d: alloc of comm_to_context array failed: %s\n", \
                                __FILE__, __LINE__, strerror(errno));         \
            return ret;                                                       \
        }                                                                     \
    } while (0)

/* Allocate the ompi_mtl_ofi.ofi_ctxt array (num_ofi_contexts entries).
 * Same caller contract and trailing-';' fix as above. */
#define MTL_OFI_ALLOC_OFI_CTXTS()                                             \
    do {                                                                      \
        ompi_mtl_ofi.ofi_ctxt = (mca_mtl_ofi_context_t *) malloc(ompi_mtl_ofi.num_ofi_contexts * \
                                                                 sizeof(mca_mtl_ofi_context_t)); \
        if (OPAL_UNLIKELY(!ompi_mtl_ofi.ofi_ctxt)) {                          \
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,  \
                                "%s:%d: alloc of ofi_ctxt array failed: %s\n",\
                                __FILE__, __LINE__, strerror(errno));         \
            return ret;                                                       \
        }                                                                     \
    } while (0)
2019-01-24 22:44:53 +03:00
/*
 * Create a scalable endpoint (SEP) on ompi_mtl_ofi.domain with
 * num_ofi_contexts TX/RX contexts, open and bind the address vector,
 * and allocate the comm-to-context map plus the per-context array.
 *
 * prov          - selected provider info (its ep_attr ctx counts are set here)
 * universe_size - expected number of peers, used to size the AV
 *
 * Returns OMPI_SUCCESS or the libfabric error code from the failing call.
 */
static int ompi_mtl_ofi_init_sep(struct fi_info *prov, int universe_size)
{
    int ret = OMPI_SUCCESS, num_ofi_ctxts;
    struct fi_av_attr av_attr = {0};

    /* Request one TX and one RX context per configured OFI context. */
    prov->ep_attr->tx_ctx_cnt = prov->ep_attr->rx_ctx_cnt =
        ompi_mtl_ofi.num_ofi_contexts;
    ret = fi_scalable_ep(ompi_mtl_ofi.domain, prov, &ompi_mtl_ofi.sep, NULL);
    if (0 != ret) {
        opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
                       "fi_scalable_ep",
                       ompi_process_info.nodename, __FILE__, __LINE__,
                       fi_strerror(-ret), -ret);
        return ret;
    }

    /* Smallest bit count b such that num_ofi_contexts >> b == 0, i.e.
     * enough AV address bits to index every RX context. */
    ompi_mtl_ofi.rx_ctx_bits = 0;
    while (ompi_mtl_ofi.num_ofi_contexts >> ++ompi_mtl_ofi.rx_ctx_bits);

    av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE : FI_AV_MAP;
    av_attr.rx_ctx_bits = ompi_mtl_ofi.rx_ctx_bits;
    /* One AV slot per (peer, context) pair. */
    av_attr.count = ompi_mtl_ofi.num_ofi_contexts * universe_size;
    ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
    if (0 != ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_av_open failed");
        return ret;
    }

    ret = fi_scalable_ep_bind(ompi_mtl_ofi.sep, (fid_t)ompi_mtl_ofi.av, 0);
    if (0 != ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_bind AV-EP failed");
        return ret;
    }

    /*
     * If SEP supported and Thread Grouping feature enabled, use
     * num_ofi_contexts + 2. Extra 2 items is to accomodate Open MPI contextid
     * numbering- COMM_WORLD is 0, COMM_SELF is 1. Other user created
     * Comm contextid values are assigned sequentially starting with 3.
     */
    num_ofi_ctxts = ompi_mtl_ofi.thread_grouping ?
        ompi_mtl_ofi.num_ofi_contexts + 2 : 1;
    MTL_OFI_ALLOC_COMM_TO_CONTEXT(num_ofi_ctxts);

    ompi_mtl_ofi.total_ctxts_used = 0;
    ompi_mtl_ofi.threshold_comm_context_id = 0;

    /* Allocate memory for OFI contexts */
    MTL_OFI_ALLOC_OFI_CTXTS();

    return ret;
}
2019-01-24 22:44:53 +03:00
/*
 * Create a regular (non-scalable) endpoint on ompi_mtl_ofi.domain, open
 * and bind the address vector and a single tagged CQ, and set up the
 * one-entry context arrays.  Used when SEP is unsupported or disabled;
 * forces num_ofi_contexts to 1.
 *
 * prov          - selected provider info
 * universe_size - expected number of peers, used to size the AV
 *
 * Returns OMPI_SUCCESS or the libfabric error code from the failing call.
 */
static int ompi_mtl_ofi_init_regular_ep(struct fi_info *prov, int universe_size)
{
    int ret = OMPI_SUCCESS;
    struct fi_av_attr av_attr = {0};
    struct fi_cq_attr cq_attr = {0};
    cq_attr.format = FI_CQ_FORMAT_TAGGED;
    cq_attr.size = ompi_mtl_ofi.ofi_progress_event_count;

    /* Override any user defined setting */
    ompi_mtl_ofi.num_ofi_contexts = 1;
    ret = fi_endpoint(ompi_mtl_ofi.domain, /* In:  Domain object   */
                      prov,                /* In:  Provider        */
                      &ompi_mtl_ofi.sep,   /* Out: Endpoint object */
                      NULL);               /* Optional context     */
    if (0 != ret) {
        opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
                       "fi_endpoint",
                       ompi_process_info.nodename, __FILE__, __LINE__,
                       fi_strerror(-ret), -ret);
        return ret;
    }

    /**
     * Create the objects that will be bound to the endpoint.
     * The objects include:
     *     - address vector and completion queues
     */
    av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE : FI_AV_MAP;
    av_attr.count = universe_size;
    ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
    if (ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_av_open failed");
        return ret;
    }

    ret = fi_ep_bind(ompi_mtl_ofi.sep,
                     (fid_t)ompi_mtl_ofi.av,
                     0);
    if (0 != ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_bind AV-EP failed");
        return ret;
    }

    /* Single mapping slot: everything uses context 0. */
    MTL_OFI_ALLOC_COMM_TO_CONTEXT(1);

    /* Allocate memory for OFI contexts */
    MTL_OFI_ALLOC_OFI_CTXTS();

    /* The lone context sends and receives on the regular endpoint itself. */
    ompi_mtl_ofi.ofi_ctxt[0].tx_ep = ompi_mtl_ofi.sep;
    ompi_mtl_ofi.ofi_ctxt[0].rx_ep = ompi_mtl_ofi.sep;

    ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.ofi_ctxt[0].cq, NULL);
    if (ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_cq_open failed");
        return ret;
    }

    /* Bind CQ to endpoint object */
    ret = fi_ep_bind(ompi_mtl_ofi.sep, (fid_t)ompi_mtl_ofi.ofi_ctxt[0].cq,
                     FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION);
    if (0 != ret) {
        MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP failed");
        return ret;
    }

    return ret;
}
2016-11-23 02:03:20 +03:00
static mca_mtl_base_module_t *
ompi_mtl_ofi_component_init ( bool enable_progress_threads ,
bool enable_mpi_threads )
{
2018-12-26 22:29:10 +03:00
int ret , fi_version ;
int num_local_ranks , sep_support_in_provider , max_ofi_ctxts ;
2018-12-12 21:53:56 +03:00
int ofi_tag_leading_zeros , ofi_tag_bits_for_cid ;
2016-11-23 02:03:20 +03:00
struct fi_info * hints ;
2018-04-03 02:40:47 +03:00
struct fi_info * providers = NULL ;
struct fi_info * prov = NULL ;
struct fi_info * prov_cq_data = NULL ;
2016-11-23 02:03:20 +03:00
char ep_name [ FI_NAME_MAX ] = { 0 } ;
size_t namelen ;
2019-01-24 22:44:53 +03:00
int universe_size ;
char * univ_size_str ;
2016-11-23 02:03:20 +03:00
/**
* Hints to filter providers
* See man fi_getinfo for a list of all filters
* mode : Select capabilities MTL is prepared to support .
* In this case , MTL will pass in context into communication calls
* ep_type : reliable datagram operation
* caps : Capabilities required from the provider .
* Tag matching is specified to implement MPI semantics .
* msg_order : Guarantee that messages with same tag are ordered .
*/
hints = fi_allocinfo ( ) ;
if ( ! hints ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: Could not allocate fi_info \n " ,
__FILE__ , __LINE__ ) ;
goto error ;
}
hints - > mode = FI_CONTEXT ;
hints - > ep_attr - > type = FI_EP_RDM ; /* Reliable datagram */
hints - > caps = FI_TAGGED ; /* Tag matching interface */
hints - > tx_attr - > msg_order = FI_ORDER_SAS ;
hints - > rx_attr - > msg_order = FI_ORDER_SAS ;
2018-03-22 20:46:35 +03:00
hints - > rx_attr - > op_flags = FI_COMPLETION ;
hints - > tx_attr - > op_flags = FI_COMPLETION ;
2016-11-23 02:03:20 +03:00
2018-08-24 00:18:32 +03:00
if ( enable_mpi_threads ) {
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . mpi_thread_multiple = true ;
2018-08-24 00:18:32 +03:00
hints - > domain_attr - > threading = FI_THREAD_SAFE ;
} else {
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . mpi_thread_multiple = false ;
2018-08-24 00:18:32 +03:00
hints - > domain_attr - > threading = FI_THREAD_DOMAIN ;
}
2016-11-23 02:03:20 +03:00
2017-11-08 18:58:24 +03:00
switch ( control_progress ) {
case MTL_OFI_PROG_AUTO :
hints - > domain_attr - > control_progress = FI_PROGRESS_AUTO ;
break ;
case MTL_OFI_PROG_MANUAL :
2016-11-23 02:03:20 +03:00
hints - > domain_attr - > control_progress = FI_PROGRESS_MANUAL ;
2017-11-08 18:58:24 +03:00
break ;
default :
hints - > domain_attr - > control_progress = FI_PROGRESS_UNSPEC ;
2016-11-23 02:03:20 +03:00
}
2017-11-08 18:58:24 +03:00
switch ( data_progress ) {
case MTL_OFI_PROG_AUTO :
hints - > domain_attr - > data_progress = FI_PROGRESS_AUTO ;
break ;
case MTL_OFI_PROG_MANUAL :
2016-11-23 02:03:20 +03:00
hints - > domain_attr - > data_progress = FI_PROGRESS_MANUAL ;
2017-11-08 18:58:24 +03:00
break ;
default :
hints - > domain_attr - > data_progress = FI_PROGRESS_UNSPEC ;
2016-11-23 02:03:20 +03:00
}
if ( MTL_OFI_AV_TABLE = = av_type ) {
hints - > domain_attr - > av_type = FI_AV_TABLE ;
} else {
hints - > domain_attr - > av_type = FI_AV_MAP ;
}
hints - > domain_attr - > resource_mgmt = FI_RM_ENABLED ;
/**
* FI_VERSION provides binary backward and forward compatibility support
* Specify the version of OFI is coded to , the provider will select struct
* layouts that are compatible with this version .
*/
fi_version = FI_VERSION ( 1 , 0 ) ;
/**
* fi_getinfo : returns information about fabric services for reaching a
* remote node or service . this does not necessarily allocate resources .
* Pass NULL for name / service because we want a list of providers supported .
*/
ret = fi_getinfo ( fi_version , /* OFI version requested */
NULL , /* Optional name or fabric to resolve */
NULL , /* Optional service name or port to request */
0ULL , /* Optional flag */
2017-11-11 15:58:31 +03:00
hints , /* In: Hints to filter providers */
2016-11-23 02:03:20 +03:00
& providers ) ; /* Out: List of matching providers */
2017-11-11 16:01:15 +03:00
if ( FI_ENODATA = = - ret ) {
// It is not an error if no information is returned.
goto error ;
} else if ( 0 ! = ret ) {
2017-10-30 22:13:44 +03:00
opal_show_help ( " help-mtl-ofi.txt " , " OFI call fail " , true ,
" fi_getinfo " ,
ompi_process_info . nodename , __FILE__ , __LINE__ ,
2017-11-11 15:58:59 +03:00
fi_strerror ( - ret ) , - ret ) ;
2016-11-23 02:03:20 +03:00
goto error ;
}
/**
* Select a provider from the list returned by fi_getinfo ( ) .
*/
prov = select_ofi_provider ( providers ) ;
if ( ! prov ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: select_ofi_provider: no provider found \n " ,
__FILE__ , __LINE__ ) ;
goto error ;
}
2018-04-03 02:40:47 +03:00
/**
* Select the format of the OFI tag
*/
if ( ( MTL_OFI_TAG_AUTO = = ofi_tag_mode ) | |
( MTL_OFI_TAG_FULL = = ofi_tag_mode ) ) {
ret = ompi_mtl_ofi_check_fi_remote_cq_data ( fi_version ,
hints , prov ,
& prov_cq_data ) ;
if ( OMPI_SUCCESS ! = ret ) {
goto error ;
} else if ( NULL = = prov_cq_data ) {
/* No support for FI_REMOTE_CQ_DATA */
fi_freeinfo ( prov_cq_data ) ;
ompi_mtl_ofi . fi_cq_data = false ;
if ( MTL_OFI_TAG_AUTO = = ofi_tag_mode ) {
/* Fallback to MTL_OFI_TAG_1 */
2018-07-23 21:39:11 +03:00
ompi_mtl_ofi_define_tag_mode ( MTL_OFI_TAG_1 , & ofi_tag_bits_for_cid ) ;
2018-04-03 02:40:47 +03:00
} else { /* MTL_OFI_TAG_FULL */
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: OFI provider %s does not support FI_REMOTE_CQ_DATA \n " ,
__FILE__ , __LINE__ , prov - > fabric_attr - > prov_name ) ;
goto error ;
}
} else {
/* Use FI_REMOTE_CQ_DATA */
ompi_mtl_ofi . fi_cq_data = true ;
prov = prov_cq_data ;
2018-07-23 21:39:11 +03:00
ompi_mtl_ofi_define_tag_mode ( MTL_OFI_TAG_FULL , & ofi_tag_bits_for_cid ) ;
2018-04-03 02:40:47 +03:00
}
} else { /* MTL_OFI_TAG_1 or MTL_OFI_TAG_2 */
ompi_mtl_ofi . fi_cq_data = false ;
2018-07-23 21:39:11 +03:00
ompi_mtl_ofi_define_tag_mode ( ofi_tag_mode , & ofi_tag_bits_for_cid ) ;
2018-04-03 02:40:47 +03:00
}
2016-11-23 02:03:20 +03:00
2018-10-17 17:13:55 +03:00
/**
* Initialize the MTL OFI Symbol Tables & function pointers
* for specialized functions .
*/
ompi_mtl_ofi_send_symtable_init ( & ompi_mtl_ofi . sym_table ) ;
ompi_mtl_ofi . base . mtl_send =
ompi_mtl_ofi . sym_table . ompi_mtl_ofi_send [ ompi_mtl_ofi . fi_cq_data ] ;
ompi_mtl_ofi_isend_symtable_init ( & ompi_mtl_ofi . sym_table ) ;
ompi_mtl_ofi . base . mtl_isend =
ompi_mtl_ofi . sym_table . ompi_mtl_ofi_isend [ ompi_mtl_ofi . fi_cq_data ] ;
ompi_mtl_ofi_irecv_symtable_init ( & ompi_mtl_ofi . sym_table ) ;
ompi_mtl_ofi . base . mtl_irecv =
ompi_mtl_ofi . sym_table . ompi_mtl_ofi_irecv [ ompi_mtl_ofi . fi_cq_data ] ;
ompi_mtl_ofi_iprobe_symtable_init ( & ompi_mtl_ofi . sym_table ) ;
ompi_mtl_ofi . base . mtl_iprobe =
ompi_mtl_ofi . sym_table . ompi_mtl_ofi_iprobe [ ompi_mtl_ofi . fi_cq_data ] ;
ompi_mtl_ofi_improbe_symtable_init ( & ompi_mtl_ofi . sym_table ) ;
ompi_mtl_ofi . base . mtl_improbe =
ompi_mtl_ofi . sym_table . ompi_mtl_ofi_improbe [ ompi_mtl_ofi . fi_cq_data ] ;
2018-07-23 21:39:11 +03:00
/**
* Check for potential bits in the OFI tag that providers may be reserving
* for internal usage ( see mem_tag_format in fi_endpoint man page ) .
*/
ofi_tag_leading_zeros = 0 ;
while ( ! ( ( prov - > ep_attr - > mem_tag_format < < ofi_tag_leading_zeros + + ) &
( uint64_t ) MTL_OFI_HIGHEST_TAG_BIT ) & &
/* Do not keep looping if the provider does not support enough bits */
( ofi_tag_bits_for_cid > = MTL_OFI_MINIMUM_CID_BITS ) ) {
ofi_tag_bits_for_cid - - ;
}
if ( ofi_tag_bits_for_cid < MTL_OFI_MINIMUM_CID_BITS ) {
opal_show_help ( " help-mtl-ofi.txt " , " Not enough bits for CID " , true ,
prov - > fabric_attr - > prov_name ,
prov - > fabric_attr - > prov_name ,
ompi_process_info . nodename , __FILE__ , __LINE__ ) ;
goto error ;
}
/* Update the maximum supported Communicator ID */
ompi_mtl_ofi . base . mtl_max_contextid = ( int ) ( ( 1ULL < < ofi_tag_bits_for_cid ) - 1 ) ;
2018-11-02 00:03:00 +03:00
ompi_mtl_ofi . num_peers = 0 ;
2018-07-23 21:39:11 +03:00
2018-10-25 00:38:23 +03:00
/* Check if Scalable Endpoints can be enabled for the provider */
2018-12-26 22:29:10 +03:00
sep_support_in_provider = 0 ;
2018-10-25 00:38:23 +03:00
if ( ( prov - > domain_attr - > max_ep_tx_ctx > 1 ) | |
( prov - > domain_attr - > max_ep_rx_ctx > 1 ) ) {
2018-12-26 22:29:10 +03:00
sep_support_in_provider = 1 ;
2018-10-25 00:38:23 +03:00
}
2018-12-26 22:29:10 +03:00
if ( 1 = = ompi_mtl_ofi . enable_sep ) {
if ( 0 = = sep_support_in_provider ) {
opal_show_help ( " help-mtl-ofi.txt " , " SEP unavailable " , true ,
prov - > fabric_attr - > prov_name ,
ompi_process_info . nodename , __FILE__ , __LINE__ ) ;
goto error ;
} else if ( 1 = = sep_support_in_provider ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: Scalable EP supported in %s provider. Enabling in MTL. \n " ,
__FILE__ , __LINE__ , prov - > fabric_attr - > prov_name ) ;
}
} else {
/*
* Scalable Endpoints is required for Thread Grouping feature
*/
if ( 1 = = ompi_mtl_ofi . thread_grouping ) {
opal_show_help ( " help-mtl-ofi.txt " , " SEP required " , true ,
ompi_process_info . nodename , __FILE__ , __LINE__ ) ;
goto error ;
}
2018-10-25 00:38:23 +03:00
}
2016-11-23 02:03:20 +03:00
/**
* Open fabric
* The getinfo struct returns a fabric attribute struct that can be used to
* instantiate the virtual or physical network . This opens a " fabric
* provider " . See man fi_fabric for details.
*/
ret = fi_fabric ( prov - > fabric_attr , /* In: Fabric attributes */
& ompi_mtl_ofi . fabric , /* Out: Fabric handle */
NULL ) ; /* Optional context for fabric events */
if ( 0 ! = ret ) {
2017-10-30 22:13:44 +03:00
opal_show_help ( " help-mtl-ofi.txt " , " OFI call fail " , true ,
" fi_fabric " ,
ompi_process_info . nodename , __FILE__ , __LINE__ ,
2017-11-11 15:58:59 +03:00
fi_strerror ( - ret ) , - ret ) ;
2016-11-23 02:03:20 +03:00
goto error ;
}
/**
* Create the access domain , which is the physical or virtual network or
* hardware port / collection of ports . Returns a domain object that can be
* used to create endpoints . See man fi_domain for details .
*/
ret = fi_domain ( ompi_mtl_ofi . fabric , /* In: Fabric object */
prov , /* In: Provider */
& ompi_mtl_ofi . domain , /* Out: Domain oject */
NULL ) ; /* Optional context for domain events */
if ( 0 ! = ret ) {
2017-10-30 22:13:44 +03:00
opal_show_help ( " help-mtl-ofi.txt " , " OFI call fail " , true ,
" fi_domain " ,
ompi_process_info . nodename , __FILE__ , __LINE__ ,
2017-11-11 15:58:59 +03:00
fi_strerror ( - ret ) , - ret ) ;
2016-11-23 02:03:20 +03:00
goto error ;
}
/**
* Save the maximum inject size .
*/
ompi_mtl_ofi . max_inject_size = prov - > tx_attr - > inject_size ;
/**
2018-10-25 00:38:23 +03:00
* The user is not allowed to exceed MTL_OFI_MAX_PROG_EVENT_COUNT .
* The reason is because progress entries array is now a TLS variable
* as opposed to being allocated on the heap for thread - safety purposes .
2016-11-23 02:03:20 +03:00
*/
2018-10-25 00:38:23 +03:00
if ( ompi_mtl_ofi . ofi_progress_event_count > MTL_OFI_MAX_PROG_EVENT_COUNT ) {
ompi_mtl_ofi . ofi_progress_event_count = MTL_OFI_MAX_PROG_EVENT_COUNT ;
}
2018-02-13 20:41:38 +03:00
/**
2018-10-25 00:38:23 +03:00
* Create a transport level communication endpoint . To use the endpoint ,
* it must be bound to the resources consumed by it such as address
* vectors , completion counters or event queues etc , and enabled .
* See man fi_endpoint for more details .
2018-02-13 20:41:38 +03:00
*/
2019-01-24 22:44:53 +03:00
/* use the universe size as a rough guess on the address vector
* size hint that should be passed to fi_av_open ( ) . For regular
* endpoints , the count will be the universe size . For scalable
* endpoints , the count will be the universe size multiplied by
* the number of contexts . In either case , if the universe grows
* ( via dynamic processes ) , the count is a hint , not a hard limit ,
* so libfabric will just be slightly less efficient .
*/
univ_size_str = getenv ( " OMPI_UNIVERSE_SIZE " ) ;
if ( NULL = = univ_size_str | |
( universe_size = strtol ( univ_size_str , NULL , 0 ) ) < = 0 ) {
universe_size = ompi_proc_world_size ( ) ;
2018-12-12 21:53:56 +03:00
}
if ( 1 = = ompi_mtl_ofi . enable_sep ) {
2019-01-24 22:44:53 +03:00
max_ofi_ctxts = ( prov - > domain_attr - > max_ep_tx_ctx <
prov - > domain_attr - > max_ep_rx_ctx ) ?
prov - > domain_attr - > max_ep_tx_ctx :
prov - > domain_attr - > max_ep_rx_ctx ;
num_local_ranks = 1 + ompi_process_info . num_local_peers ;
if ( max_ofi_ctxts < = num_local_ranks ) {
opal_show_help ( " help-mtl-ofi.txt " , " Local ranks exceed ofi contexts " ,
true , prov - > fabric_attr - > prov_name ,
ompi_process_info . nodename , __FILE__ , __LINE__ ) ;
goto error ;
}
2018-12-12 21:53:56 +03:00
/* Provision enough contexts to service all ranks in a node */
2018-12-26 22:29:10 +03:00
max_ofi_ctxts / = num_local_ranks ;
/*
* If num ctxts user specified is more than max allowed , limit to max
* and start round - robining . Print warning to user .
*/
if ( max_ofi_ctxts < ompi_mtl_ofi . num_ofi_contexts ) {
opal_show_help ( " help-mtl-ofi.txt " , " Ctxts exceeded available " ,
true , max_ofi_ctxts ,
ompi_process_info . nodename , __FILE__ , __LINE__ ) ;
ompi_mtl_ofi . num_ofi_contexts = max_ofi_ctxts ;
}
2018-12-12 21:53:56 +03:00
2019-01-24 22:44:53 +03:00
ret = ompi_mtl_ofi_init_sep ( prov , universe_size ) ;
2018-10-25 00:38:23 +03:00
} else {
2019-01-24 22:44:53 +03:00
ret = ompi_mtl_ofi_init_regular_ep ( prov , universe_size ) ;
2016-11-23 02:03:20 +03:00
}
2018-10-25 00:38:23 +03:00
if ( OMPI_SUCCESS ! = ret ) {
2016-11-23 02:03:20 +03:00
goto error ;
}
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . total_ctxts_used = 0 ;
ompi_mtl_ofi . threshold_comm_context_id = 0 ;
2016-11-23 02:03:20 +03:00
2018-10-25 00:38:23 +03:00
/* Enable Endpoint for communication */
ret = fi_enable ( ompi_mtl_ofi . sep ) ;
2016-11-23 02:03:20 +03:00
if ( 0 ! = ret ) {
2018-10-25 00:38:23 +03:00
MTL_OFI_LOG_FI_ERR ( ret , " fi_enable failed " ) ;
2016-11-23 02:03:20 +03:00
goto error ;
}
2018-11-02 00:03:00 +03:00
ompi_mtl_ofi . provider_name = strdup ( prov - > fabric_attr - > prov_name ) ;
2016-11-23 02:03:20 +03:00
/**
* Free providers info since it ' s not needed anymore .
*/
fi_freeinfo ( hints ) ;
hints = NULL ;
fi_freeinfo ( providers ) ;
providers = NULL ;
/**
* Get our address and publish it with modex .
*/
namelen = sizeof ( ep_name ) ;
2018-10-25 00:38:23 +03:00
ret = fi_getname ( ( fid_t ) ompi_mtl_ofi . sep ,
& ep_name [ 0 ] ,
& namelen ) ;
2016-11-23 02:03:20 +03:00
if ( ret ) {
2018-10-25 00:38:23 +03:00
MTL_OFI_LOG_FI_ERR ( ret , " fi_getname failed " ) ;
2016-11-23 02:03:20 +03:00
goto error ;
}
OFI_COMPAT_MODEX_SEND ( ret ,
& mca_mtl_ofi_component . super . mtl_version ,
& ep_name ,
namelen ) ;
if ( OMPI_SUCCESS ! = ret ) {
opal_output_verbose ( 1 , ompi_mtl_base_framework . framework_output ,
" %s:%d: modex_send failed: %d \n " ,
__FILE__ , __LINE__ , ret ) ;
goto error ;
}
ompi_mtl_ofi . epnamelen = namelen ;
/**
* Set the ANY_SRC address .
*/
ompi_mtl_ofi . any_addr = FI_ADDR_UNSPEC ;
return & ompi_mtl_ofi . base ;
error :
if ( providers ) {
( void ) fi_freeinfo ( providers ) ;
}
2018-04-03 02:40:47 +03:00
if ( prov_cq_data ) {
( void ) fi_freeinfo ( prov_cq_data ) ;
}
2016-11-23 02:03:20 +03:00
if ( hints ) {
( void ) fi_freeinfo ( hints ) ;
}
2018-10-25 00:38:23 +03:00
if ( ompi_mtl_ofi . sep ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . sep ) ;
}
2016-11-23 02:03:20 +03:00
if ( ompi_mtl_ofi . av ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . av ) ;
}
2018-12-12 21:53:56 +03:00
if ( ( 0 = = ompi_mtl_ofi . enable_sep ) & &
2018-12-12 00:23:24 +03:00
ompi_mtl_ofi . ofi_ctxt ! = NULL & &
2018-10-25 00:38:23 +03:00
ompi_mtl_ofi . ofi_ctxt [ 0 ] . cq ) {
/* Check if CQ[0] was created for non-SEP case and close if needed */
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . ofi_ctxt [ 0 ] . cq ) ;
2016-11-23 02:03:20 +03:00
}
if ( ompi_mtl_ofi . domain ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . domain ) ;
}
if ( ompi_mtl_ofi . fabric ) {
( void ) fi_close ( ( fid_t ) ompi_mtl_ofi . fabric ) ;
}
2018-10-25 00:38:23 +03:00
if ( ompi_mtl_ofi . comm_to_context ) {
free ( ompi_mtl_ofi . comm_to_context ) ;
}
if ( ompi_mtl_ofi . ofi_ctxt ) {
free ( ompi_mtl_ofi . ofi_ctxt ) ;
}
2018-02-13 20:41:38 +03:00
2016-11-23 02:03:20 +03:00
return NULL ;
}
int
ompi_mtl_ofi_finalize ( struct mca_mtl_base_module_t * mtl )
{
2017-10-30 22:13:44 +03:00
ssize_t ret ;
2016-11-23 02:03:20 +03:00
opal_progress_unregister ( ompi_mtl_ofi_progress_no_inline ) ;
2017-10-30 22:13:44 +03:00
/* Close all the OFI objects */
2018-10-25 00:38:23 +03:00
if ( ( ret = fi_close ( ( fid_t ) ompi_mtl_ofi . sep ) ) ) {
2017-10-30 22:13:44 +03:00
goto finalize_err ;
}
2018-10-25 00:38:23 +03:00
if ( ( ret = fi_close ( ( fid_t ) ompi_mtl_ofi . av ) ) ) {
2017-10-30 22:13:44 +03:00
goto finalize_err ;
}
2018-12-12 21:53:56 +03:00
if ( 0 = = ompi_mtl_ofi . enable_sep ) {
2018-10-25 00:38:23 +03:00
/*
2018-12-12 21:53:56 +03:00
* CQ [ 0 ] is bound to SEP object Nwhen SEP is not supported by a
2018-10-25 00:38:23 +03:00
* provider . OFI spec requires that we close the Endpoint that is bound
* to the CQ before closing the CQ itself . So , for the non - SEP case , we
* handle the closing of CQ [ 0 ] here .
*/
if ( ( ret = fi_close ( ( fid_t ) ompi_mtl_ofi . ofi_ctxt [ 0 ] . cq ) ) ) {
goto finalize_err ;
}
2017-10-30 22:13:44 +03:00
}
2017-11-11 16:04:23 +03:00
if ( ( ret = fi_close ( ( fid_t ) ompi_mtl_ofi . domain ) ) ) {
2017-10-30 22:13:44 +03:00
goto finalize_err ;
}
2017-11-11 16:04:23 +03:00
if ( ( ret = fi_close ( ( fid_t ) ompi_mtl_ofi . fabric ) ) ) {
2017-10-30 22:13:44 +03:00
goto finalize_err ;
2016-11-23 02:03:20 +03:00
}
2018-10-25 00:38:23 +03:00
/* Free memory allocated for TX/RX contexts */
free ( ompi_mtl_ofi . comm_to_context ) ;
free ( ompi_mtl_ofi . ofi_ctxt ) ;
2016-11-23 02:03:20 +03:00
return OMPI_SUCCESS ;
2017-10-30 22:13:44 +03:00
finalize_err :
opal_show_help ( " help-mtl-ofi.txt " , " OFI call fail " , true ,
" fi_close " ,
ompi_process_info . nodename , __FILE__ , __LINE__ ,
2017-11-11 15:58:59 +03:00
fi_strerror ( - ret ) , - ret ) ;
2017-10-30 22:13:44 +03:00
return OMPI_ERROR ;
2016-11-23 02:03:20 +03:00
}