2013-09-10 15:34:09 +00:00
/*
* Copyright ( c ) 2013 Mellanox Technologies , Inc .
* All rights reserved .
* $ COPYRIGHT $
*
* Additional copyrights may follow
*
* $ HEADER $
*/
# define _GNU_SOURCE
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <sys/types.h>
# include <unistd.h>
# include "oshmem_config.h"
# include "orte/util/show_help.h"
# include "shmem.h"
# include "oshmem/runtime/params.h"
# include "oshmem/mca/spml/spml.h"
# include "oshmem/mca/spml/base/base.h"
# include "spml_ikrit_component.h"
# include "oshmem/mca/spml/ikrit/spml_ikrit.h"
# include "orte/util/show_help.h"
/* Forward declarations of the component hooks. They are referenced by the
 * mca_spml_ikrit_component descriptor and defined further down in this file. */
static int mca_spml_ikrit_component_register ( void ) ;
static int mca_spml_ikrit_component_open ( void ) ;
static int mca_spml_ikrit_component_close ( void ) ;
static mca_spml_base_module_t *
mca_spml_ikrit_component_init ( int * priority ,
bool enable_progress_threads ,
bool enable_mpi_threads ) ;
static int mca_spml_ikrit_component_fini ( void ) ;
/*
 * Component descriptor for the ikrit SPML. It bundles the component's
 * name and version with the open/close/register hooks, followed by the
 * init and finalize entry points defined in this file.
 */
mca_spml_base_component_2_0_0_t mca_spml_ikrit_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
MCA_SPML_BASE_VERSION_2_0_0 ,
" ikrit " , /* MCA component name */
OSHMEM_MAJOR_VERSION , /* MCA component major version */
OSHMEM_MINOR_VERSION , /* MCA component minor version */
OSHMEM_RELEASE_VERSION , /* MCA component release version */
mca_spml_ikrit_component_open , /* component open */
mca_spml_ikrit_component_close , /* component close */
/* NOTE(review): presumably the (unused) component query slot -- confirm
   against the mca_base_component_t layout */
NULL ,
/* component parameter registration */
mca_spml_ikrit_component_register
} ,
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
} ,
mca_spml_ikrit_component_init , /* component init */
mca_spml_ikrit_component_fini /* component finalize */
} ;
2014-02-24 07:06:57 +00:00
# if MXM_API >= MXM_VERSION(2,1)
static int check_mxm_tls ( char * var )
{
char * str ;
str = getenv ( var ) ;
if ( NULL = = str ) {
return OSHMEM_SUCCESS ;
}
if ( NULL ! = strstr ( str , " shm " ) ) {
if ( 0 < asprintf ( & str ,
" %s=%s " ,
var , getenv ( var )
) ) {
orte_show_help ( " help-shmem-spml-ikrit.txt " , " mxm tls " , true ,
str ) ;
free ( str ) ;
}
return OSHMEM_ERROR ;
}
2014-02-25 15:03:53 +00:00
if ( NULL = = strstr ( str , " rc " ) & & NULL = = strstr ( str , " dc " ) ) {
2014-02-24 07:06:57 +00:00
mca_spml_ikrit . ud_only = 1 ;
} else {
mca_spml_ikrit . ud_only = 0 ;
}
return OSHMEM_SUCCESS ;
}
static int set_mxm_tls ( )
{
char * tls ;
tls = getenv ( " MXM_OSHMEM_TLS " ) ;
if ( NULL ! = tls ) {
return check_mxm_tls ( " MXM_OSHMEM_TLS " ) ;
}
tls = getenv ( " MXM_TLS " ) ;
if ( NULL = = tls ) {
setenv ( " MXM_OSHMEM_TLS " , mca_spml_ikrit . mxm_tls , 1 ) ;
return OSHMEM_SUCCESS ;
}
return check_mxm_tls ( " MXM_TLS " ) ;
}
# endif
2013-09-10 15:34:09 +00:00
/*
 * Register an integer MCA parameter for this component and return its
 * current value.
 *
 * param_name    - parameter name passed to the MCA variable system
 * default_value - value used when nothing overrides the parameter
 * help_msg      - help text for the parameter (may be NULL)
 *
 * The storage handed to mca_base_component_var_register() is heap
 * allocated and intentionally never freed: the MCA variable system is
 * expected to keep using the storage pointer after registration
 * (NOTE(review): confirm against mca_base_var.h), so the address of a
 * stack local would dangle as soon as this function returns.
 *
 * If the allocation fails the parameter is not registered and
 * default_value is returned.
 */
static inline int mca_spml_ikrit_param_register_int(const char *param_name,
                                                    int default_value,
                                                    const char *help_msg)
{
    int *storage = malloc(sizeof *storage);

    if (NULL == storage) {
        return default_value;
    }

    *storage = default_value;
    (void) mca_base_component_var_register(&mca_spml_ikrit_component.spmlm_version,
                                           param_name,
                                           help_msg,
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           storage);
    return *storage;
}
2013-12-26 08:05:08 +00:00
/*
 * Register a string MCA parameter for this component.
 *
 * param_name    - parameter name passed to the MCA variable system
 * default_value - value stored in *storage before registration
 * help_msg      - help text for the parameter
 * storage       - caller-owned location that holds the parameter value
 */
static void mca_spml_ikrit_param_register_string(const char *param_name,
                                                 char *default_value,
                                                 const char *help_msg,
                                                 char **storage)
{
    /* Seed the caller's storage first; it is then handed to the
     * registration call as the variable's backing store. */
    *storage = default_value;

    (void) mca_base_component_var_register(
        &mca_spml_ikrit_component.spmlm_version,
        param_name,
        help_msg,
        MCA_BASE_VAR_TYPE_STRING,
        NULL,
        0,
        0,
        OPAL_INFO_LVL_9,
        MCA_BASE_VAR_SCOPE_READONLY,
        storage);
}
2013-09-10 15:34:09 +00:00
static int mca_spml_ikrit_component_register ( void )
{
int np ;
mca_spml_ikrit . free_list_num =
mca_spml_ikrit_param_register_int ( " free_list_num " , 1024 , 0 ) ;
mca_spml_ikrit . free_list_max =
mca_spml_ikrit_param_register_int ( " free_list_max " , 1024 , 0 ) ;
mca_spml_ikrit . free_list_inc =
mca_spml_ikrit_param_register_int ( " free_list_inc " , 16 , 0 ) ;
mca_spml_ikrit . priority =
mca_spml_ikrit_param_register_int ( " priority " ,
20 ,
" [integer] ikrit priority " ) ;
2013-12-26 08:05:08 +00:00
mca_spml_ikrit_param_register_string ( " mxm_tls " ,
2014-01-07 11:56:36 +00:00
" rc,ud,self " ,
2013-12-26 08:05:08 +00:00
" [string] TL channels for MXM " ,
& mca_spml_ikrit . mxm_tls ) ;
2013-09-10 15:34:09 +00:00
np = mca_spml_ikrit_param_register_int ( " np " ,
2013-12-04 09:13:17 +00:00
# if MXM_API <= MXM_VERSION(2,0)
2013-09-10 15:34:09 +00:00
128 ,
2013-10-06 12:43:47 +00:00
# else
0 ,
# endif
2013-09-10 15:34:09 +00:00
" [integer] Minimal allowed job's NP to activate ikrit " ) ;
2014-02-06 08:42:45 +00:00
# if MXM_API >= MXM_VERSION(2,0)
mca_spml_ikrit . unsync_conn_max =
mca_spml_ikrit_param_register_int ( " unsync_conn_max " ,
8 ,
" [integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier " ) ;
# endif
2013-09-10 15:34:09 +00:00
if ( oshmem_num_procs ( ) < np ) {
SPML_VERBOSE ( 1 ,
" Not enough ranks (%d<%d), disqualifying spml/ikrit " ,
oshmem_num_procs ( ) , np ) ;
return OSHMEM_ERR_NOT_AVAILABLE ;
}
return OSHMEM_SUCCESS ;
}
/*
 * Progress callback: drives the MXM context once.
 *
 * Any status other than MXM_OK or MXM_ERR_NO_PROGRESS is reported through
 * the help system. The function always returns 1.
 */
int spml_ikrit_progress(void)
{
    const mxm_error_t status = mxm_progress(mca_spml_ikrit.mxm_context);

    /* Both "ok" and "nothing to progress" are normal outcomes. */
    if (MXM_OK == status || MXM_ERR_NO_PROGRESS == status) {
        return 1;
    }

    orte_show_help(" help-shmem-spml-ikrit.txt ",
                   " errors during mxm_progress ",
                   true,
                   mxm_error_string(status));
    return 1;
}
static int mca_spml_ikrit_component_open ( void )
{
mxm_error_t err ;
unsigned long cur_ver ;
cur_ver = mxm_get_version ( ) ;
if ( cur_ver ! = MXM_API ) {
2014-02-18 19:44:37 +00:00
SPML_WARNING (
" OSHMEM was compiled with MXM version %d.%d but version %ld.%ld detected. " ,
MXM_VERNO_MAJOR , MXM_VERNO_MINOR ,
( cur_ver > > MXM_MAJOR_BIT ) & 0xff ,
( cur_ver > > MXM_MINOR_BIT ) & 0xff ) ;
2013-09-10 15:34:09 +00:00
}
2014-02-24 07:06:57 +00:00
mca_spml_ikrit . ud_only = 0 ;
2013-12-04 09:13:17 +00:00
# if MXM_API < MXM_VERSION(2,1)
if ( ( MXM_OK ! = mxm_config_read_context_opts ( & mca_spml_ikrit . mxm_ctx_opts ) ) | |
( MXM_OK ! = mxm_config_read_ep_opts ( & mca_spml_ikrit . mxm_ep_opts ) ) )
2013-09-10 15:34:09 +00:00
# else
2014-02-24 07:06:57 +00:00
if ( OSHMEM_SUCCESS ! = set_mxm_tls ( ) ) {
return OSHMEM_ERROR ;
}
2013-12-04 09:13:17 +00:00
if ( MXM_OK ! = mxm_config_read_opts ( & mca_spml_ikrit . mxm_ctx_opts ,
& mca_spml_ikrit . mxm_ep_opts ,
" OSHMEM " , NULL , 0 ) )
# endif
{
2013-09-10 15:34:09 +00:00
SPML_ERROR ( " Failed to parse MXM configuration " ) ;
return OSHMEM_ERROR ;
}
2013-12-04 09:13:17 +00:00
# if MXM_API < MXM_VERSION(2,0)
2014-02-24 07:06:57 +00:00
mca_spml_ikrit . ud_only = 1 ;
2013-12-04 09:13:17 +00:00
mca_spml_ikrit . mxm_ctx_opts - > ptl_bitmap = ( MXM_BIT ( MXM_PTL_SELF ) | MXM_BIT ( MXM_PTL_RDMA ) ) ;
2013-09-10 15:34:09 +00:00
# endif
2013-12-04 09:13:17 +00:00
err = mxm_init ( mca_spml_ikrit . mxm_ctx_opts , & mca_spml_ikrit . mxm_context ) ;
2013-09-10 15:34:09 +00:00
if ( MXM_OK ! = err ) {
if ( MXM_ERR_NO_DEVICE = = err ) {
SPML_VERBOSE ( 1 ,
" No supported device found, disqualifying spml/ikrit " ) ;
} else {
orte_show_help ( " help-shmem-spml-ikrit.txt " ,
" mxm init " ,
true ,
mxm_error_string ( err ) ) ;
}
return OSHMEM_ERR_NOT_AVAILABLE ;
}
err = mxm_mq_create ( mca_spml_ikrit . mxm_context ,
MXM_SHMEM_MQ_ID ,
& mca_spml_ikrit . mxm_mq ) ;
if ( MXM_OK ! = err ) {
orte_show_help ( " help-shmem-spml-ikrit.txt " ,
" mxm mq create " ,
true ,
mxm_error_string ( err ) ) ;
return OSHMEM_ERROR ;
}
return OSHMEM_SUCCESS ;
}
static int mca_spml_ikrit_component_close ( void )
{
2013-12-04 09:13:17 +00:00
if ( mca_spml_ikrit . mxm_context ) {
2013-09-10 15:34:09 +00:00
mxm_cleanup ( mca_spml_ikrit . mxm_context ) ;
2013-12-04 09:13:17 +00:00
# if MXM_API < MXM_VERSION(2,0)
mxm_config_free ( mca_spml_ikrit . mxm_ep_opts ) ;
mxm_config_free ( mca_spml_ikrit . mxm_ctx_opts ) ;
# else
mxm_config_free_ep_opts ( mca_spml_ikrit . mxm_ep_opts ) ;
mxm_config_free_context_opts ( mca_spml_ikrit . mxm_ctx_opts ) ;
# endif
}
2013-09-10 15:34:09 +00:00
mca_spml_ikrit . mxm_context = NULL ;
return OSHMEM_SUCCESS ;
}
/*
 * Create the MXM endpoint on the component's context.
 *
 * Returns OSHMEM_SUCCESS when the endpoint was created; otherwise a help
 * message with the MXM error string is shown and OSHMEM_ERROR is returned.
 */
static int spml_ikrit_mxm_init(void)
{
    mxm_error_t status;

#if MXM_API < MXM_VERSION(2,0)
    /* Only relevant for SHM PTL - ignore */
    mca_spml_ikrit.mxm_ep_opts->job_id = 0;
    mca_spml_ikrit.mxm_ep_opts->local_rank = 0;
    mca_spml_ikrit.mxm_ep_opts->num_local_procs = 0;
    mca_spml_ikrit.mxm_ep_opts->rdma.drain_cq = 1;
#endif

    /* Open MXM endpoint */
    status = mxm_ep_create(mca_spml_ikrit.mxm_context,
                           mca_spml_ikrit.mxm_ep_opts,
                           &mca_spml_ikrit.mxm_ep);
    if (MXM_OK == status) {
        return OSHMEM_SUCCESS;
    }

    orte_show_help(" help-shmem-spml-ikrit.txt ",
                   " unable to create endpoint ",
                   true,
                   mxm_error_string(status));
    return OSHMEM_ERROR;
}
/*
 * Component init hook.
 *
 * priority                - in: priority to compare against;
 *                           out: always overwritten with this component's
 *                           priority
 * enable_progress_threads - not used by this function
 * enable_mpi_threads      - not used by this function
 *
 * Returns NULL when the incoming priority is higher than this component's
 * priority or when the MXM endpoint cannot be created; otherwise resets
 * the operation counters and returns the ikrit module.
 */
static mca_spml_base_module_t *
mca_spml_ikrit_component_init(int *priority,
                              bool enable_progress_threads,
                              bool enable_mpi_threads)
{
    int incoming_priority;

    SPML_VERBOSE(10, " in ikrit, my priority is %d \n ", mca_spml_ikrit.priority);

    /* The caller's slot receives our priority on every path; remember the
     * incoming value first so the comparison still sees it. */
    incoming_priority = *priority;
    *priority = mca_spml_ikrit.priority;

    if (incoming_priority > mca_spml_ikrit.priority) {
        return NULL;
    }

    if (OSHMEM_SUCCESS != spml_ikrit_mxm_init()) {
        return NULL;
    }

    mca_spml_ikrit.n_active_puts = 0;
    mca_spml_ikrit.n_active_gets = 0;
    mca_spml_ikrit.n_mxm_fences = 0;

    SPML_VERBOSE(50, " *** ikrit initialized **** ");
    return &mca_spml_ikrit.super;
}
static int mca_spml_ikrit_component_fini ( void )
{
opal_progress_unregister ( spml_ikrit_progress ) ;
if ( NULL ! = mca_spml_ikrit . mxm_ep ) {
mxm_ep_destroy ( mca_spml_ikrit . mxm_ep ) ;
}
2013-10-22 06:13:00 +00:00
if ( ! mca_spml_ikrit . enabled )
return OSHMEM_SUCCESS ; /* never selected.. return success.. */
mca_spml_ikrit . enabled = false ; /* not anymore */
2013-09-10 15:34:09 +00:00
return OSHMEM_SUCCESS ;
}