/* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #define _GNU_SOURCE #include #include #include #include "oshmem_config.h" #include "orte/util/show_help.h" #include "shmem.h" #include "oshmem/runtime/params.h" #include "oshmem/mca/spml/spml.h" #include "oshmem/mca/spml/base/base.h" #include "spml_ikrit_component.h" #include "oshmem/mca/spml/ikrit/spml_ikrit.h" #include "orte/util/show_help.h" static int mca_spml_ikrit_component_register(void); static int mca_spml_ikrit_component_open(void); static int mca_spml_ikrit_component_close(void); static mca_spml_base_module_t* mca_spml_ikrit_component_init(int* priority, bool enable_progress_threads, bool enable_mpi_threads); static int mca_spml_ikrit_component_fini(void); mca_spml_base_component_2_0_0_t mca_spml_ikrit_component = { /* First, the mca_base_component_t struct containing meta information about the component itself */ { MCA_SPML_BASE_VERSION_2_0_0, "ikrit", /* MCA component name */ OSHMEM_MAJOR_VERSION, /* MCA component major version */ OSHMEM_MINOR_VERSION, /* MCA component minor version */ OSHMEM_RELEASE_VERSION, /* MCA component release version */ mca_spml_ikrit_component_open, /* component open */ mca_spml_ikrit_component_close, /* component close */ NULL, mca_spml_ikrit_component_register }, { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, mca_spml_ikrit_component_init, /* component init */ mca_spml_ikrit_component_fini /* component finalize */ }; static inline int mca_spml_ikrit_param_register_int(const char* param_name, int default_value, const char *help_msg) { int param_value; param_value = default_value; (void) mca_base_component_var_register(&mca_spml_ikrit_component.spmlm_version, param_name, help_msg, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, ¶m_value); return param_value; } static int mca_spml_ikrit_component_register(void) { int np; mca_spml_ikrit.free_list_num = mca_spml_ikrit_param_register_int("free_list_num", 1024, 0); mca_spml_ikrit.free_list_max = mca_spml_ikrit_param_register_int("free_list_max", 1024, 0); mca_spml_ikrit.free_list_inc = mca_spml_ikrit_param_register_int("free_list_inc", 16, 0); mca_spml_ikrit.priority = mca_spml_ikrit_param_register_int("priority", 20, "[integer] ikrit priority"); mca_spml_ikrit.n_relays = mca_spml_ikrit_param_register_int("use_relays", -1, "[integer] First N ranks on host will receive and forward put messages to other ranks running on it. Can be used to as work around Sandy Bridge far socket problem"); np = mca_spml_ikrit_param_register_int("np", #if MXM_API > MXM_VERSION(1,5) 128, #else 0, #endif "[integer] Minimal allowed job's NP to activate ikrit"); if (oshmem_num_procs() < np) { SPML_VERBOSE(1, "Not enough ranks (%d<%d), disqualifying spml/ikrit", oshmem_num_procs(), np); return OSHMEM_ERR_NOT_AVAILABLE; } return OSHMEM_SUCCESS; } int spml_ikrit_progress(void) { mxm_error_t err; err = mxm_progress(mca_spml_ikrit.mxm_context); if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err)) { orte_show_help("help-shmem-spml-ikrit.txt", "errors during mxm_progress", true, mxm_error_string(err)); } return 1; } static int mca_spml_ikrit_component_open(void) { mxm_error_t err; unsigned long cur_ver; cur_ver = mxm_get_version(); if (cur_ver != MXM_API) { char *str; if (asprintf(&str, "SHMEM was compiled with MXM version %d.%d but " "version %ld.%ld detected.", MXM_VERNO_MAJOR, MXM_VERNO_MINOR, (cur_ver >> MXM_MAJOR_BIT) & 0xff, (cur_ver >> MXM_MINOR_BIT) & 0xff) > 0) { orte_show_help("help-shmem-spml-ikrit.txt", "mxm init", true, str); free(str); } return OSHMEM_ERROR; } #if MXM_API < MXM_VERSION(1,5) mxm_context_opts_t mxm_opts; mxm_fill_context_opts(&mxm_opts); /* only enable rmda and self ptls */ mxm_opts.ptl_bitmap = (MXM_BIT(MXM_PTL_SELF) | MXM_BIT(MXM_PTL_RDMA)); #else mxm_context_opts_t *mxm_opts; err = mxm_config_read_context_opts(&mxm_opts); if (MXM_OK != err) { SPML_ERROR("Failed to parse MXM configuration"); return OSHMEM_ERROR; } #if MXM_API < MXM_VERSION(2, 0) mxm_opts->ptl_bitmap = (MXM_BIT(MXM_PTL_SELF) | MXM_BIT(MXM_PTL_RDMA)); #endif #endif #if MXM_API < MXM_VERSION(1,5) err = mxm_init(&mxm_opts, &mca_spml_ikrit.mxm_context); #else err = mxm_init(mxm_opts, &mca_spml_ikrit.mxm_context); #if MXM_API < MXM_VERSION(2, 0) mxm_config_free(mxm_opts); #else mxm_config_free_context_opts(mxm_opts); #endif #endif if (MXM_OK != err) { if (MXM_ERR_NO_DEVICE == err) { SPML_VERBOSE(1, "No supported device found, disqualifying spml/ikrit"); } else { orte_show_help("help-shmem-spml-ikrit.txt", "mxm init", true, mxm_error_string(err)); } return OSHMEM_ERR_NOT_AVAILABLE; } err = mxm_mq_create(mca_spml_ikrit.mxm_context, MXM_SHMEM_MQ_ID, &mca_spml_ikrit.mxm_mq); if (MXM_OK != err) { orte_show_help("help-shmem-spml-ikrit.txt", "mxm mq create", true, mxm_error_string(err)); return OSHMEM_ERROR; } return OSHMEM_SUCCESS; } static int mca_spml_ikrit_component_close(void) { if (mca_spml_ikrit.mxm_context) mxm_cleanup(mca_spml_ikrit.mxm_context); mca_spml_ikrit.mxm_context = NULL; return OSHMEM_SUCCESS; } static int spml_ikrit_mxm_init(void) { mxm_error_t err; mxm_ep_opts_t *p_ep_opts; #if MXM_API < MXM_VERSION(1,5) mxm_ep_opts_t ep_opt; struct sockaddr_mxm_local_proc sa_bind_self; struct sockaddr_mxm_ib_local sa_bind_rdma; p_ep_opts = &ep_opt; /* Setup the endpoint options and local addresses to bind to. */ mxm_fill_ep_opts(&ep_opt); sa_bind_self.sa_family = AF_MXM_LOCAL_PROC; sa_bind_self.context_id = 0; sa_bind_self.process_id = oshmem_proc_local()->proc_name.vpid; sa_bind_rdma.sa_family = AF_MXM_IB_LOCAL; sa_bind_rdma.lid = 0; sa_bind_rdma.pkey = 0; sa_bind_rdma.qp_num = 0; sa_bind_rdma.sl = 0; ep_opt.ptl_bind_addr[MXM_PTL_SELF] = (struct sockaddr*) &sa_bind_self; ep_opt.ptl_bind_addr[MXM_PTL_RDMA] = (struct sockaddr*) &sa_bind_rdma; #else err = mxm_config_read_ep_opts(&p_ep_opts); if (err != MXM_OK) { SPML_ERROR("Failed to parse MXM configuration"); return OSHMEM_ERROR; } #if MXM_API < MXM_VERSION(2,0) /* Only relevant for SHM PTL - ignore */ p_ep_opts->job_id = 0; p_ep_opts->local_rank = 0; p_ep_opts->num_local_procs = 0; p_ep_opts->rdma.drain_cq = 1; #endif #endif /* Open MXM endpoint */ err = mxm_ep_create(mca_spml_ikrit.mxm_context, p_ep_opts, &mca_spml_ikrit.mxm_ep); if (MXM_OK != err) { orte_show_help("help-shmem-spml-ikrit.txt", "unable to create endpoint", true, mxm_error_string(err)); return OSHMEM_ERROR; } #if MXM_API >= MXM_VERSION(1,5) #if MXM_API < MXM_VERSION(2,0) mxm_config_free(p_ep_opts); #else mxm_config_free_ep_opts(p_ep_opts); #endif #endif return OSHMEM_SUCCESS; } static mca_spml_base_module_t* mca_spml_ikrit_component_init(int* priority, bool enable_progress_threads, bool enable_mpi_threads) { SPML_VERBOSE( 10, "in ikrit, my priority is %d\n", mca_spml_ikrit.priority); if ((*priority) > mca_spml_ikrit.priority) { *priority = mca_spml_ikrit.priority; return NULL ; } *priority = mca_spml_ikrit.priority; if (OSHMEM_SUCCESS != spml_ikrit_mxm_init()) return NULL ; mca_spml_ikrit.n_active_puts = 0; mca_spml_ikrit.n_active_gets = 0; mca_spml_ikrit.n_mxm_fences = 0; SPML_VERBOSE(50, "*** ikrit initialized ****"); return &mca_spml_ikrit.super; } static int mca_spml_ikrit_component_fini(void) { opal_progress_unregister(spml_ikrit_progress); if (NULL != mca_spml_ikrit.mxm_ep) { mxm_ep_destroy(mca_spml_ikrit.mxm_ep); } if(!mca_spml_ikrit.enabled) return OSHMEM_SUCCESS; /* never selected.. return success.. */ mca_spml_ikrit.enabled = false; /* not anymore */ return OSHMEM_SUCCESS; }