2011-12-09 21:24:07 +00:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
 * reserved.
 * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
 * Copyright (c) 2017 Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
|
|
|
|
|
|
|
#include "btl_ugni.h"
|
|
|
|
#include "btl_ugni_frag.h"
|
|
|
|
#include "btl_ugni_rdma.h"
|
2012-02-10 00:47:29 +00:00
|
|
|
#include "btl_ugni_smsg.h"
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
#include "opal/util/sys_limits.h"
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <fcntl.h>
|
2017-03-03 11:52:12 -07:00
|
|
|
#include <ctype.h>
|
2015-11-02 12:07:08 -07:00
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
#include "opal/memoryhooks/memory.h"
|
2014-08-08 18:02:46 +00:00
|
|
|
#include "opal/runtime/opal_params.h"
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-01-07 14:29:56 -07:00
|
|
|
#include "opal/mca/base/mca_base_pvar.h"
|
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
static int btl_ugni_component_register(void);
|
|
|
|
static int btl_ugni_component_open(void);
|
|
|
|
static int btl_ugni_component_close(void);
|
|
|
|
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
|
|
|
|
static int mca_btl_ugni_component_progress(void);
|
2015-11-02 12:07:08 -07:00
|
|
|
static unsigned long mca_btl_ugni_ugni_page_size = 0;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
mca_btl_ugni_component_t mca_btl_ugni_component = {
|
2013-11-18 04:58:37 +00:00
|
|
|
.super = {
|
2011-12-09 21:24:07 +00:00
|
|
|
/* First, the mca_base_component_t struct containing meta information
|
|
|
|
about the component itself */
|
2013-11-18 04:58:37 +00:00
|
|
|
.btl_version = {
|
2014-07-10 16:31:15 +00:00
|
|
|
MCA_BTL_DEFAULT_VERSION("ugni"),
|
2013-11-18 04:58:37 +00:00
|
|
|
.mca_open_component = btl_ugni_component_open,
|
|
|
|
.mca_close_component = btl_ugni_component_close,
|
|
|
|
.mca_register_component_params = btl_ugni_component_register,
|
2011-12-09 21:24:07 +00:00
|
|
|
},
|
2013-11-18 04:58:37 +00:00
|
|
|
.btl_data = {
|
2014-07-10 16:31:15 +00:00
|
|
|
.param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT
|
2011-12-09 21:24:07 +00:00
|
|
|
},
|
2013-11-18 04:58:37 +00:00
|
|
|
.btl_init = mca_btl_ugni_component_init,
|
|
|
|
.btl_progress = mca_btl_ugni_component_progress,
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
mca_base_var_enum_value_t rcache_values[] = {
|
|
|
|
{MCA_BTL_UGNI_RCACHE_UDREG, "udreg"},
|
|
|
|
{MCA_BTL_UGNI_RCACHE_GRDMA, "grdma"},
|
2013-11-18 04:58:37 +00:00
|
|
|
{-1, NULL} /* sentinal */
|
|
|
|
};
|
|
|
|
|
2017-03-03 11:52:12 -07:00
|
|
|
mca_base_var_enum_value_flag_t cdm_flags[] = {
|
|
|
|
{.flag = GNI_CDM_MODE_FORK_NOCOPY, .string = "fork-no-copy", .conflicting_flag = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
|
|
|
|
{.flag = GNI_CDM_MODE_FORK_FULLCOPY, .string = "fork-full-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
|
|
|
|
{.flag = GNI_CDM_MODE_FORK_PARTCOPY, .string = "fork-part-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_FULLCOPY},
|
|
|
|
{.flag = GNI_CDM_MODE_ERR_NO_KILL, .string = "err-no-kill", .conflicting_flag = GNI_CDM_MODE_ERR_ALL_KILL},
|
|
|
|
{.flag = GNI_CDM_MODE_ERR_ALL_KILL, .string = "err-all-kill", .conflicting_flag = GNI_CDM_MODE_ERR_NO_KILL},
|
|
|
|
{.flag = GNI_CDM_MODE_FAST_DATAGRAM_POLL, .string = "fast-datagram-poll", .conflicting_flag = 0},
|
|
|
|
{.flag = GNI_CDM_MODE_BTE_SINGLE_CHANNEL, .string = "bte-single-channel", .conflicting_flag = 0},
|
|
|
|
{.flag = GNI_CDM_MODE_USE_PCI_IOMMU, .string = "use-pci-iommu", .conflicting_flag = 0},
|
|
|
|
{.flag = GNI_CDM_MODE_MDD_DEDICATED, .string = "mdd-dedicated", .conflicting_flag = GNI_CDM_MODE_MDD_SHARED},
|
|
|
|
{.flag = GNI_CDM_MODE_MDD_SHARED, .string = "mdd-shared", .conflicting_flag = GNI_CDM_MODE_MDD_DEDICATED},
|
|
|
|
{.flag = GNI_CDM_MODE_FMA_DEDICATED, .string = "fma-dedicated", .conflicting_flag = GNI_CDM_MODE_FMA_SHARED},
|
|
|
|
{.flag = GNI_CDM_MODE_FMA_SHARED, .string = "fma-shared", .conflicting_flag = GNI_CDM_MODE_FMA_DEDICATED},
|
|
|
|
{.flag = GNI_CDM_MODE_CACHED_AMO_ENABLED, .string = "cached-amo-enabled", .conflicting_flag = 0},
|
|
|
|
{.flag = GNI_CDM_MODE_CQ_NIC_LOCAL_PLACEMENT, .string = "cq-nic-placement", .conflicting_flag = 0},
|
|
|
|
{.flag = GNI_CDM_MODE_FMA_SMALL_WINDOW, .string = "fma-small-window", .conflicting_flag = 0},
|
2017-03-12 22:37:35 -06:00
|
|
|
{.string = NULL}
|
2017-03-03 11:52:12 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *value, void *obj)
|
|
|
|
{
|
|
|
|
gni_statistic_t statistic = (gni_statistic_t) (intptr_t) pvar->ctx;
|
|
|
|
gni_return_t rc = GNI_RC_SUCCESS;
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
|
|
|
rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].devices[i].dev_handle, statistic,
|
|
|
|
((unsigned int *) value) + i);
|
|
|
|
}
|
2017-03-03 11:52:12 -07:00
|
|
|
|
|
|
|
return mca_btl_rc_ugni_to_opal (rc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Performance-variable notification callback for the NIC statistics.
 *
 * On a handle-bind event it reports, through @p count, that the variable
 * carries one value per virtual device.  Every other event is ignored.
 *
 * @param pvar   performance variable (unused)
 * @param event  event being signalled
 * @param obj    bound object (unused)
 * @param count  written only for MCA_BASE_PVAR_HANDLE_BIND
 *
 * @returns OPAL_SUCCESS always
 */
static inline int mca_btl_ugni_notify_stat (mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj, int *count)
{
    if (MCA_BASE_PVAR_HANDLE_BIND != event) {
        /* nothing to do for any other event */
        return OPAL_SUCCESS;
    }

    /* one value for each virtual device handle */
    *count = mca_btl_ugni_component.virtual_device_count;

    return OPAL_SUCCESS;
}
|
|
|
|
|
|
|
|
static int btl_ugni_component_register(void)
|
2011-12-09 21:24:07 +00:00
|
|
|
{
|
2013-11-18 04:58:37 +00:00
|
|
|
mca_base_var_enum_t *new_enum;
|
2015-01-05 16:03:15 -07:00
|
|
|
gni_nic_device_t device_type;
|
2015-11-02 12:07:08 -07:00
|
|
|
char *mpool_hints_tmp = NULL;
|
2013-11-18 04:58:37 +00:00
|
|
|
int rc;
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
|
2014-10-08 10:10:19 -06:00
|
|
|
"uGNI byte transport layer");
|
2013-03-27 21:09:41 +00:00
|
|
|
|
|
|
|
mca_btl_ugni_component.ugni_free_list_num = 8;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_free_list_num);
|
2014-10-08 10:10:19 -06:00
|
|
|
mca_btl_ugni_component.ugni_free_list_max = 4096;
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_free_list_max);
|
|
|
|
mca_btl_ugni_component.ugni_free_list_inc = 64;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_free_list_inc);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.ugni_eager_num = 16;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2013-11-18 04:58:37 +00:00
|
|
|
"eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_ugni_component.ugni_eager_num);
|
2013-03-27 21:09:41 +00:00
|
|
|
mca_btl_ugni_component.ugni_eager_max = 128;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_eager_max);
|
|
|
|
mca_btl_ugni_component.ugni_eager_inc = 16;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_eager_inc);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.remote_cq_size = 40000;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"remote_cq_size", "Remote SMSG completion queue "
|
|
|
|
"size (default 40000)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.remote_cq_size);
|
2018-02-15 12:57:21 -07:00
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
mca_btl_ugni_component.local_cq_size = 8192;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2018-02-15 12:57:21 -07:00
|
|
|
"local_cq_size", "Local SMSG completion queue size "
|
|
|
|
"(default 8k)", MCA_BASE_VAR_TYPE_INT,
|
2013-03-27 21:09:41 +00:00
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.local_cq_size);
|
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_component.local_rdma_cq_size = 1024;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"local_rdma_cq_size", "Local FMA/RDMA completion queue size "
|
|
|
|
"(default: 1024)",MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_ugni_component.local_rdma_cq_size);
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"smsg_limit", "Maximum size message that "
|
|
|
|
"will be sent using the SMSG/MSGQ protocol "
|
|
|
|
"(0 - autoselect(default), 16k max)",
|
|
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_smsg_limit);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.smsg_max_credits = 32;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"smsg_max_credits", "Maximum number of "
|
|
|
|
"outstanding SMSG/MSGQ message (default 32)",
|
|
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.smsg_max_credits);
|
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
#if OPAL_C_HAVE__THREAD_LOCAL
|
|
|
|
mca_btl_ugni_component.bind_threads_to_devices = true;
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2018-02-15 12:57:21 -07:00
|
|
|
"bind_devices", "Bind threads to virtual "
|
|
|
|
"devices. In general this should improve "
|
|
|
|
"RDMA performance (default: true)",
|
|
|
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
|
2013-03-27 21:09:41 +00:00
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2018-02-15 12:57:21 -07:00
|
|
|
&mca_btl_ugni_component.bind_threads_to_devices);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
mca_btl_ugni_component.ugni_fma_limit = -1;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"fma_limit", "Default maximum size message that "
|
|
|
|
"will be sent using the FMA (Fast Memory "
|
|
|
|
"Access) protocol (default: -1 (don't use), 64k max)",
|
|
|
|
MCA_BASE_VAR_TYPE_LONG, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE | MCA_BASE_VAR_FLAG_DEPRECATED,
|
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.ugni_fma_limit);
|
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_component.ugni_fma_get_limit = 2048;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"fma_get_limit", "Maximum size message that "
|
|
|
|
"will be sent using the FMA (Fast Memory "
|
|
|
|
"Access) protocol for get (default 2k, "
|
|
|
|
"64k max)",
|
|
|
|
MCA_BASE_VAR_TYPE_LONG, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_ugni_component.ugni_fma_get_limit);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.ugni_fma_put_limit = 4096;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"fma_put_limit", "Maximum size message that "
|
|
|
|
"will be sent using the FMA (Fast Memory "
|
|
|
|
"Access) protocol for put (default: 4k, "
|
|
|
|
"64k max)",
|
|
|
|
MCA_BASE_VAR_TYPE_LONG, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_ugni_component.ugni_fma_put_limit);
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
mca_btl_ugni_component.rdma_max_retries = 16;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.rdma_max_retries);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.smsg_max_retries = 16;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.smsg_max_retries);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.max_mem_reg = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"max_mem_reg", "Maximum number of "
|
|
|
|
"memory registrations a process can "
|
|
|
|
"hold (0 - autoselect, -1 - unlimited)"
|
|
|
|
" (default 0)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
2013-11-18 04:58:37 +00:00
|
|
|
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
2013-03-27 21:09:41 +00:00
|
|
|
&mca_btl_ugni_component.max_mem_reg);
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
mca_btl_ugni_component.mbox_increment = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"mbox_inc", "Number of SMSG mailboxes to "
|
|
|
|
"allocate in each block (0 - autoselect(default))",
|
|
|
|
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
/* communication domain flags */
|
|
|
|
rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum);
|
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
|
|
|
|
GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW;
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_component.cdm_flags_id = mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2017-03-12 22:37:35 -06:00
|
|
|
"cdm_flags", "Flags to set when creating a communication domain "
|
2018-02-15 12:57:21 -07:00
|
|
|
" (default: fork-full-copy,cached-amo-enabled,err-no-kill,fast-datagram-poll,"
|
2017-03-12 22:37:35 -06:00
|
|
|
"fma-shared,fma-small-window)",
|
|
|
|
MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags);
|
|
|
|
OBJ_RELEASE(new_enum);
|
|
|
|
|
|
|
|
mca_btl_ugni_component.virtual_device_count = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"virtual_device_count", "Number of virtual devices to create. Higher numbers may "
|
2018-02-15 12:57:21 -07:00
|
|
|
"result in better performance when using threads. (default: 0 (auto), max: 128)",
|
2017-03-12 22:37:35 -06:00
|
|
|
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count);
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
/* determine if there are get alignment restrictions */
|
|
|
|
GNI_GetDeviceType (&device_type);
|
|
|
|
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
mca_btl_ugni_component.smsg_page_size = 2 << 20;
|
2015-11-02 12:07:08 -07:00
|
|
|
if (GNI_DEVICE_GEMINI == device_type) {
|
|
|
|
if (access ("/sys/class/gemini/ghal0/mrt", R_OK)) {
|
|
|
|
int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY);
|
|
|
|
char buffer[10];
|
|
|
|
|
|
|
|
if (0 <= fd) {
|
|
|
|
memset (buffer, 0, sizeof (buffer));
|
|
|
|
read (fd, buffer, sizeof (buffer) - 1);
|
|
|
|
close (fd);
|
|
|
|
mca_btl_ugni_ugni_page_size = strtol (buffer, NULL, 10) * 1024;
|
|
|
|
mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2017-03-12 22:37:35 -06:00
|
|
|
"smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)",
|
|
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size);
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2014-12-24 10:41:58 -07:00
|
|
|
mca_btl_ugni_component.progress_thread_requested = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"request_progress_thread",
|
|
|
|
"Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support",
|
|
|
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_ugni_component.progress_thread_requested);
|
|
|
|
|
2015-01-07 14:29:56 -07:00
|
|
|
/* performance variables */
|
|
|
|
mca_btl_ugni_progress_thread_wakeups = 0;
|
|
|
|
(void) mca_base_component_pvar_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"progress_thread_wakeups", "Number of times the progress thread "
|
|
|
|
"has been woken", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
|
|
|
|
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
|
|
|
|
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
|
|
|
|
NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);
|
|
|
|
|
2017-03-03 11:52:12 -07:00
|
|
|
/* register network statistics as performance variables */
|
|
|
|
for (int i = 0 ; i < GNI_NUM_STATS ; ++i) {
|
|
|
|
char name[128], desc[128];
|
|
|
|
size_t str_len = strlen (gni_statistic_str[i]);
|
|
|
|
|
|
|
|
assert (str_len < sizeof (name));
|
|
|
|
|
|
|
|
/* we can get an all-caps string for the variable from gni_statistic_str. need to make it lowercase
|
|
|
|
* to match ompi standards */
|
|
|
|
for (size_t j = 0 ; j < str_len ; ++j) {
|
|
|
|
name[j] = tolower (gni_statistic_str[i][j]);
|
|
|
|
desc[j] = ('_' == name[j]) ? ' ' : name[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
name[str_len] = '\0';
|
|
|
|
desc[str_len] = '\0';
|
|
|
|
|
|
|
|
(void) mca_base_component_pvar_register (&mca_btl_ugni_component.super.btl_version, name, desc,
|
|
|
|
OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER,
|
|
|
|
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
|
|
|
|
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
|
|
|
|
mca_btl_ugni_get_stat, NULL, mca_btl_ugni_notify_stat,
|
|
|
|
(void *) (intptr_t) i);
|
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
/* btl/ugni can only support only a fixed set of rcache components (these rcache components have compatible resource
|
2013-11-18 04:58:37 +00:00
|
|
|
* structures) */
|
2015-11-02 12:07:08 -07:00
|
|
|
rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
|
2013-11-18 04:58:37 +00:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
/* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */
|
|
|
|
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA;
|
2013-11-18 04:58:37 +00:00
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
2017-03-12 22:37:35 -06:00
|
|
|
"rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum,
|
2013-11-18 04:58:37 +00:00
|
|
|
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
2015-11-02 12:07:08 -07:00
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
|
2013-11-18 04:58:37 +00:00
|
|
|
OBJ_RELEASE(new_enum);
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (mca_btl_ugni_ugni_page_size) {
|
|
|
|
rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size);
|
|
|
|
if (rc < 0) {
|
|
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_btl_ugni_component.mpool_hints = mpool_hints_tmp;
|
|
|
|
} else {
|
|
|
|
mca_btl_ugni_component.mpool_hints = "page_size=2M";
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
"mpool_hints", "hints to use when selecting a memory pool (default: "
|
|
|
|
"\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints);
|
|
|
|
free (mpool_hints_tmp);
|
|
|
|
|
2015-09-29 15:40:35 -06:00
|
|
|
/* ensure we loose send exclusivity to sm and vader if they are enabled */
|
|
|
|
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
/* smsg threshold */
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024;
|
2011-12-09 21:24:07 +00:00
|
|
|
mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024;
|
2012-04-10 19:56:19 +00:00
|
|
|
mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024;
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024;
|
|
|
|
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-01-05 16:03:15 -07:00
|
|
|
mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;
|
|
|
|
|
2015-10-13 10:25:13 -05:00
|
|
|
/*
|
|
|
|
* see def. of ALIGNMENT_MASK to figure this one out
|
|
|
|
*/
|
2016-05-25 10:45:00 -06:00
|
|
|
/* both gemini and aries have a 4-byte alignment requirement on remote addresses */
|
|
|
|
mca_btl_ugni_module.super.btl_get_alignment = 4;
|
2015-01-05 16:03:15 -07:00
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
/* threshold for put */
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
|
2015-01-05 14:35:00 -07:00
|
|
|
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS |
|
|
|
|
MCA_BTL_FLAGS_ATOMIC_FOPS;
|
|
|
|
mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD |
|
|
|
|
MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
|
|
|
|
MCA_BTL_ATOMIC_SUPPORTS_CSWAP;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2016-05-25 10:42:52 -06:00
|
|
|
if (GNI_DEVICE_ARIES == device_type) {
|
|
|
|
/* aries supports additional atomic operations */
|
|
|
|
mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX |
|
|
|
|
MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR |
|
|
|
|
MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT;
|
|
|
|
}
|
|
|
|
|
2015-01-05 16:03:15 -07:00
|
|
|
mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
|
2012-06-21 17:09:12 +00:00
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
|
|
|
|
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
|
|
|
|
|
2015-02-19 16:13:37 -07:00
|
|
|
mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0;
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_put_limit;
|
2015-02-19 16:13:37 -07:00
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
/* Call the BTL based to register its MCA params */
|
|
|
|
mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
|
|
|
|
&mca_btl_ugni_module.super);
|
2012-04-19 21:51:44 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
btl_ugni_component_open(void)
|
|
|
|
{
|
|
|
|
mca_btl_ugni_component.ugni_num_btls = 0;
|
|
|
|
mca_btl_ugni_component.modules = NULL;
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* component cleanup - sanity checking of queue lengths
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
btl_ugni_component_close(void)
|
|
|
|
{
|
2017-03-12 22:37:35 -06:00
|
|
|
mca_btl_ugni_fini ();
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
free (mca_btl_ugni_component.modules);
|
|
|
|
mca_btl_ugni_component.modules = NULL;
|
2014-05-13 21:22:33 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static mca_btl_base_module_t **
|
|
|
|
mca_btl_ugni_component_init (int *num_btl_modules,
|
|
|
|
bool enable_progress_threads,
|
|
|
|
bool enable_mpi_threads)
|
|
|
|
{
|
|
|
|
struct mca_btl_base_module_t **base_modules;
|
|
|
|
mca_btl_ugni_module_t *ugni_modules;
|
|
|
|
int rc;
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 16384;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (65536 < mca_btl_ugni_component.ugni_fma_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_fma_limit = 65536;
|
|
|
|
}
|
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
if (-1 != mca_btl_ugni_component.ugni_fma_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_fma_get_limit = mca_btl_ugni_component.ugni_fma_limit;
|
|
|
|
} else if (65536 < mca_btl_ugni_component.ugni_fma_get_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_fma_get_limit = 65536;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (-1 != mca_btl_ugni_component.ugni_fma_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_fma_put_limit = mca_btl_ugni_component.ugni_fma_limit;
|
|
|
|
} else if (65536 < mca_btl_ugni_component.ugni_fma_put_limit) {
|
|
|
|
mca_btl_ugni_component.ugni_fma_put_limit = 65536;
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_put_limit;
|
|
|
|
|
|
|
|
/* limit the number of outstanding RDMA operations over all devices */
|
|
|
|
mca_btl_ugni_component.active_rdma_threshold = mca_btl_ugni_component.local_rdma_cq_size;
|
2015-02-19 16:13:37 -07:00
|
|
|
|
2014-12-24 10:41:58 -07:00
|
|
|
if (enable_mpi_threads && mca_btl_ugni_component.progress_thread_requested) {
|
|
|
|
mca_btl_ugni_component.progress_thread_enabled = 1;
|
2014-12-18 10:23:14 -07:00
|
|
|
}
|
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
/* Initialize ugni library and create communication domain */
|
2017-03-12 22:37:35 -06:00
|
|
|
rc = mca_btl_ugni_init();
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
2011-12-09 21:24:07 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
/* For now only create a single BTL module */
|
|
|
|
mca_btl_ugni_component.ugni_num_btls = 1;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
BTL_VERBOSE(("btl/ugni initializing"));
|
|
|
|
|
|
|
|
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
|
2017-03-12 22:37:35 -06:00
|
|
|
calloc (mca_btl_ugni_component.ugni_num_btls, sizeof (mca_btl_ugni_module_t));
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
|
|
|
|
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
base_modules = (struct mca_btl_base_module_t **)
|
|
|
|
calloc (mca_btl_ugni_component.ugni_num_btls,
|
|
|
|
sizeof (struct mca_btl_base_module_t *));
|
|
|
|
if (OPAL_UNLIKELY(NULL == base_modules)) {
|
|
|
|
BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (mca_btl_ugni_component.smsg_page_size != (unsigned long) opal_getpagesize ()) {
|
|
|
|
if (mca_btl_ugni_ugni_page_size > mca_btl_ugni_component.smsg_page_size) {
|
|
|
|
mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
rc = mca_btl_ugni_module_init (ugni_modules);
|
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
|
|
|
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
|
|
|
|
__LINE__));
|
|
|
|
return NULL;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
*base_modules = (mca_btl_base_module_t *) ugni_modules;
|
|
|
|
|
2011-12-09 21:24:07 +00:00
|
|
|
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
BTL_VERBOSE(("btl/ugni done initializing %d module(s)", *num_btl_modules));
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
return base_modules;
|
|
|
|
}
|
|
|
|
|
2017-03-14 10:10:05 -06:00
|
|
|
/**
 * Handle one completed datagram on the given device.
 *
 * A completion on the wildcard endpoint is resolved to the peer's endpoint
 * using the process name stored in the module's wildcard remote attributes;
 * a completion on a directed endpoint clears that endpoint's posted flag and
 * decrements the module's active datagram counter. In both cases connection
 * progress is driven under the endpoint lock, pending SMSG messages are
 * processed if the endpoint is connected, and the wildcard datagram is
 * reposted when it was the one that completed.
 *
 * @param[in] device  device to poll for a completed datagram
 *
 * @returns the value from mca_btl_ugni_get_datagram() when it is not 1,
 *          otherwise the result of mca_btl_ugni_smsg_process() (0 if the
 *          endpoint is not connected).
 */
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_component.modules;
    mca_btl_base_endpoint_t *ep = NULL;
    gni_ep_handle_t handle;
    int processed = 0;
    int status;

    status = mca_btl_ugni_get_datagram (module, device, &handle, &ep);
    if (status != 1) {
        return status;
    }

    BTL_VERBOSE(("remote datagram completion on handle %p", (void*)handle));

    /* a wildcard completion carries no endpoint: look the peer up by the
     * process name we received */
    if (module->wildcard_ep == handle) {
        struct opal_proc_t *peer = opal_proc_for_name (module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
                     OPAL_NAME_PRINT(module->wc_remote_attr.proc_name)));

        ep = mca_btl_ugni_get_ep (&module->super, peer);
        if (OPAL_UNLIKELY(ep == NULL)) {
            /* unrecoverable: no endpoint can be found or created for the peer */
            BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
                       OPAL_NAME_PRINT(module->wc_remote_attr.proc_name)));
            abort ();
            return OPAL_ERR_NOT_FOUND;
        }
    }

    /* an endpoint must be known at this point */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) ep, handle == module->wildcard_ep));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);

    if (module->wildcard_ep != handle) {
        /* the datagram posted directly to this endpoint has completed */
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));

        ep->dg_posted = false;
        (void) opal_atomic_add_fetch_32 (&module->active_datagrams, -1);
    }

    (void) mca_btl_ugni_ep_connect_progress (ep);

    opal_mutex_unlock (&ep->lock);

    if (ep->state == MCA_BTL_UGNI_EP_STATE_CONNECTED) {
        /* drain whatever is already waiting in the endpoint's smsg mailbox */
        processed = mca_btl_ugni_smsg_process (ep);
    }

    /* the wildcard datagram was consumed, so post it again */
    if (module->wildcard_ep == handle) {
        mca_btl_ugni_wildcard_ep_post (module);
    }

    return processed;
}
|
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
/**
 * Complete a batch of finished RDMA post descriptors.
 *
 * Each descriptor is passed to mca_btl_ugni_post_desc_complete() together
 * with its own status; descriptors whose status is not OPAL_SUCCESS are first
 * dumped with btl_ugni_dump_post_desc(). The number of descriptors that have
 * use_bte set is subtracted from the module's active_rdma_count afterwards.
 *
 * @param[in] ugni_module  module whose active RDMA counter is updated
 * @param[in] device       not read by this function
 * @param[in] post_desc    array of completed post descriptors
 * @param[in] count        number of entries in post_desc
 */
void mca_btl_ugni_handle_rdma_completions (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
                                           struct mca_btl_ugni_post_descriptor_t *post_desc, const int count)
{
    int bte_done = 0;
    int idx = 0;

    while (idx < count) {
        struct mca_btl_ugni_post_descriptor_t *desc = &post_desc[idx];

        BTL_VERBOSE(("post descriptor complete. status: %d", desc->rc));

        if (OPAL_UNLIKELY(desc->rc != OPAL_SUCCESS)) {
            /* print the failed descriptor */
            btl_ugni_dump_post_desc (desc);
        }

        /* read use_bte before the descriptor is handed to the completion routine */
        if (true == desc->use_bte) {
            ++bte_done;
        }

        mca_btl_ugni_post_desc_complete (ugni_module, desc, desc->rc);

        ++idx;
    }

    if (0 < bte_done) {
        (void) OPAL_THREAD_FETCH_ADD32 (&ugni_module->active_rdma_count, -bte_done);
    }
}
|
2014-10-08 10:10:19 -06:00
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
|
|
|
|
mca_btl_ugni_cq_t *cq)
|
|
|
|
{
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_post_descriptor_t post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
|
2017-03-12 22:37:35 -06:00
|
|
|
int rc;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
rc = mca_btl_ugni_cq_get_completed_desc (device, cq, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
|
2017-03-12 22:37:35 -06:00
|
|
|
if (0 >= rc) {
|
|
|
|
return rc;
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
BTL_VERBOSE(("got %d completed rdma descriptors", rc));
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2018-02-15 12:57:21 -07:00
|
|
|
mca_btl_ugni_handle_rdma_completions (ugni_module, device, post_desc, rc);
|
2017-03-12 22:37:35 -06:00
|
|
|
|
|
|
|
return rc;
|
2012-05-31 20:02:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
|
|
|
|
{
|
2014-10-08 10:10:19 -06:00
|
|
|
int rc = OPAL_SUCCESS;
|
|
|
|
mca_btl_base_endpoint_t *endpoint = NULL;
|
|
|
|
int count;
|
2012-05-31 20:02:41 +00:00
|
|
|
|
2016-05-25 14:27:34 -06:00
|
|
|
if (0 == opal_list_get_size(&ugni_module->ep_wait_list)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
/* check the count before taking the lock to avoid unnecessary locking */
|
2016-02-02 11:57:35 -07:00
|
|
|
count = opal_list_get_size(&ugni_module->ep_wait_list);
|
2017-03-12 22:37:35 -06:00
|
|
|
if (0 == count) {
|
|
|
|
return 0;
|
|
|
|
}
|
2012-05-31 20:02:41 +00:00
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
|
|
|
count = opal_list_get_size(&ugni_module->ep_wait_list);
|
2014-10-08 10:10:19 -06:00
|
|
|
do {
|
|
|
|
endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
|
|
|
|
if (endpoint != NULL) {
|
|
|
|
rc = mca_btl_ugni_progress_send_wait_list (endpoint);
|
|
|
|
|
2016-02-02 11:57:35 -07:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
2014-10-08 10:10:19 -06:00
|
|
|
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
|
2016-02-02 11:57:35 -07:00
|
|
|
} else {
|
|
|
|
endpoint->wait_listed = false;
|
2014-10-08 10:10:19 -06:00
|
|
|
}
|
2012-05-31 20:02:41 +00:00
|
|
|
}
|
2016-02-02 11:57:35 -07:00
|
|
|
} while (endpoint != NULL && --count > 0) ;
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
|
2014-10-08 10:10:19 -06:00
|
|
|
|
|
|
|
return rc;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
2012-04-19 21:51:55 +00:00
|
|
|
static int mca_btl_ugni_component_progress (void)
|
2011-12-09 21:24:07 +00:00
|
|
|
{
|
2017-03-12 22:37:35 -06:00
|
|
|
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
|
2012-04-19 21:51:55 +00:00
|
|
|
int count = 0;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
|
2016-05-25 14:27:34 -06:00
|
|
|
|
2017-03-14 10:10:05 -06:00
|
|
|
if (ugni_module->active_datagrams) {
|
|
|
|
count += mca_btl_ugni_progress_datagram (ugni_module->devices);
|
2017-03-12 22:37:35 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
|
|
|
mca_btl_ugni_device_t *device = ugni_module->devices + i;
|
|
|
|
|
|
|
|
if (device->smsg_connections) {
|
|
|
|
count += mca_btl_ugni_progress_local_smsg (ugni_module, device);
|
2016-05-25 14:27:34 -06:00
|
|
|
mca_btl_ugni_progress_wait_list (ugni_module);
|
|
|
|
}
|
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
if (device->dev_rdma_local_cq.active_operations) {
|
|
|
|
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_cq);
|
2016-05-25 14:27:34 -06:00
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2017-03-12 22:37:35 -06:00
|
|
|
if (mca_btl_ugni_component.progress_thread_enabled && device->dev_rdma_local_irq_cq.active_operations) {
|
|
|
|
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_irq_cq);
|
2014-12-04 16:18:16 -07:00
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
2018-02-15 12:57:21 -07:00
|
|
|
|
|
|
|
/**
 * Wait for all outstanding local RDMA operations on every virtual device.
 *
 * @param[in] btl       not read by this function
 * @param[in] endpoint  not read by this function
 *
 * @returns OPAL_SUCCESS always.
 */
int mca_btl_ugni_flush (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_component.modules;

    for (int dev = 0 ; dev < mca_btl_ugni_component.virtual_device_count ; ++dev) {
        mca_btl_ugni_device_t *device = &module->devices[dev];

        /* Keep progressing the local RDMA completion queue until nothing is
         * active. Waiting for a fixed number of completions sampled up front
         * is tempting, but there is no way to know whether those completions
         * are the operations the caller is actually waiting on. */
        while (device->dev_rdma_local_cq.active_operations) {
            (void) mca_btl_ugni_progress_rdma (module, device, &device->dev_rdma_local_cq);
        }

        /* record that this device has just been flushed */
        device->flushed = true;
    }

    return OPAL_SUCCESS;
}
|
|
|
|
|
|
|
|
/**
 * Print the fields of a post descriptor's uGNI descriptor to stderr, one
 * "name = value" line per field.
 *
 * @param[in] desc  post descriptor to print
 */
void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
{
    FILE *out = stderr;

    /* identification and completion state */
    fprintf (out, "desc->gni_desc.post_id = %" PRIx64 "\n", desc->gni_desc.post_id);
    fprintf (out, "desc->gni_desc.status = %" PRIx64 "\n", desc->gni_desc.status);
    fprintf (out, "desc->gni_desc.cq_mode_complete = %hu\n", desc->gni_desc.cq_mode_complete);

    /* operation type and modes */
    fprintf (out, "desc->gni_desc.type = %d\n", desc->gni_desc.type);
    fprintf (out, "desc->gni_desc.cq_mode = %hu\n", desc->gni_desc.cq_mode);
    fprintf (out, "desc->gni_desc.dlvr_mode = %hu\n", desc->gni_desc.dlvr_mode);

    /* local side of the transfer */
    fprintf (out, "desc->gni_desc.local_addr = %" PRIx64 "\n", desc->gni_desc.local_addr);
    fprintf (out, "desc->gni_desc.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n",
             desc->gni_desc.local_mem_hndl.qword1, desc->gni_desc.local_mem_hndl.qword2);

    /* remote side of the transfer */
    fprintf (out, "desc->gni_desc.remote_addr = %" PRIx64 "\n", desc->gni_desc.remote_addr);
    fprintf (out, "desc->gni_desc.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n",
             desc->gni_desc.remote_mem_hndl.qword1, desc->gni_desc.remote_mem_hndl.qword2);

    /* size and remaining attributes */
    fprintf (out, "desc->gni_desc.length = %" PRIu64 "\n", desc->gni_desc.length);
    fprintf (out, "desc->gni_desc.rdma_mode = %hu\n", desc->gni_desc.rdma_mode);
    fprintf (out, "desc->gni_desc.amo_cmd = %d\n", desc->gni_desc.amo_cmd);
}
|