517 строки
18 KiB
C
517 строки
18 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
|
*
|
|
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "mtl_ofi.h"
|
|
#include "opal/util/argv.h"
|
|
|
|
static int ompi_mtl_ofi_component_open(void);
|
|
static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority);
|
|
static int ompi_mtl_ofi_component_close(void);
|
|
static int ompi_mtl_ofi_component_register(void);
|
|
|
|
static mca_mtl_base_module_t*
|
|
ompi_mtl_ofi_component_init(bool enable_progress_threads,
|
|
bool enable_mpi_threads);
|
|
|
|
static int param_priority;
|
|
static char *prov_include;
|
|
static char *prov_exclude;
|
|
|
|
mca_mtl_ofi_component_t mca_mtl_ofi_component = {
|
|
{
|
|
|
|
/* First, the mca_base_component_t struct containing meta
|
|
* information about the component itself */
|
|
|
|
.mtl_version = {
|
|
MCA_MTL_BASE_VERSION_2_0_0,
|
|
|
|
.mca_component_name = "ofi",
|
|
OFI_COMPAT_MCA_VERSION,
|
|
.mca_open_component = ompi_mtl_ofi_component_open,
|
|
.mca_close_component = ompi_mtl_ofi_component_close,
|
|
.mca_query_component = ompi_mtl_ofi_component_query,
|
|
.mca_register_component_params = ompi_mtl_ofi_component_register,
|
|
},
|
|
.mtl_data = {
|
|
/* The component is not checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_NONE
|
|
},
|
|
|
|
.mtl_init = ompi_mtl_ofi_component_init,
|
|
}
|
|
};
|
|
|
|
static int
|
|
ompi_mtl_ofi_component_register(void)
|
|
{
|
|
param_priority = 25; /* for now give a lower priority than the psm mtl */
|
|
mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
|
|
"priority", "Priority of the OFI MTL component",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
¶m_priority);
|
|
|
|
prov_include = NULL;
|
|
mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
|
|
"provider_include",
|
|
"Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,sockets\"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude.",
|
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
|
OPAL_INFO_LVL_1,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&prov_include);
|
|
|
|
prov_exclude = "sockets,mxm";
|
|
mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
|
|
"provider_exclude",
|
|
"Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include.",
|
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
|
OPAL_INFO_LVL_1,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&prov_exclude);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
ompi_mtl_ofi_component_open(void)
|
|
{
|
|
ompi_mtl_ofi.base.mtl_request_size =
|
|
sizeof(ompi_mtl_ofi_request_t) - sizeof(struct mca_mtl_request_t);
|
|
|
|
ompi_mtl_ofi.domain = NULL;
|
|
ompi_mtl_ofi.av = NULL;
|
|
ompi_mtl_ofi.cq = NULL;
|
|
ompi_mtl_ofi.ep = NULL;
|
|
|
|
/**
|
|
* Sanity check: provider_include and provider_exclude must be mutually
|
|
* exclusive
|
|
*/
|
|
if (OMPI_SUCCESS !=
|
|
mca_base_var_check_exclusive("ompi",
|
|
mca_mtl_ofi_component.super.mtl_version.mca_type_name,
|
|
mca_mtl_ofi_component.super.mtl_version.mca_component_name,
|
|
"provider_include",
|
|
mca_mtl_ofi_component.super.mtl_version.mca_type_name,
|
|
mca_mtl_ofi_component.super.mtl_version.mca_component_name,
|
|
"provider_exclude")) {
|
|
return OMPI_ERR_NOT_AVAILABLE;
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority)
|
|
{
|
|
*priority = param_priority;
|
|
*module = (mca_base_module_t *)&ompi_mtl_ofi.base;
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
ompi_mtl_ofi_component_close(void)
|
|
{
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
int
|
|
ompi_mtl_ofi_progress_no_inline(void)
|
|
{
|
|
return ompi_mtl_ofi_progress();
|
|
}
|
|
|
|
static int
|
|
is_in_list(char **list, char *item)
|
|
{
|
|
int i = 0;
|
|
|
|
if ((NULL == list) || (NULL == item)) {
|
|
return 0;
|
|
}
|
|
|
|
while (NULL != list[i]) {
|
|
if (0 == strncmp(item, list[i], strlen(item))) {
|
|
return 1;
|
|
} else {
|
|
i++;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct fi_info*
|
|
select_ofi_provider(struct fi_info *providers)
|
|
{
|
|
char **include_list = NULL;
|
|
char **exclude_list = NULL;
|
|
struct fi_info *prov = providers;
|
|
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: mtl:ofi:provider_include = \"%s\"\n",
|
|
__FILE__, __LINE__, prov_include);
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: mtl:ofi:provider_exclude = \"%s\"\n",
|
|
__FILE__, __LINE__, prov_exclude);
|
|
|
|
if (NULL != prov_include) {
|
|
include_list = opal_argv_split(prov_include, ',');
|
|
while ((NULL != prov) &&
|
|
(!is_in_list(include_list, prov->fabric_attr->prov_name))) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: mtl:ofi: \"%s\" not in include list\n",
|
|
__FILE__, __LINE__,
|
|
prov->fabric_attr->prov_name);
|
|
prov = prov->next;
|
|
}
|
|
} else if (NULL != prov_exclude) {
|
|
exclude_list = opal_argv_split(prov_exclude, ',');
|
|
while ((NULL != prov) &&
|
|
(is_in_list(exclude_list, prov->fabric_attr->prov_name))) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: mtl:ofi: \"%s\" in exclude list\n",
|
|
__FILE__, __LINE__,
|
|
prov->fabric_attr->prov_name);
|
|
prov = prov->next;
|
|
}
|
|
}
|
|
|
|
opal_argv_free(include_list);
|
|
opal_argv_free(exclude_list);
|
|
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: mtl:ofi:prov: %s\n",
|
|
__FILE__, __LINE__,
|
|
(prov ? prov->fabric_attr->prov_name : "none"));
|
|
|
|
return prov;
|
|
}
|
|
|
|
static mca_mtl_base_module_t*
|
|
ompi_mtl_ofi_component_init(bool enable_progress_threads,
|
|
bool enable_mpi_threads)
|
|
{
|
|
int ret, fi_version;
|
|
struct fi_info *hints;
|
|
struct fi_info *providers = NULL, *prov = NULL;
|
|
struct fi_cq_attr cq_attr = {0};
|
|
struct fi_av_attr av_attr = {0};
|
|
char ep_name[FI_NAME_MAX] = {0};
|
|
size_t namelen;
|
|
|
|
/**
|
|
* Hints to filter providers
|
|
* See man fi_getinfo for a list of all filters
|
|
* mode: Select capabilities MTL is prepared to support.
|
|
* In this case, MTL will pass in context into communication calls
|
|
* ep_type: reliable datagram operation
|
|
* caps: Capabilities required from the provider.
|
|
* Tag matching is specified to implement MPI semantics.
|
|
* msg_order: Guarantee that messages with same tag are ordered.
|
|
*/
|
|
hints = fi_allocinfo();
|
|
if (!hints) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: Could not allocate fi_info\n",
|
|
__FILE__, __LINE__);
|
|
goto error;
|
|
}
|
|
hints->mode = FI_CONTEXT;
|
|
hints->ep_attr->type = FI_EP_RDM; /* Reliable datagram */
|
|
hints->caps = FI_TAGGED; /* Tag matching interface */
|
|
hints->tx_attr->msg_order = FI_ORDER_SAS;
|
|
hints->rx_attr->msg_order = FI_ORDER_SAS;
|
|
|
|
hints->domain_attr->threading = FI_THREAD_UNSPEC;
|
|
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
|
|
hints->domain_attr->resource_mgmt = FI_RM_ENABLED;
|
|
hints->domain_attr->av_type = FI_AV_MAP;
|
|
|
|
/**
|
|
* FI_VERSION provides binary backward and forward compatibility support
|
|
* Specify the version of OFI is coded to, the provider will select struct
|
|
* layouts that are compatible with this version.
|
|
*/
|
|
fi_version = FI_VERSION(1, 0);
|
|
|
|
/**
|
|
* fi_getinfo: returns information about fabric services for reaching a
|
|
* remote node or service. this does not necessarily allocate resources.
|
|
* Pass NULL for name/service because we want a list of providers supported.
|
|
*/
|
|
ret = fi_getinfo(fi_version, /* OFI version requested */
|
|
NULL, /* Optional name or fabric to resolve */
|
|
NULL, /* Optional service name or port to request */
|
|
0ULL, /* Optional flag */
|
|
hints, /* In: Hints to filter providers */
|
|
&providers); /* Out: List of matching providers */
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_getinfo failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Select a provider from the list returned by fi_getinfo().
|
|
*/
|
|
prov = select_ofi_provider(providers);
|
|
if (!prov) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: select_ofi_provider: no provider found\n",
|
|
__FILE__, __LINE__);
|
|
goto error;
|
|
}
|
|
|
|
|
|
/**
|
|
* Open fabric
|
|
* The getinfo struct returns a fabric attribute struct that can be used to
|
|
* instantiate the virtual or physical network. This opens a "fabric
|
|
* provider". See man fi_fabric for details.
|
|
*/
|
|
ret = fi_fabric(prov->fabric_attr, /* In: Fabric attributes */
|
|
&ompi_mtl_ofi.fabric, /* Out: Fabric handle */
|
|
NULL); /* Optional context for fabric events */
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_fabric failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Create the access domain, which is the physical or virtual network or
|
|
* hardware port/collection of ports. Returns a domain object that can be
|
|
* used to create endpoints. See man fi_domain for details.
|
|
*/
|
|
ret = fi_domain(ompi_mtl_ofi.fabric, /* In: Fabric object */
|
|
prov, /* In: Provider */
|
|
&ompi_mtl_ofi.domain, /* Out: Domain oject */
|
|
NULL); /* Optional context for domain events */
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_domain failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Create a transport level communication endpoint. To use the endpoint,
|
|
* it must be bound to completion counters or event queues and enabled,
|
|
* and the resources consumed by it, such as address vectors, counters,
|
|
* completion queues, etc.
|
|
* see man fi_endpoint for more details.
|
|
*/
|
|
ret = fi_endpoint(ompi_mtl_ofi.domain, /* In: Domain object */
|
|
prov, /* In: Provider */
|
|
&ompi_mtl_ofi.ep, /* Out: Endpoint object */
|
|
NULL); /* Optional context */
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_endpoint failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Save the maximum inject size.
|
|
*/
|
|
ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size;
|
|
|
|
/**
|
|
* Create the objects that will be bound to the endpoint.
|
|
* The objects include:
|
|
* - completion queue for events
|
|
* - address vector of other endpoint addresses
|
|
* - dynamic memory-spanning memory region
|
|
*/
|
|
cq_attr.format = FI_CQ_FORMAT_TAGGED;
|
|
ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.cq, NULL);
|
|
if (ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_cq_open failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* The remote fi_addr will be stored in the ofi_endpoint struct.
|
|
* So, we use the AV in "map" mode.
|
|
*/
|
|
av_attr.type = FI_AV_MAP;
|
|
ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
|
|
if (ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_av_open failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Bind the CQ and AV to the endpoint object.
|
|
*/
|
|
ret = fi_ep_bind(ompi_mtl_ofi.ep,
|
|
(fid_t)ompi_mtl_ofi.cq,
|
|
FI_SEND | FI_RECV);
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_bind CQ-EP failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
ret = fi_ep_bind(ompi_mtl_ofi.ep,
|
|
(fid_t)ompi_mtl_ofi.av,
|
|
0);
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_bind AV-EP failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Enable the endpoint for communication
|
|
* This commits the bind operations.
|
|
*/
|
|
ret = fi_enable(ompi_mtl_ofi.ep);
|
|
if (0 != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_enable failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
/**
|
|
* Free providers info since it's not needed anymore.
|
|
*/
|
|
fi_freeinfo(hints);
|
|
hints = NULL;
|
|
fi_freeinfo(providers);
|
|
providers = NULL;
|
|
|
|
/**
|
|
* Get our address and publish it with modex.
|
|
*/
|
|
namelen = sizeof(ep_name);
|
|
ret = fi_getname((fid_t)ompi_mtl_ofi.ep, &ep_name[0], &namelen);
|
|
if (ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: fi_getname failed: %s\n",
|
|
__FILE__, __LINE__, fi_strerror(-ret));
|
|
goto error;
|
|
}
|
|
|
|
OFI_COMPAT_MODEX_SEND(ret,
|
|
&mca_mtl_ofi_component.super.mtl_version,
|
|
&ep_name,
|
|
namelen);
|
|
if (OMPI_SUCCESS != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: modex_send failed: %d\n",
|
|
__FILE__, __LINE__, ret);
|
|
goto error;
|
|
}
|
|
|
|
ompi_mtl_ofi.epnamelen = namelen;
|
|
|
|
/**
|
|
* Set the ANY_SRC address.
|
|
*/
|
|
ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC;
|
|
|
|
/**
|
|
* Activate progress callback.
|
|
*/
|
|
ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline);
|
|
if (OMPI_SUCCESS != ret) {
|
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
|
"%s:%d: opal_progress_register failed: %d\n",
|
|
__FILE__, __LINE__, ret);
|
|
goto error;
|
|
}
|
|
|
|
return &ompi_mtl_ofi.base;
|
|
|
|
error:
|
|
if (providers) {
|
|
(void) fi_freeinfo(providers);
|
|
}
|
|
if (hints) {
|
|
(void) fi_freeinfo(hints);
|
|
}
|
|
if (ompi_mtl_ofi.av) {
|
|
(void) fi_close((fid_t)ompi_mtl_ofi.av);
|
|
}
|
|
if (ompi_mtl_ofi.cq) {
|
|
(void) fi_close((fid_t)ompi_mtl_ofi.cq);
|
|
}
|
|
if (ompi_mtl_ofi.ep) {
|
|
(void) fi_close((fid_t)ompi_mtl_ofi.ep);
|
|
}
|
|
if (ompi_mtl_ofi.domain) {
|
|
(void) fi_close((fid_t)ompi_mtl_ofi.domain);
|
|
}
|
|
if (ompi_mtl_ofi.fabric) {
|
|
(void) fi_close((fid_t)ompi_mtl_ofi.fabric);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
int
|
|
ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl)
|
|
{
|
|
opal_progress_unregister(ompi_mtl_ofi_progress_no_inline);
|
|
|
|
/**
|
|
* * Close all the OFI objects
|
|
* */
|
|
if (fi_close((fid_t)ompi_mtl_ofi.ep)) {
|
|
opal_output(ompi_mtl_base_framework.framework_output,
|
|
"fi_close failed: %s", strerror(errno));
|
|
abort();
|
|
}
|
|
if (fi_close((fid_t)ompi_mtl_ofi.cq)) {
|
|
opal_output(ompi_mtl_base_framework.framework_output,
|
|
"fi_close failed: %s", strerror(errno));
|
|
abort();
|
|
}
|
|
if (fi_close((fid_t)ompi_mtl_ofi.av)) {
|
|
opal_output(ompi_mtl_base_framework.framework_output,
|
|
"fi_close failed: %s", strerror(errno));
|
|
abort();
|
|
}
|
|
if (fi_close((fid_t)ompi_mtl_ofi.domain)) {
|
|
opal_output(ompi_mtl_base_framework.framework_output,
|
|
"fi_close failed: %s", strerror(errno));
|
|
abort();
|
|
}
|
|
if (fi_close((fid_t)ompi_mtl_ofi.fabric)) {
|
|
opal_output(ompi_mtl_base_framework.framework_output,
|
|
"fi_close failed: %s", strerror(errno));
|
|
abort();
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
|