2014-02-26 16:32:23 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
|
|
|
* All rights reserved.
|
2014-08-05 05:35:57 +00:00
|
|
|
* Copyright (c) 2014 Research Organization for Information Science
|
|
|
|
* and Technology (RIST). All rights reserved.
|
2014-08-06 13:57:53 +00:00
|
|
|
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
2014-02-26 16:32:23 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "oshmem_config.h"
|
|
|
|
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif /* HAVE_UNISTD_H */
|
|
|
|
|
|
|
|
#include "opal/constants.h"
|
2014-08-06 13:57:53 +00:00
|
|
|
#include "opal/util/sys_limits.h"
|
2014-02-26 16:32:23 +00:00
|
|
|
|
|
|
|
#include "oshmem/mca/sshmem/sshmem.h"
|
|
|
|
#include "oshmem/mca/sshmem/base/base.h"
|
|
|
|
|
|
|
|
#include "sshmem_verbs.h"
|
|
|
|
|
|
|
|
/**
|
|
|
|
* public string showing the shmem ompi_mmap component version number
|
|
|
|
*/
|
|
|
|
const char *mca_sshmem_verbs_component_version_string =
|
|
|
|
"OSHMEM mmap sshmem MCA component version " OSHMEM_VERSION;
|
|
|
|
|
|
|
|
/*
 * Component-wide tunables.  None of them is read or written by the code
 * visible in this file.
 * NOTE(review): presumably inherited from the mmap component and consumed
 * (if at all) elsewhere -- confirm before removing.
 */

/* non-zero requests relocation of the backing file (default: off) */
int mca_sshmem_verbs_relocate_backing_file = 0;

/* base directory for a relocated backing file (default: none) */
char *mca_sshmem_verbs_backing_file_base_dir = NULL;

/* whether an NFS warning should be emitted (default: yes) */
bool mca_sshmem_verbs_nfs_warning = true;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* local functions
|
|
|
|
*/
|
|
|
|
static int verbs_register(void);
|
|
|
|
static int verbs_open(void);
|
|
|
|
static int verbs_close(void);
|
|
|
|
static int verbs_query(mca_base_module_t **module, int *priority);
|
|
|
|
static int verbs_runtime_query(mca_base_module_t **module,
|
|
|
|
int *priority,
|
|
|
|
const char *hint);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* instantiate the public struct with all of our public information
|
|
|
|
* and pointers to our public functions in it
|
|
|
|
*/
|
|
|
|
mca_sshmem_verbs_component_t mca_sshmem_verbs_component = {
|
|
|
|
/* ////////////////////////////////////////////////////////////////////// */
|
|
|
|
/* super */
|
|
|
|
/* ////////////////////////////////////////////////////////////////////// */
|
|
|
|
{
|
|
|
|
/**
|
|
|
|
* common MCA component data
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
MCA_SSHMEM_BASE_VERSION_2_0_0,
|
|
|
|
|
|
|
|
/* component name and version */
|
|
|
|
"verbs",
|
|
|
|
OSHMEM_MAJOR_VERSION,
|
|
|
|
OSHMEM_MINOR_VERSION,
|
|
|
|
OSHMEM_RELEASE_VERSION,
|
|
|
|
|
|
|
|
/* component open */
|
|
|
|
verbs_open,
|
|
|
|
/* component close */
|
|
|
|
verbs_close,
|
|
|
|
/* component query */
|
|
|
|
verbs_query,
|
|
|
|
/* component register */
|
|
|
|
verbs_register
|
|
|
|
},
|
|
|
|
/* MCA v2.0.0 component meta data */
|
|
|
|
{
|
|
|
|
/* the component is checkpoint ready */
|
|
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
|
|
},
|
|
|
|
verbs_runtime_query,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////////// */
|
|
|
|
static int
|
|
|
|
verbs_runtime_query(mca_base_module_t **module,
|
|
|
|
int *priority,
|
|
|
|
const char *hint)
|
|
|
|
{
|
|
|
|
int rc = OSHMEM_SUCCESS;
|
|
|
|
openib_device_t my_device;
|
|
|
|
openib_device_t *device = &my_device;
|
|
|
|
int num_devs = 0;
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
*priority = 0;
|
|
|
|
*module = NULL;
|
|
|
|
|
|
|
|
memset(device, 0, sizeof(*device));
|
|
|
|
|
|
|
|
#ifdef HAVE_IBV_GET_DEVICE_LIST
|
|
|
|
device->ib_devs = ibv_get_device_list(&num_devs);
|
|
|
|
#else
|
|
|
|
#error unsupported ibv_get_device_list in infiniband/verbs.h
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (num_devs == 0 || !device->ib_devs) {
|
|
|
|
return OSHMEM_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Open device */
|
|
|
|
if (NULL != mca_sshmem_verbs_component.hca_name) {
|
|
|
|
for (i = 0; i < num_devs; i++) {
|
2014-04-23 11:32:52 +00:00
|
|
|
if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
|
2014-02-26 16:32:23 +00:00
|
|
|
device->ib_dev = device->ib_devs[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
device->ib_dev = device->ib_devs[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NULL == device->ib_dev) {
|
|
|
|
rc = OSHMEM_ERR_NOT_FOUND;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
|
|
|
|
rc = OSHMEM_ERR_RESOURCE_BUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Obtain device attributes */
|
|
|
|
if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
|
|
|
|
rc = OSHMEM_ERR_RESOURCE_BUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate the protection domain for the device */
|
|
|
|
device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
|
|
|
|
if (NULL == device->ib_pd) {
|
|
|
|
rc = OSHMEM_ERR_RESOURCE_BUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate memory */
|
|
|
|
if (!rc) {
|
|
|
|
void *addr = NULL;
|
2014-08-05 05:35:57 +00:00
|
|
|
size_t size = (size_t)opal_getpagesize();
|
2014-02-26 16:32:23 +00:00
|
|
|
struct ibv_mr *ib_mr = NULL;
|
2014-04-13 16:54:18 +00:00
|
|
|
uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
|
2014-02-26 16:32:23 +00:00
|
|
|
IBV_ACCESS_REMOTE_WRITE |
|
2014-04-13 16:54:18 +00:00
|
|
|
IBV_ACCESS_REMOTE_READ;
|
|
|
|
uint64_t exp_access_flag = 0;
|
2014-02-26 16:32:23 +00:00
|
|
|
|
|
|
|
OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
|
|
|
|
opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
|
|
|
|
|
|
|
|
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
2014-04-13 16:54:18 +00:00
|
|
|
exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
|
2014-02-26 16:32:23 +00:00
|
|
|
IBV_EXP_ACCESS_SHARED_MR_USER_READ |
|
2014-04-13 16:54:18 +00:00
|
|
|
IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
|
2014-02-26 16:32:23 +00:00
|
|
|
#endif /* MPAGE_ENABLE */
|
|
|
|
|
2014-04-13 16:54:18 +00:00
|
|
|
struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
|
2014-02-26 16:32:23 +00:00
|
|
|
ib_mr = ibv_exp_reg_mr(&in);
|
|
|
|
if (NULL == ib_mr) {
|
|
|
|
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
|
|
|
} else {
|
|
|
|
device->ib_mr_shared = ib_mr;
|
|
|
|
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
2014-09-17 18:42:47 +03:00
|
|
|
if (!rc && mca_sshmem_verbs_component.has_shared_mr > 0) {
|
2014-04-23 11:38:44 +00:00
|
|
|
struct ibv_exp_reg_shared_mr_in in_smr;
|
|
|
|
|
2014-02-26 16:32:23 +00:00
|
|
|
access_flag = IBV_ACCESS_LOCAL_WRITE |
|
|
|
|
IBV_ACCESS_REMOTE_WRITE |
|
|
|
|
IBV_ACCESS_REMOTE_READ|
|
|
|
|
IBV_EXP_ACCESS_NO_RDMA;
|
|
|
|
|
|
|
|
addr = (void *)mca_sshmem_base_start_address;
|
2014-04-23 11:38:44 +00:00
|
|
|
mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle, addr, access_flag);
|
|
|
|
ib_mr = ibv_exp_reg_shared_mr(&in_smr);
|
2014-02-26 16:32:23 +00:00
|
|
|
if (NULL == ib_mr) {
|
2014-09-17 18:42:47 +03:00
|
|
|
if (mca_sshmem_verbs_component.has_shared_mr == 1)
|
|
|
|
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
2014-04-13 16:54:18 +00:00
|
|
|
mca_sshmem_verbs_component.has_shared_mr = 0;
|
2014-02-26 16:32:23 +00:00
|
|
|
} else {
|
|
|
|
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
|
2014-04-13 16:54:18 +00:00
|
|
|
mca_sshmem_verbs_component.has_shared_mr = 1;
|
2014-02-26 16:32:23 +00:00
|
|
|
}
|
|
|
|
}
|
2014-09-17 18:42:47 +03:00
|
|
|
#else
|
|
|
|
if (!rc && mca_sshmem_verbs_component.has_shared_mr == 1) {
|
|
|
|
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
|
|
|
}
|
|
|
|
mca_sshmem_verbs_component.has_shared_mr = 0;
|
2014-02-26 16:32:23 +00:00
|
|
|
#endif /* MPAGE_ENABLE */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* all is well - rainbows and butterflies */
|
|
|
|
if (!rc) {
|
|
|
|
*priority = mca_sshmem_verbs_component.priority;
|
|
|
|
*module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (device) {
|
|
|
|
if (opal_value_array_get_size(&device->ib_mr_array)) {
|
|
|
|
struct ibv_mr** array;
|
|
|
|
struct ibv_mr* ib_mr = NULL;
|
|
|
|
array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
|
|
|
|
while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
|
|
|
|
ib_mr = array[0];
|
|
|
|
ibv_dereg_mr(ib_mr);
|
|
|
|
opal_value_array_remove_item(&device->ib_mr_array, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (device->ib_mr_shared) {
|
|
|
|
device->ib_mr_shared = NULL;
|
|
|
|
}
|
|
|
|
OBJ_DESTRUCT(&device->ib_mr_array);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (device->ib_pd) {
|
|
|
|
ibv_dealloc_pd(device->ib_pd);
|
|
|
|
device->ib_pd = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(device->ib_dev_context) {
|
|
|
|
ibv_close_device(device->ib_dev_context);
|
|
|
|
device->ib_dev_context = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(device->ib_devs) {
|
|
|
|
ibv_free_device_list(device->ib_devs);
|
|
|
|
device->ib_devs = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////////// */
|
|
|
|
static int
|
|
|
|
verbs_register(void)
|
|
|
|
{
|
|
|
|
int index;
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////// */
|
|
|
|
/* (default) priority - set high to make verbs the default */
|
|
|
|
mca_sshmem_verbs_component.priority = 40;
|
|
|
|
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
|
|
|
|
"priority", "Priority for sshmem verbs "
|
|
|
|
"component (default: 40)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_ALL_EQ,
|
|
|
|
&mca_sshmem_verbs_component.priority);
|
|
|
|
|
|
|
|
mca_sshmem_verbs_component.hca_name = NULL;
|
|
|
|
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
|
|
|
|
"hca_name", "Preferred hca (default: the first)", MCA_BASE_VAR_TYPE_STRING,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
|
|
&mca_sshmem_verbs_component.hca_name);
|
|
|
|
if (index) {
|
|
|
|
(void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
|
|
|
|
"hca_name",
|
|
|
|
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
|
|
|
|
}
|
2014-09-21 11:56:12 +03:00
|
|
|
/* allow user specify hca port, extract hca name
|
|
|
|
* ex: mlx_4_0:1 is allowed
|
|
|
|
*/
|
|
|
|
if (mca_sshmem_verbs_component.hca_name) {
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
p = strchr(mca_sshmem_verbs_component.hca_name, ':');
|
|
|
|
if (p)
|
|
|
|
*p = 0;
|
|
|
|
}
|
|
|
|
|
2014-02-26 16:32:23 +00:00
|
|
|
|
|
|
|
mca_sshmem_verbs_component.mr_interleave_factor = 2;
|
|
|
|
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
|
|
|
|
"mr_interleave_factor", "try to give at least N Gbytes spaces between mapped memheaps "
|
|
|
|
"of other PEs that are local to me (default: 2)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
|
|
&mca_sshmem_verbs_component.mr_interleave_factor);
|
|
|
|
if (index) {
|
|
|
|
(void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
|
|
|
|
"mr_interleave_factor",
|
|
|
|
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
|
|
|
|
}
|
|
|
|
|
2014-09-17 18:42:47 +03:00
|
|
|
mca_sshmem_verbs_component.has_shared_mr = 2;
|
|
|
|
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
|
|
|
|
"shared_mr", "Shared memory region usage "
|
|
|
|
"[0 - off, 1 - on, 2 - auto] (default: 2)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
|
|
|
OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_ALL_EQ,
|
|
|
|
&mca_sshmem_verbs_component.has_shared_mr);
|
|
|
|
|
2014-02-26 16:32:23 +00:00
|
|
|
return OSHMEM_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////////// */
|
|
|
|
static int
|
|
|
|
verbs_open(void)
|
|
|
|
{
|
|
|
|
return OSHMEM_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////////// */
|
|
|
|
static int
|
|
|
|
verbs_query(mca_base_module_t **module, int *priority)
|
|
|
|
{
|
|
|
|
*priority = mca_sshmem_verbs_component.priority;
|
|
|
|
*module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
|
|
|
|
return OSHMEM_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ////////////////////////////////////////////////////////////////////////// */
|
|
|
|
static int
|
|
|
|
verbs_close(void)
|
|
|
|
{
|
|
|
|
return OSHMEM_SUCCESS;
|
|
|
|
}
|
|
|
|
|