1
1
openmpi/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
2015-06-23 20:59:57 -07:00

354 строки
12 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/constants.h"
#include "opal/util/sys_limits.h"
#include "opal/mca/common/verbs/common_verbs.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_verbs.h"
/**
* public string showing the shmem ompi_mmap component version number
*/
const char *mca_sshmem_verbs_component_version_string =
"OSHMEM mmap sshmem MCA component version " OSHMEM_VERSION;
int mca_sshmem_verbs_relocate_backing_file = 0;
char *mca_sshmem_verbs_backing_file_base_dir = NULL;
bool mca_sshmem_verbs_nfs_warning = true;
/**
* local functions
*/
static int verbs_register(void);
static int verbs_open(void);
static int verbs_close(void);
static int verbs_query(mca_base_module_t **module, int *priority);
static int verbs_runtime_query(mca_base_module_t **module,
int *priority,
const char *hint);
/**
* instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_sshmem_verbs_component_t mca_sshmem_verbs_component = {
/* ////////////////////////////////////////////////////////////////////// */
/* super */
/* ////////////////////////////////////////////////////////////////////// */
{
/**
* common MCA component data
*/
.base_version = {
MCA_SSHMEM_BASE_VERSION_2_0_0,
/* component name and version */
.mca_component_name = "verbs",
MCA_BASE_MAKE_VERSION(component, OSHMEM_MAJOR_VERSION, OSHMEM_MINOR_VERSION,
OSHMEM_RELEASE_VERSION),
.mca_open_component = verbs_open,
.mca_close_component = verbs_close,
.mca_query_component = verbs_query,
.mca_register_component_params = verbs_register,
},
/* MCA v2.0.0 component meta data */
.base_data = {
/* the component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.runtime_query = verbs_runtime_query,
},
};
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_runtime_query(mca_base_module_t **module,
int *priority,
const char *hint)
{
int rc = OSHMEM_SUCCESS;
openib_device_t my_device;
openib_device_t *device = &my_device;
int num_devs = 0;
int i = 0;
*priority = 0;
*module = NULL;
/* If fork support is requested, try to enable it */
if (OSHMEM_SUCCESS != (rc = opal_common_verbs_fork_test())) {
return OSHMEM_ERROR;
}
memset(device, 0, sizeof(*device));
#ifdef HAVE_IBV_GET_DEVICE_LIST
device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif
if (num_devs == 0 || !device->ib_devs) {
return OSHMEM_ERR_NOT_SUPPORTED;
}
/* Open device */
if (NULL != mca_sshmem_verbs_component.hca_name) {
for (i = 0; i < num_devs; i++) {
if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
device->ib_dev = device->ib_devs[i];
break;
}
}
} else {
device->ib_dev = device->ib_devs[0];
}
if (NULL == device->ib_dev) {
rc = OSHMEM_ERR_NOT_FOUND;
goto out;
}
if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
rc = OSHMEM_ERR_RESOURCE_BUSY;
goto out;
}
/* Obtain device attributes */
if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
rc = OSHMEM_ERR_RESOURCE_BUSY;
goto out;
}
/* Allocate the protection domain for the device */
device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
if (NULL == device->ib_pd) {
rc = OSHMEM_ERR_RESOURCE_BUSY;
goto out;
}
/* Allocate memory */
if (!rc) {
void *addr = NULL;
size_t size = (size_t)opal_getpagesize();
struct ibv_mr *ib_mr = NULL;
uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ;
uint64_t exp_access_flag = 0;
OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
#if (MPAGE_ENABLE > 0)
exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
IBV_EXP_ACCESS_SHARED_MR_USER_READ |
IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */
struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
ib_mr = ibv_exp_reg_mr(&in);
if (NULL == ib_mr) {
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
} else {
device->ib_mr_shared = ib_mr;
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
}
#if (MPAGE_ENABLE > 0)
if (!rc && (0 != mca_sshmem_verbs_component.has_shared_mr)) {
struct ibv_exp_reg_shared_mr_in in_smr;
access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ|
IBV_EXP_ACCESS_NO_RDMA;
addr = (void *)mca_sshmem_base_start_address;
mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle, addr, access_flag);
ib_mr = ibv_exp_reg_shared_mr(&in_smr);
if (NULL == ib_mr) {
if (mca_sshmem_verbs_component.has_shared_mr == 1)
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
mca_sshmem_verbs_component.has_shared_mr = 0;
} else {
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
mca_sshmem_verbs_component.has_shared_mr = 1;
}
}
#else
if (!rc && mca_sshmem_verbs_component.has_shared_mr == 1) {
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
}
mca_sshmem_verbs_component.has_shared_mr = 0;
#endif /* MPAGE_ENABLE */
}
#if !MPAGE_HAVE_IBV_EXP_REG_MR_CREATE_FLAGS
/* disqualify ourselves if we can not alloc contig
* pages at fixed address
*/
if (mca_sshmem_verbs_component.has_shared_mr == 0)
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
#endif
/* all is well - rainbows and butterflies */
if (!rc) {
*priority = mca_sshmem_verbs_component.priority;
*module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
}
out:
if (device) {
if (0 < (i = opal_value_array_get_size(&device->ib_mr_array))) {
struct ibv_mr** array;
struct ibv_mr* ib_mr = NULL;
array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
/* destruct shared_mr first in order to avoid proc fs race */
for (i--;i >= 0; i--) {
ib_mr = array[i];
ibv_dereg_mr(ib_mr);
opal_value_array_remove_item(&device->ib_mr_array, i);
}
if (device->ib_mr_shared) {
device->ib_mr_shared = NULL;
}
OBJ_DESTRUCT(&device->ib_mr_array);
}
if (device->ib_pd) {
ibv_dealloc_pd(device->ib_pd);
device->ib_pd = NULL;
}
if(device->ib_dev_context) {
ibv_close_device(device->ib_dev_context);
device->ib_dev_context = NULL;
}
if(device->ib_devs) {
ibv_free_device_list(device->ib_devs);
device->ib_devs = NULL;
}
}
return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_register(void)
{
int index;
/* ////////////////////////////////////////////////////////////////////// */
/* (default) priority - set high to make verbs the default */
mca_sshmem_verbs_component.priority = 40;
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
"priority", "Priority for sshmem verbs "
"component (default: 40)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_ALL_EQ,
&mca_sshmem_verbs_component.priority);
mca_sshmem_verbs_component.hca_name = NULL;
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
"hca_name", "Preferred hca (default: the first)", MCA_BASE_VAR_TYPE_STRING,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_sshmem_verbs_component.hca_name);
if (index) {
(void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
"hca_name",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
/* allow user specify hca port, extract hca name
* ex: mlx_4_0:1 is allowed
*/
if (mca_sshmem_verbs_component.hca_name) {
char *p;
p = strchr(mca_sshmem_verbs_component.hca_name, ':');
if (p)
*p = 0;
}
mca_sshmem_verbs_component.mr_interleave_factor = 2;
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
"mr_interleave_factor", "try to give at least N Gbytes spaces between mapped memheaps "
"of other PEs that are local to me (default: 2)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_sshmem_verbs_component.mr_interleave_factor);
if (index) {
(void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
"mr_interleave_factor",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_sshmem_verbs_component.has_shared_mr = -1;
index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
"shared_mr", "Shared memory region usage "
"[0 - off, 1 - on, -1 - auto] (default: -1)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_4,
MCA_BASE_VAR_SCOPE_ALL_EQ,
&mca_sshmem_verbs_component.has_shared_mr);
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_open(void)
{
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_query(mca_base_module_t **module, int *priority)
{
*priority = mca_sshmem_verbs_component.priority;
*module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_close(void)
{
return OSHMEM_SUCCESS;
}