1
1
openmpi/orte/mca/oob/ud/oob_ud_component.c

735 строки
26 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "oob_ud_component.h"
static int mca_oob_ud_component_open (void);
static int mca_oob_ud_component_close (void);
static int mca_oob_ud_component_register (void);
static bool mca_oob_ud_component_available(void);
static int mca_oob_ud_component_startup(void);
static int mca_oob_ud_component_send_nb(orte_rml_send_t *msg);
static void mca_oob_ud_component_shutdown(void);
static char* mca_oob_ud_component_get_addr(void);
static int mca_oob_ud_component_set_addr(orte_process_name_t *peer, char **uris);
static bool mca_oob_ud_component_is_reachable(orte_process_name_t *peer);
#if OPAL_ENABLE_FT_CR == 1
static int mca_oob_ud_component_ft_event(int state);
#endif // OPAL_ENABLE_FT_CR
static int mca_oob_ud_listen_create (mca_oob_ud_port_t *port);
static int mca_oob_ud_listen_destroy (mca_oob_ud_port_t *port);
static int mca_oob_ud_port_alloc_buffers (mca_oob_ud_port_t *port);
static inline int mca_oob_ud_port_recv_start (mca_oob_ud_port_t *port);
static inline int mca_oob_ud_alloc_reg_mem (struct ibv_pd *pd, mca_oob_ud_reg_mem_t *reg_mem,
const int buffer_len);
static inline void mca_oob_ud_free_reg_mem (mca_oob_ud_reg_mem_t *reg_mem);
static void mca_oob_ud_cancel_all_in_list (opal_list_t *list);
static void mca_oob_ud_empty_list (opal_list_t *list);
static void mca_oob_ud_port_construct (mca_oob_ud_port_t *port);
static void mca_oob_ud_port_destruct (mca_oob_ud_port_t *port);
static void mca_oob_ud_device_construct (mca_oob_ud_device_t *device);
static void mca_oob_ud_device_destruct (mca_oob_ud_device_t *device);
/*
* Struct of function pointers and all that to let us be initialized
*/
mca_oob_ud_component_t mca_oob_ud_component = {
{
{
MCA_OOB_BASE_VERSION_2_0_0,
"ud", /* MCA module name */
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
mca_oob_ud_component_open, /* component open */
mca_oob_ud_component_close, /* component close */
NULL, /* component query */
mca_oob_ud_component_register, /* component register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
0, // reserve space for an assigned index
0, //set the priority so that we will select this component only if someone directs to do so
mca_oob_ud_component_available, //available
mca_oob_ud_component_startup, //startup
mca_oob_ud_component_shutdown, //shutdown
mca_oob_ud_component_send_nb, //send_nb
mca_oob_ud_component_get_addr,
mca_oob_ud_component_set_addr,
mca_oob_ud_component_is_reachable, //is_reachable
#if OPAL_ENABLE_FT_CR == 1
mca_oob_ud_component_ft_event,
#endif // OPAL_ENABLE_FT_CR
},
};
static int mca_oob_ud_component_open (void)
{
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_devices, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_active_sends, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_active_recvs, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_event_queued_reqs, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_event_processing_msgs, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_oob_ud_component.ud_match_lock, opal_mutex_t);
return ORTE_SUCCESS;
}
static int mca_oob_ud_component_close (void)
{
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:component_close entering",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
OBJ_DESTRUCT(&mca_oob_ud_component.ud_devices);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_active_sends);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_active_recvs);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_event_queued_reqs);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_lock);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_match_lock);
OBJ_DESTRUCT(&mca_oob_ud_component.ud_event_processing_msgs);
return ORTE_SUCCESS;
}
static int mca_oob_ud_component_register (void)
{
mca_base_component_t *component = &mca_oob_ud_component.super.oob_base;
mca_oob_ud_component.ud_min_qp = 8;
(void) mca_base_component_var_register (component, "min_qp", "Minimum number of UD queue pairs "
"to allocate (default: 8)", MCA_BASE_VAR_TYPE_INT, NULL,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_ud_component.ud_min_qp);
mca_oob_ud_component.ud_max_qp = 32;
(void) mca_base_component_var_register (component, "max_qp", "Maximum number of UD queue pairs "
"to allocate (default: 32)", MCA_BASE_VAR_TYPE_INT, NULL,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_ud_component.ud_max_qp);
mca_oob_ud_component.ud_recv_buffer_count = 512;
(void) mca_base_component_var_register (component, "recv_buffers", "Number of MTU sized recv "
"buffers to post (default: 512)", MCA_BASE_VAR_TYPE_INT, NULL,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_ud_component.ud_recv_buffer_count);
mca_oob_ud_component.ud_send_buffer_count = 512;
(void) mca_base_component_var_register (component, "send_buffers", "Number of MTU sized send "
"buffers to allocate (default: 512)", MCA_BASE_VAR_TYPE_INT, NULL,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_ud_component.ud_send_buffer_count);
mca_oob_ud_component.ud_max_retries = 5;
(void)mca_base_component_var_register(component, "peer_retries",
"Number of times to try shutting down a connection before giving up",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_oob_ud_component.ud_max_retries);
mca_oob_ud_component.ud_timeout_usec = 800000;
(void)mca_base_component_var_register(component, "peer_timeout",
"Timeout in microseconds between retransmission of data",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_oob_ud_component.ud_timeout_usec);
return ORTE_SUCCESS;
}
static bool mca_oob_ud_component_available(void) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"oob:ud: component_available called");
/* set the module event base - this is where we would spin off a separate
* progress thread if so desired */
mca_oob_ud_module.ev_base = orte_event_base;
return true;
}
static int port_mtus[] = {0, 256, 512, 1024, 2048, 4096};
static inline int mca_oob_ud_port_setup (mca_oob_ud_port_t *port)
{
int rc;
struct ibv_port_attr port_attr;
rc = ibv_query_port (port->device->ib_context, port->port_num, &port_attr);
if (0 != rc || IBV_PORT_ACTIVE != port_attr.state || 0 == port_attr.lid) {
/* skip this port */
return ORTE_ERROR;
}
port->lid = port_attr.lid;
port->mtu = port_attr.active_mtu > IBV_MTU_4096 ? 2048 : port_mtus[port_attr.active_mtu];
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:port_setup found port: num = %u, lid = %u, mtu = %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
port->port_num, port->lid, port->mtu);
return rc;
}
static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
struct ibv_device *ib_device)
{
int rc, port_num;
struct ibv_device_attr dev_attr;
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup attempting to setup ib device %p",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) ib_device);
device->ib_context = ibv_open_device (ib_device);
if (NULL == device->ib_context) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup error opening device. errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
rc = ibv_query_device (device->ib_context, &dev_attr);
if (0 != rc) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup error querying device. errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
device->ib_channel = ibv_create_comp_channel (device->ib_context);
if (NULL == device->ib_channel) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup error completing completion channel."
"errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
device->ib_pd = ibv_alloc_pd (device->ib_context);
if (NULL == device->ib_pd) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup error allocating protection domain."
"errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
for (port_num = 1 ; port_num <= dev_attr.phys_port_cnt ; ++port_num) {
mca_oob_ud_port_t *port = OBJ_NEW(mca_oob_ud_port_t);
if (NULL == port) {
opal_output (0, "oob:ud:device_setup malloc failure. errno = %d", errno);
return ORTE_ERR_OUT_OF_RESOURCE;
}
port->device = device;
port->port_num = port_num;
rc = mca_oob_ud_port_setup (port);
if (ORTE_SUCCESS != rc) {
OBJ_RELEASE(port);
continue;
}
opal_list_append (&device->ports, (opal_list_item_t *) port);
break;
}
if (0 == opal_list_get_size(&device->ports)) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup could not init device. no usable "
"ports present", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
static int mca_oob_ud_component_startup(void)
{
struct ibv_device **devices;
int num_devices, i, rc;
opal_list_item_t *item, *item2;
bool found_one = false;
devices = ibv_get_device_list (&num_devices);
if (NULL == devices || 0 == num_devices) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:component_init no devices found",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ORTE_ERROR;
}
for (i = 0 ; i < num_devices ; ++i) {
mca_oob_ud_device_t *device = OBJ_NEW(mca_oob_ud_device_t);
if (NULL == device) {
opal_output (0, "oob:ud:component_init malloc failure. errno = %d",
errno);
return ORTE_ERROR;
}
rc = mca_oob_ud_device_setup (device, devices[i]);
if (ORTE_SUCCESS != rc) {
OBJ_RELEASE(device);
continue;
}
opal_list_append (&mca_oob_ud_component.ud_devices,
(opal_list_item_t *) device);
/* NTH: support only 1 device for now */
break;
}
ibv_free_device_list (devices);
if (0 == opal_list_get_size (&mca_oob_ud_component.ud_devices)) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:component_init no usable devices found.",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ORTE_ERROR;
}
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:init initializing oob/openib. # of devices = %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(unsigned int) opal_list_get_size (&mca_oob_ud_component.ud_devices));
for (item = opal_list_get_first (&mca_oob_ud_component.ud_devices);
item != opal_list_get_end (&mca_oob_ud_component.ud_devices);
item = opal_list_get_next (item)) {
mca_oob_ud_device_t *device = (mca_oob_ud_device_t *) item;
/* start monitoring the device for completions */
for (item2 = opal_list_get_first (&device->ports) ;
item2 != opal_list_get_end (&device->ports) ;
item2 = opal_list_get_next (item2)) {
mca_oob_ud_port_t *port = (mca_oob_ud_port_t *) item2;
rc = mca_oob_ud_listen_create (port);
if (0 != rc) {
continue;
}
rc = mca_oob_ud_port_alloc_buffers (port);
if (ORTE_SUCCESS != rc) {
mca_oob_ud_listen_destroy (port);
continue;
}
rc = opal_free_list_init (&port->data_qps,
sizeof (mca_oob_ud_qp_t),
OBJ_CLASS(mca_oob_ud_qp_t),
mca_oob_ud_component.ud_min_qp,
mca_oob_ud_component.ud_max_qp,
2);
if (OPAL_SUCCESS != rc) {
mca_oob_ud_listen_destroy (port);
continue;
}
rc = mca_oob_ud_port_recv_start (port);
if (ORTE_SUCCESS != rc) {
mca_oob_ud_listen_destroy (port);
continue;
}
/* NTH: only supports one port for now */
found_one = true;
/* NTH: since we only support one port start monitoring now */
mca_oob_ud_event_start_monitor (device);
break;
}
}
if (!found_one) {
return ORTE_ERR_NOT_FOUND;
}
/* have to call the module init here so we can test for available qpair */
if ((NULL != mca_oob_ud_module.api.init) && (ORTE_SUCCESS != (rc = mca_oob_ud_module.api.init()))){
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
static void mca_oob_ud_component_shutdown(void)
{
mca_oob_ud_peer_t *peer;
opal_list_item_t *item;
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:fini entering",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_lock);
OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_match_lock);
if (ORTE_VPID_INVALID != ORTE_PROC_MY_PARENT->vpid) {
if (ORTE_SUCCESS == mca_oob_ud_peer_lookup (ORTE_PROC_MY_PARENT, &peer) && NULL != peer) {
mca_oob_ud_peer_handle_end (peer);
}
}
/* abort active receives */
mca_oob_ud_cancel_all_in_list (&mca_oob_ud_component.ud_active_recvs);
mca_oob_ud_cancel_all_in_list (&mca_oob_ud_component.ud_active_sends);
mca_oob_ud_empty_list (&mca_oob_ud_component.ud_event_queued_reqs);
OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_match_lock);
if (NULL != mca_oob_ud_module.api.finalize) {
mca_oob_ud_module.api.finalize(&peer);
}
for (item = opal_list_get_first (&mca_oob_ud_component.ud_devices);
item != opal_list_get_end (&mca_oob_ud_component.ud_devices);
item = opal_list_get_next (item)) {
mca_oob_ud_event_stop_monitor ((mca_oob_ud_device_t *) item);
}
mca_oob_ud_empty_list (&mca_oob_ud_component.ud_devices);
OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_lock);
}
static char* mca_oob_ud_component_get_addr(void) {
/* NTH: qp_num - 32 bits (10), lid - 16 bits (5), port - 8 bits (3) + ud:// + 3 .'s + \0 = 27 chars */
char *contact_info = (char *) calloc(opal_list_get_size(&mca_oob_ud_component.ud_devices) * 27, 1);
char *ptr = contact_info;
opal_list_item_t *item, *port_item;
*ptr = 0;
for (item = opal_list_get_first (&mca_oob_ud_component.ud_devices) ;
item != opal_list_get_end (&mca_oob_ud_component.ud_devices) ;
item = opal_list_get_next (item)) {
mca_oob_ud_device_t *device = (mca_oob_ud_device_t *) item;
for (port_item = opal_list_get_first (&device->ports);
port_item != opal_list_get_end (&device->ports);
port_item = opal_list_get_next (port_item)) {
if (ptr != contact_info) {
ptr += sprintf (ptr, ";");
}
mca_oob_ud_port_get_uri ((mca_oob_ud_port_t *) port_item, ptr);
ptr += strlen (ptr);
}
}
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:get_addr contact information: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), contact_info);
return contact_info;
}
static int mca_oob_ud_component_send_nb(orte_rml_send_t *msg) {
if (NULL != mca_oob_ud_module.api.send_nb) {
mca_oob_ud_module.api.send_nb(msg);
return ORTE_SUCCESS;
}
return ORTE_ERROR;
}
static int mca_oob_ud_component_set_addr(orte_process_name_t *peer, char **uris)
{
int rc;
OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_lock);
for (int i = 0; NULL != uris[i]; i++) {
if (NULL != mca_oob_ud_module.api.set_addr) {
if (ORTE_SUCCESS != (rc = mca_oob_ud_module.api.set_addr(peer, uris[i]))) {
return rc;
}
}
}
OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_lock);
return ORTE_SUCCESS;
}
#if OPAL_ENABLE_FT_CR == 1
static int mca_oob_ud_component_ft_event(int state) {
(void) state;
return ORTE_SUCCESS;
}
#endif // OPAL_ENABLE_FT_CR
static int mca_oob_ud_port_alloc_buffers (mca_oob_ud_port_t *port) {
int total_buffer_count = mca_oob_ud_component.ud_recv_buffer_count +
mca_oob_ud_component.ud_send_buffer_count;
opal_list_item_t *item;
int rc, i;
rc = mca_oob_ud_alloc_reg_mem (port->device->ib_pd, &port->grh_buf,
mca_oob_ud_component.ud_recv_buffer_count * sizeof (struct ibv_grh));
if (ORTE_SUCCESS != rc) {
return rc;
}
rc = mca_oob_ud_alloc_reg_mem (port->device->ib_pd, &port->msg_buf,
total_buffer_count * port->mtu);
if (ORTE_SUCCESS != rc) {
return rc;
}
rc = opal_free_list_init (&port->free_msgs, sizeof (mca_oob_ud_msg_t),
OBJ_CLASS(mca_oob_ud_msg_t), mca_oob_ud_component.ud_send_buffer_count,
mca_oob_ud_component.ud_send_buffer_count, 0);
if (ORTE_SUCCESS != rc) {
return rc;
}
for (i = 0, item = opal_list_get_first (&port->free_msgs.super) ;
item != opal_list_get_end (&port->free_msgs.super) ;
item = opal_list_get_next (item), ++i) {
char *ptr = port->msg_buf.ptr + (i + mca_oob_ud_component.ud_recv_buffer_count) *
port->mtu;
mca_oob_ud_msg_init ((mca_oob_ud_msg_t *) item, port,
ptr, port->msg_buf.mr);
}
return rc;
}
static bool mca_oob_ud_component_is_reachable(orte_process_name_t *peer_name)
{
orte_process_name_t hop;
/* if we have a route to this peer, then we can reach it */
hop = orte_routed.get_route(peer_name);
if (ORTE_JOBID_INVALID == hop.jobid ||
ORTE_VPID_INVALID == hop.vpid) {
opal_output (0, "%s oob:ud:component_is_reachable peer %s is unreachable",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer_name));
return false;
}
return true;
}
static void mca_oob_ud_port_construct (mca_oob_ud_port_t *port)
{
memset((char *) port + sizeof (port->super), 0, sizeof (*port) - sizeof (port->super));
OBJ_CONSTRUCT(&port->data_qps, opal_free_list_t);
OBJ_CONSTRUCT(&port->free_msgs, opal_free_list_t);
OBJ_CONSTRUCT(&port->listen_qp, opal_free_list_item_t);
}
static void mca_oob_ud_port_destruct (mca_oob_ud_port_t *port)
{
(void) mca_oob_ud_listen_destroy (port);
OBJ_DESTRUCT(&port->data_qps);
OBJ_DESTRUCT(&port->free_msgs);
mca_oob_ud_free_reg_mem (&port->grh_buf);
mca_oob_ud_free_reg_mem (&port->msg_buf);
}
OBJ_CLASS_INSTANCE(mca_oob_ud_port_t, opal_list_item_t,
mca_oob_ud_port_construct,
mca_oob_ud_port_destruct);
static int mca_oob_ud_listen_create (mca_oob_ud_port_t *port) {
return mca_oob_ud_qp_init (&port->listen_qp, port, port->device->ib_channel, NULL, false);
}
/* mca_oob_ud_listen_destroy:
*
* Destory the listen queue pair associated with a port.
*/
static int mca_oob_ud_listen_destroy (mca_oob_ud_port_t *port)
{
if (NULL == port || NULL == port->listen_qp.ib_qp) {
return ORTE_SUCCESS;
}
OBJ_DESTRUCT(&port->listen_qp);
return ORTE_SUCCESS;
}
static inline int mca_oob_ud_port_recv_start (mca_oob_ud_port_t *port)
{
int i, rc;
rc = mca_oob_ud_qp_to_rts (&port->listen_qp);
if (ORTE_SUCCESS != rc) {
return rc;
}
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:port_recv_start posting "
"%d message buffers", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
mca_oob_ud_component.ud_recv_buffer_count);
for (i = 0 ; i < mca_oob_ud_component.ud_recv_buffer_count ; ++i) {
rc = mca_oob_ud_port_post_one_recv (port, i);
if (ORTE_SUCCESS != rc) {
return rc;
}
}
rc = ibv_req_notify_cq (port->listen_qp.ib_recv_cq, 0);
if (0 != rc) {
opal_output (0, "%s oob:ud:port_recv_start error requesting completion"
"notifications. rc = %d, errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc, errno);
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
static inline int mca_oob_ud_alloc_reg_mem (struct ibv_pd *pd, mca_oob_ud_reg_mem_t *reg_mem,
const int buffer_len)
{
reg_mem->len = buffer_len;
reg_mem->ptr = NULL;
reg_mem->mr = NULL;
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:alloc_reg_mem allocing and registering %d bytes of memory with pd %p",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), buffer_len, (void *) pd);
posix_memalign ((void **)&reg_mem->ptr, sysconf(_SC_PAGESIZE), buffer_len);
if (NULL == reg_mem->ptr) {
opal_output (0, "%s oob:ud:alloc_reg_mem malloc failed! errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERR_OUT_OF_RESOURCE;
}
memset (reg_mem->ptr, 0, buffer_len);
reg_mem->mr = ibv_reg_mr (pd, reg_mem->ptr, buffer_len, IBV_ACCESS_LOCAL_WRITE);
if (NULL == reg_mem->mr) {
opal_output (0, "%s oob:ud:alloc_reg_mem failed to register memory. errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
static inline void mca_oob_ud_free_reg_mem (mca_oob_ud_reg_mem_t *reg_mem)
{
if (reg_mem->mr) {
(void) ibv_dereg_mr (reg_mem->mr);
}
if (reg_mem->ptr) {
free (reg_mem->ptr);
}
memset (reg_mem, 0, sizeof (mca_oob_ud_reg_mem_t));
}
static void mca_oob_ud_cancel_all_in_list (opal_list_t *list)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (list))) {
((mca_oob_ud_req_t *)item)->req_list = NULL;
mca_oob_ud_req_abort ((mca_oob_ud_req_t *) item);
}
}
static void mca_oob_ud_empty_list (opal_list_t *list)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (list))) {
OBJ_RELEASE(item);
}
}
static void mca_oob_ud_device_construct (mca_oob_ud_device_t *device)
{
memset((char *) device + sizeof (device->super), 0, sizeof (*device) - sizeof (device->super));
OBJ_CONSTRUCT(&device->ports, opal_list_t);
}
static void mca_oob_ud_device_destruct (mca_oob_ud_device_t *device)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (&device->ports))) {
OBJ_RELEASE(item);
}
if (device->ib_pd) {
(void) ibv_dealloc_pd (device->ib_pd);
}
if (device->ib_channel) {
(void) ibv_destroy_comp_channel (device->ib_channel);
}
if (device->ib_context) {
(void) ibv_close_device (device->ib_context);
}
OBJ_DESTRUCT(&device->ports);
memset (device, 0, sizeof (mca_oob_ud_device_t));
}
OBJ_CLASS_INSTANCE(mca_oob_ud_device_t, opal_list_item_t,
mca_oob_ud_device_construct,
mca_oob_ud_device_destruct);
OBJ_CLASS_INSTANCE(mca_oob_ud_msg_op_t,
opal_object_t,
NULL, NULL);
OBJ_CLASS_INSTANCE(mca_oob_ud_ping_t,
opal_object_t,
NULL, NULL);