1
1

btl/openib: Move free list memory allocation to add_procs

Per RFC which expired two weeks ago:

We are planning to make a change to Open MPI to always set up the btls. This
means the btl init will be called even if add_procs is never called for that
btl. In the openib btl, free list fragments are currently allocated in btl_init.
To avoid wasting that memory, this commit moves the final device setup to
the add_procs function. This includes allocating the free lists and starting the
async event thread.

At this time this change is safe since we have a barrier after add_procs in
MPI_Init. If this changes we will need to re-think some of the initialization
since we might have the possibility of a connection request before add_procs
is called.

Tested with Mellanox ConnectX2 and QLogic HCAs.

Commit also cleans up tabs in btl_openib_async.c.

cmr=v1.7.5:reviewer=miked

This commit was SVN r30122.
Этот коммит содержится в:
Nathan Hjelm 2014-01-06 19:51:30 +00:00
родитель 03c5791104
Коммит 5c8ea3a251
5 изменённых файлов: 299 добавлений и 290 удалений

Просмотреть файл

@ -52,6 +52,8 @@
#include "btl_openib_proc.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_xrc.h"
#include "btl_openib_async.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/mpool.h"
@ -689,6 +691,218 @@ static uint64_t calculate_max_reg (void)
return (max_reg * 7) >> 3;
}
/**
 * Finish the setup of an openib device so that it is ready to be used.
 *
 * The work is done at most once per device: if device->ready_for_use is
 * already set the function returns OMPI_SUCCESS immediately, so it can be
 * called for every btl module even when several modules share one device.
 *
 * In order, the function:
 *   - allocates device->qps and constructs the per-QP send/recv free lists;
 *   - when the async event thread is enabled, starts it (if needed) and
 *     hands it the device's async fd, waiting for the acknowledgement;
 *   - when XRC is enabled, checks device support and opens the XRC domain;
 *   - creates the endpoint array and adds the device to the component's
 *     device array;
 *   - allocates the eager RDMA buffer table when eager RDMA is in use;
 *   - initializes the control, send and receive fragment free lists.
 *
 * Each init_data structure allocated here is passed to
 * ompi_free_list_init_ex_new() as the context for mca_btl_openib_frag_init
 * and is not freed by this function.
 *
 * @param device  device to prepare
 *
 * @return OMPI_SUCCESS on success (device->ready_for_use is then true),
 *         OMPI_ERR_OUT_OF_RESOURCE if a memory allocation fails, otherwise
 *         OMPI_ERROR or the code returned by ompi_free_list_init_ex_new().
 *         Most error paths return without undoing the steps that already
 *         completed; device->ready_for_use stays false in that case.
 */
static int prepare_device_for_use (mca_btl_openib_device_t *device)
{
    mca_btl_openib_frag_init_data_t *init_data;
    int rc, length;

    if (device->ready_for_use) {
        return OMPI_SUCCESS;
    }

    /* Several btl modules may share the same device, so this function can
       be reached more than once for one device; the ready_for_use check
       above makes the later calls a no-op.

       prepare_device_for_use() must only be called after the btl has been
       added to mca_btl_openib_component.openib_btls, since it adds the
       device to the async thread, which requires access to
       mca_btl_openib_component.openib_btls.
    */

    /* Setup the device qps info */
    device->qps = (mca_btl_openib_device_qp_t*)
        calloc(mca_btl_openib_component.num_qps,
               sizeof(mca_btl_openib_device_qp_t));
    if (NULL == device->qps) {
        BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (int qp_index = 0 ; qp_index < mca_btl_openib_component.num_qps ; qp_index++) {
        OBJ_CONSTRUCT(&device->qps[qp_index].send_free, ompi_free_list_t);
        OBJ_CONSTRUCT(&device->qps[qp_index].recv_free, ompi_free_list_t);
    }

#if OPAL_HAVE_THREADS
    if(mca_btl_openib_component.use_async_event_thread) {
        mca_btl_openib_async_cmd_t async_command;

        /* start the async event thread if it is not already started */
        if (start_async_event_thread() != OMPI_SUCCESS) {
            return OMPI_ERROR;
        }

        device->got_fatal_event = false;
        device->got_port_event = false;

        /* ask the async thread to start polling this device's async fd */
        async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD;
        async_command.fd = device->ib_dev_context->async_fd;
        if (write(mca_btl_openib_component.async_pipe[1],
                  &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){
            BTL_ERROR(("Failed to write to pipe [%d]",errno));
            return OMPI_ERROR;
        }

        /* wait for ok from thread */
        if (OMPI_SUCCESS !=
            btl_openib_async_command_done(device->ib_dev_context->async_fd)) {
            return OMPI_ERROR;
        }
    }
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* Prepare data for thread, but not starting it */
    OBJ_CONSTRUCT(&device->thread, opal_thread_t);
    device->thread.t_run = mca_btl_openib_progress_thread;
    device->thread.t_arg = device;
    device->progress = false;
#endif
#endif

#if HAVE_XRC
    /* if user configured to run with XRC qp and the device doesn't
     * support it - we should ignore this device. Maybe we have another
     * one that has XRC support
     */
    if (!(device->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
        MCA_BTL_XRC_ENABLED) {
        opal_show_help("help-mpi-btl-openib.txt",
                       "XRC on device without XRC support", true,
                       mca_btl_openib_component.num_xrc_qps,
                       ibv_get_device_name(device->ib_dev),
                       ompi_process_info.nodename);
        return OMPI_ERROR;
    }

    if (MCA_BTL_XRC_ENABLED) {
        if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
            BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
            return OMPI_ERROR;
        }
    }
#endif

    device->endpoints = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(device->endpoints, 10, INT_MAX, 10);
    opal_pointer_array_add(&mca_btl_openib_component.devices, device);

    if (mca_btl_openib_component.max_eager_rdma > 0 &&
        device->use_eager_rdma) {
        device->eager_rdma_buffers =
            (mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls,
                                                sizeof(mca_btl_openib_endpoint_t*));
        if(NULL == device->eager_rdma_buffers) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Control fragments: one free list per device */
    init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
    if (NULL == init_data) {
        if (mca_btl_openib_component.max_eager_rdma > 0 &&
            device->use_eager_rdma) {
            /* cleanup */
            free (device->eager_rdma_buffers);
            device->eager_rdma_buffers = NULL;
        }
        BTL_ERROR(("Memory allocation fails"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    length = sizeof(mca_btl_openib_header_t) +
        sizeof(mca_btl_openib_footer_t) +
        sizeof(mca_btl_openib_eager_rdma_header_t);

    init_data->order = MCA_BTL_NO_ORDER;
    init_data->list = &device->send_free_control;

    rc = ompi_free_list_init_ex_new(&device->send_free_control,
                sizeof(mca_btl_openib_send_control_frag_t), opal_cache_line_size,
                OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
                mca_btl_openib_component.buffer_alignment,
                mca_btl_openib_component.ib_free_list_num, -1,
                mca_btl_openib_component.ib_free_list_inc,
                device->mpool, mca_btl_openib_frag_init,
                init_data);
    if (OMPI_SUCCESS != rc) {
        /* If we're "out of memory", this usually means that we ran
           out of registered memory, so show that error message */
        if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
            OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
            errno = ENOMEM;
            mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                           "ompi_free_list_init_ex_new",
                                           ibv_get_device_name(device->ib_dev));
        }
        return rc;
    }

    /* setup all the qps */
    for (int qp = 0 ; qp < mca_btl_openib_component.num_qps ; qp++) {
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Initialize pool of send fragments */
        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].send_free;

        rc = ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_send_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_send_frag_t), length,
                    mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data);
        if (OMPI_SUCCESS != rc) {
            /* If we're "out of memory", this usually means that we
               ran out of registered memory, so show that error
               message */
            if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
                OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
                errno = ENOMEM;
                mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                               "ompi_free_list_init_ex_new",
                                               ibv_get_device_name(device->ib_dev));
            }
            return OMPI_ERROR;
        }

        /* Initialize pool of receive fragments.  The allocation must be
           checked before init_data is written to, exactly like the send
           side above. */
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].recv_free;

        if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_recv_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_recv_frag_t),
                    length, mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data)) {
            return OMPI_ERROR;
        }
    }

    /* Only mark the device usable once every step above has succeeded */
    device->ready_for_use = true;

    return OMPI_SUCCESS;
}
/*
* add a proc to this btl module
@ -732,6 +946,12 @@ int mca_btl_openib_add_procs(
}
#endif
rc = prepare_device_for_use (openib_btl->device);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("could not prepare openib device for use"));
return rc;
}
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
mca_btl_openib_proc_t* ib_proc;

Просмотреть файл

@ -412,6 +412,8 @@ typedef struct mca_btl_openib_device_t {
uint32_t max_inline_data;
/* Registration limit and current count */
uint64_t mem_reg_max, mem_reg_active;
/* Device is ready for use */
bool ready_for_use;
} mca_btl_openib_device_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_device_t);

Просмотреть файл

@ -1,8 +1,11 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -47,7 +50,7 @@ static int return_status = OMPI_ERROR;
static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *hcas_poll);
static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *hcas_poll, opal_list_t *ignore_qp_err_list);
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *hcas_poll, int index,
opal_list_t *ignore_qp_err_list);
opal_list_t *ignore_qp_err_list);
static const char *openib_event_to_str (enum ibv_event_type event);
static int send_command_comp(int in);
@ -171,8 +174,8 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
BTL_VERBOSE(("Got cmd %d", cmd.a_cmd));
if (OPENIB_ASYNC_CMD_FD_ADD == cmd.a_cmd) {
fd = cmd.fd;
BTL_VERBOSE(("Got fd %d", fd));
fd = cmd.fd;
BTL_VERBOSE(("Got fd %d", fd));
BTL_VERBOSE(("Adding device [%d] to async event poll[%d]",
fd, devices_poll->active_poll_size));
flags = fcntl(fd, F_GETFL);
@ -204,8 +207,8 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
} else if (OPENIB_ASYNC_CMD_FD_REMOVE == cmd.a_cmd) {
bool fd_found = false;
fd = cmd.fd;
BTL_VERBOSE(("Got fd %d", fd));
fd = cmd.fd;
BTL_VERBOSE(("Got fd %d", fd));
/* Removing device from poll */
BTL_VERBOSE(("Removing device [%d] from async event poll [%d]",
@ -232,24 +235,24 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
return OMPI_ERROR;
}
} else if (OPENIB_ASYNC_IGNORE_QP_ERR == cmd.a_cmd) {
mca_btl_openib_qp_list *new_qp;
new_qp = OBJ_NEW(mca_btl_openib_qp_list);
BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp));
new_qp->qp = cmd.qp;
opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp);
send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR);
mca_btl_openib_qp_list *new_qp;
new_qp = OBJ_NEW(mca_btl_openib_qp_list);
BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp));
new_qp->qp = cmd.qp;
opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp);
send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR);
} else if (OPENIB_ASYNC_THREAD_EXIT == cmd.a_cmd) {
/* Got 0 - command to close the thread */
opal_list_item_t *item;
opal_list_item_t *item;
BTL_VERBOSE(("Async event thread exit"));
free(devices_poll->async_pollfd);
return_status = OMPI_SUCCESS;
while ((item = opal_list_remove_first(ignore_qp_err_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(ignore_qp_err_list);
while ((item = opal_list_remove_first(ignore_qp_err_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(ignore_qp_err_list);
pthread_exit(&return_status);
}
@ -315,7 +318,7 @@ srq_limit_event_exit:
/* Function handle async device events */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index,
opal_list_t *ignore_qp_err_list)
opal_list_t *ignore_qp_err_list)
{
int j;
mca_btl_openib_device_t *device = NULL;
@ -374,28 +377,28 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
case IBV_EVENT_CQ_ERR:
case IBV_EVENT_QP_FATAL:
if (event_type == IBV_EVENT_QP_FATAL) {
opal_list_item_t *item;
mca_btl_openib_qp_list *qp_item;
bool in_ignore_list = false;
if (event_type == IBV_EVENT_QP_FATAL) {
opal_list_item_t *item;
mca_btl_openib_qp_list *qp_item;
bool in_ignore_list = false;
BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));
BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));
/* look through ignore list */
for (item = opal_list_get_first(ignore_qp_err_list);
item != opal_list_get_end(ignore_qp_err_list);
item = opal_list_get_next(item)) {
qp_item = (mca_btl_openib_qp_list *)item;
if (qp_item->qp == event.element.qp) {
BTL_VERBOSE(("QP %p is in error ignore list",
(void *)event.element.qp));
in_ignore_list = true;
break;
}
}
if (in_ignore_list)
break;
}
/* look through ignore list */
for (item = opal_list_get_first(ignore_qp_err_list);
item != opal_list_get_end(ignore_qp_err_list);
item = opal_list_get_next(item)) {
qp_item = (mca_btl_openib_qp_list *)item;
if (qp_item->qp == event.element.qp) {
BTL_VERBOSE(("QP %p is in error ignore list",
(void *)event.element.qp));
in_ignore_list = true;
break;
}
}
if (in_ignore_list)
break;
}
case IBV_EVENT_QP_REQ_ERR:
case IBV_EVENT_QP_ACCESS_ERR:
@ -457,7 +460,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
/* This Async event thread is handling all async event of
* all btls/devices in openib component
*/
void* btl_openib_async_thread(void * async)
static void* btl_openib_async_thread(void * async)
{
int rc;
int i;
@ -499,7 +502,7 @@ void* btl_openib_async_thread(void * async)
if (0 == i) {
/* 0 poll we use for comunication with main thread */
if (OMPI_SUCCESS != btl_openib_async_commandh(&devices_poll,
&ignore_qp_err_list)) {
&ignore_qp_err_list)) {
free(devices_poll.async_pollfd);
BTL_ERROR(("Failed to process async thread process. "
"Fatal error, stoping asynch event thread"));
@ -508,7 +511,7 @@ void* btl_openib_async_thread(void * async)
} else {
/* We get device event */
if (btl_openib_async_deviceh(&devices_poll, i,
&ignore_qp_err_list)) {
&ignore_qp_err_list)) {
free(devices_poll.async_pollfd);
BTL_ERROR(("Failed to process async thread process. "
"Fatal error, stoping asynch event thread"));
@ -676,4 +679,36 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t
}
#endif
/**
 * Start the component-wide async event thread if it is not running yet.
 *
 * If mca_btl_openib_component.async_thread is already non-zero the function
 * returns OMPI_SUCCESS without doing anything.  Otherwise it resets the
 * component error counter, creates the command pipe (async_pipe) and the
 * completion pipe (async_comp_pipe), and creates the thread running
 * btl_openib_async_thread.
 *
 * @return OMPI_SUCCESS if the thread is running on return, OMPI_ERROR if a
 *         pipe or the thread could not be created.  On failure any pipe
 *         descriptors opened by this call are closed again.
 */
int start_async_event_thread(void)
{
    /* Already started: nothing to do */
    if (0 != mca_btl_openib_component.async_thread) {
        return OMPI_SUCCESS;
    }

    /* Set the error counter to zero */
    mca_btl_openib_component.error_counter = 0;

    /* Create pipe for communication with async event thread */
    if (pipe(mca_btl_openib_component.async_pipe)) {
        BTL_ERROR(("Failed to create pipe for communication with "
                   "async event thread"));
        return OMPI_ERROR;
    }

    if (pipe(mca_btl_openib_component.async_comp_pipe)) {
        BTL_ERROR(("Failed to create comp pipe for communication with "
                   "main thread"));
        /* do not leak the command pipe created just above */
        close(mca_btl_openib_component.async_pipe[0]);
        close(mca_btl_openib_component.async_pipe[1]);
        return OMPI_ERROR;
    }

    /* Starting async event thread for the component */
    if (pthread_create(&mca_btl_openib_component.async_thread, NULL,
                       (void*(*)(void*)) btl_openib_async_thread, NULL)) {
        BTL_ERROR(("Failed to create async event thread"));
        /* no thread will ever use the pipes: release both of them */
        close(mca_btl_openib_component.async_pipe[0]);
        close(mca_btl_openib_component.async_pipe[1]);
        close(mca_btl_openib_component.async_comp_pipe[0]);
        close(mca_btl_openib_component.async_comp_pipe[1]);
        /* the handle is unspecified after a failed pthread_create(); reset
           it so the "already started" test above stays meaningful */
        mca_btl_openib_component.async_thread = 0;
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
#endif

Просмотреть файл

@ -13,7 +13,7 @@
#define MCA_BTL_OPENIB_ASYNC_H
#include "btl_openib_endpoint.h"
void* btl_openib_async_thread(void *one_hca);
int start_async_event_thread(void);
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
int btl_openib_async_command_done(int exp);
#if HAVE_XRC

Просмотреть файл

@ -660,36 +660,6 @@ static inline int param_register_uint(const char* param_name, unsigned int defau
return *storage;
}
#if OPAL_HAVE_THREADS
/*
 * Removed-side copy of start_async_event_thread() (file-local version).
 *
 * Resets the component error counter, creates the async_pipe and
 * async_comp_pipe pipes and creates the thread running
 * btl_openib_async_thread.  Returns OMPI_SUCCESS, or OMPI_ERROR if a pipe or
 * the thread cannot be created.  This version does not itself check whether
 * the thread is already running.
 */
static int start_async_event_thread(void)
{
/* Set the error counter to zero */
mca_btl_openib_component.error_counter = 0;
/* Create pipe for communication with async event thread */
if(pipe(mca_btl_openib_component.async_pipe)) {
BTL_ERROR(("Failed to create pipe for communication with "
"async event thread"));
return OMPI_ERROR;
}
/* Create the second pipe (async_comp_pipe) */
if(pipe(mca_btl_openib_component.async_comp_pipe)) {
BTL_ERROR(("Failed to create comp pipe for communication with "
"main thread"));
return OMPI_ERROR;
}
/* Starting async event thread for the component */
if(pthread_create(&mca_btl_openib_component.async_thread, NULL,
(void*(*)(void*))btl_openib_async_thread, NULL)) {
BTL_ERROR(("Failed to create async event thread"));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
#endif
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
uint8_t port_num, uint16_t pkey_index,
struct ibv_port_attr *ib_port_attr)
@ -928,6 +898,7 @@ static void device_construct(mca_btl_openib_device_t *device)
OBJ_CONSTRUCT(&device->device_lock, opal_mutex_t);
OBJ_CONSTRUCT(&device->send_free_control, ompi_free_list_t);
device->max_inline_data = 0;
device->ready_for_use = false;
}
static void device_destruct(mca_btl_openib_device_t *device)
@ -1039,187 +1010,6 @@ device_error:
OBJ_CLASS_INSTANCE(mca_btl_openib_device_t, opal_object_t, device_construct,
device_destruct);
/*
 * Removed-side copy of prepare_device_for_use().
 *
 * Registers the device's async fd with the async event thread (starting the
 * thread first if async_thread is 0), opens the XRC domain when XRC is
 * enabled, creates the endpoint array, adds the device to the component's
 * device array, allocates the eager RDMA buffer table and initializes the
 * control, send and receive fragment free lists.
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE on a failed
 * allocation, otherwise OMPI_ERROR or the code returned by
 * ompi_free_list_init_ex_new().
 */
static int prepare_device_for_use(mca_btl_openib_device_t *device)
{
mca_btl_openib_frag_init_data_t *init_data;
int rc, qp, length;
#if OPAL_HAVE_THREADS
if(mca_btl_openib_component.use_async_event_thread) {
mca_btl_openib_async_cmd_t async_command;
if(0 == mca_btl_openib_component.async_thread) {
/* async thread is not yet started, so start it here */
if(start_async_event_thread() != OMPI_SUCCESS)
return OMPI_ERROR;
}
device->got_fatal_event = false;
device->got_port_event = false;
/* send an FD_ADD command carrying this device's async fd over async_pipe */
async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD;
async_command.fd = device->ib_dev_context->async_fd;
if (write(mca_btl_openib_component.async_pipe[1],
&async_command, sizeof(mca_btl_openib_async_cmd_t))<0){
BTL_ERROR(("Failed to write to pipe [%d]",errno));
return OMPI_ERROR;
}
/* wait for ok from thread */
if (OMPI_SUCCESS !=
btl_openib_async_command_done(device->ib_dev_context->async_fd)) {
return OMPI_ERROR;
}
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* Prepare data for thread, but not starting it */
OBJ_CONSTRUCT(&device->thread, opal_thread_t);
device->thread.t_run = mca_btl_openib_progress_thread;
device->thread.t_arg = device;
device->progress = false;
#endif
#endif
#if HAVE_XRC
/* if user configured to run with XRC qp and the device doesn't
* support it - we should ignore this device. Maybe we have another
* one that has XRC support
*/
if (!(device->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
MCA_BTL_XRC_ENABLED) {
opal_show_help("help-mpi-btl-openib.txt",
"XRC on device without XRC support", true,
mca_btl_openib_component.num_xrc_qps,
ibv_get_device_name(device->ib_dev),
ompi_process_info.nodename);
return OMPI_ERROR;
}
if (MCA_BTL_XRC_ENABLED) {
if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
return OMPI_ERROR;
}
}
#endif
/* create the endpoint array and add the device to the component's device array */
device->endpoints = OBJ_NEW(opal_pointer_array_t);
opal_pointer_array_init(device->endpoints, 10, INT_MAX, 10);
opal_pointer_array_add(&mca_btl_openib_component.devices, device);
if (mca_btl_openib_component.max_eager_rdma > 0 &&
device->use_eager_rdma) {
device->eager_rdma_buffers =
(mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls,
sizeof(mca_btl_openib_endpoint_t*));
if(NULL == device->eager_rdma_buffers) {
BTL_ERROR(("Memory allocation fails"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
/* init_data is passed to ompi_free_list_init_ex_new() below and is not freed here */
init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
if (NULL == init_data) {
if (mca_btl_openib_component.max_eager_rdma > 0 &&
device->use_eager_rdma) {
/* cleanup */
free (device->eager_rdma_buffers);
device->eager_rdma_buffers = NULL;
}
BTL_ERROR(("Memory allocation fails"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_footer_t) +
sizeof(mca_btl_openib_eager_rdma_header_t);
init_data->order = MCA_BTL_NO_ORDER;
init_data->list = &device->send_free_control;
rc = ompi_free_list_init_ex_new(&device->send_free_control,
sizeof(mca_btl_openib_send_control_frag_t), opal_cache_line_size,
OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num, -1,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, mca_btl_openib_frag_init,
init_data);
if (OMPI_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we ran
out of registered memory, so show that error message */
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
errno = ENOMEM;
mca_btl_openib_show_init_error(__FILE__, __LINE__,
"ompi_free_list_init_ex_new",
ibv_get_device_name(device->ib_dev));
}
return rc;
}
/* setup all the qps */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
if (NULL == init_data) {
BTL_ERROR(("Memory allocation fails"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Initialize pool of send fragments */
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
init_data->order = qp;
init_data->list = &device->qps[qp].send_free;
rc = ompi_free_list_init_ex_new(init_data->list,
sizeof(mca_btl_openib_send_frag_t), opal_cache_line_size,
OBJ_CLASS(mca_btl_openib_send_frag_t), length,
mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, mca_btl_openib_frag_init,
init_data);
if (OMPI_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we
ran out of registered memory, so show that error
message */
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
errno = ENOMEM;
mca_btl_openib_show_init_error(__FILE__, __LINE__,
"ompi_free_list_init_ex_new",
ibv_get_device_name(device->ib_dev));
}
return OMPI_ERROR;
}
/* Initialize pool of receive fragments */
/* NOTE(review): unlike the allocations above, the result of this malloc
is dereferenced below without a NULL check */
init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
init_data->order = qp;
init_data->list = &device->qps[qp].recv_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
sizeof(mca_btl_openib_recv_frag_t), opal_cache_line_size,
OBJ_CLASS(mca_btl_openib_recv_frag_t),
length, mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, mca_btl_openib_frag_init,
init_data)) {
return OMPI_ERROR;
}
}
return OMPI_SUCCESS;
}
static int
get_port_list(mca_btl_openib_device_t *device, int *allowed_ports)
{
@ -2922,44 +2712,6 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}
++i;
/* For each btl module that we made - find every
base device that doesn't have device->qps setup on it yet (remember
that some modules may share the same device, so when going through
to loop, we may hit a device that was already setup earlier in
the loop).
We may to call for prepare_device_for_use() only after adding the btl
to mca_btl_openib_component.openib_btls, since the prepare_device_for_use
adds device to async thread that require access to
mca_btl_openib_component.openib_btls.
*/
if (NULL == device->qps) {
/* Setup the device qps info */
device->qps = (mca_btl_openib_device_qp_t*)
calloc(mca_btl_openib_component.num_qps,
sizeof(mca_btl_openib_device_qp_t));
if (NULL == device->qps) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
goto no_btls;
}
for (qp_index = 0; qp_index < mca_btl_openib_component.num_qps; qp_index++) {
OBJ_CONSTRUCT(&device->qps[qp_index].send_free, ompi_free_list_t);
OBJ_CONSTRUCT(&device->qps[qp_index].recv_free, ompi_free_list_t);
}
/* Do finial init on device */
ret = prepare_device_for_use(device);
if (OMPI_SUCCESS != ret) {
opal_show_help("help-mpi-btl-openib.txt",
"error in device init", true,
ompi_process_info.nodename,
ibv_get_device_name(device->ib_dev));
goto no_btls;
}
}
}
/* If we got nothing, then error out */
if (0 == i) {