btl/openib: Move free list memory allocation to add_procs
Per the RFC, which expired two weeks ago: we are planning to change Open MPI to always set up the btls. This means the btl init will be called even if add_procs is never called for that btl. In the openib btl, free list fragments are currently allocated in btl_init. To avoid wasting that memory, this commit moves the final device setup to the add_procs function. This includes allocating the free lists and starting the async event thread. At this time this change is safe, since we have a barrier after add_procs in MPI_Init. If this changes, we will need to rethink some of the initialization, since a connection request could then arrive before add_procs is called. Tested with Mellanox ConnectX2 and QLogic HCAs. This commit also cleans up tabs in btl_openib_async.c. cmr=v1.7.5:reviewer=miked This commit was SVN r30122.
Этот коммит содержится в:
родитель
03c5791104
Коммит
5c8ea3a251
@ -52,6 +52,8 @@
|
||||
#include "btl_openib_proc.h"
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "btl_openib_xrc.h"
|
||||
#include "btl_openib_async.h"
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
@ -689,6 +691,218 @@ static uint64_t calculate_max_reg (void)
|
||||
return (max_reg * 7) >> 3;
|
||||
}
|
||||
|
||||
/**
 * Finish the setup of an openib device the first time it is needed.
 *
 * Allocates the per-qp bookkeeping, hands the device's async fd to the
 * async event thread (when that thread is enabled), opens the XRC domain
 * (when XRC is enabled), creates the endpoint array and initializes the
 * control, send and receive free lists.  On success the device is flagged
 * ready_for_use, so any later call returns immediately.
 *
 * @param device  device to prepare
 *
 * @return OMPI_SUCCESS if the device is (or already was) ready,
 *         OMPI_ERR_OUT_OF_RESOURCE if a memory allocation fails,
 *         the free-list error code if the control free list cannot be
 *         initialized, OMPI_ERROR for every other failure.
 */
static int prepare_device_for_use (mca_btl_openib_device_t *device)
{
    mca_btl_openib_frag_init_data_t *init_data;
    int rc, length;

    if (device->ready_for_use) {
        return OMPI_SUCCESS;
    }

    /* Several btl modules may share the same device, so this function can
       be reached for a device that was already set up by an earlier
       caller; the ready_for_use check above covers that case.

       prepare_device_for_use() must only be called after the btl has been
       added to mca_btl_openib_component.openib_btls, because it adds the
       device to the async thread, which requires access to
       mca_btl_openib_component.openib_btls.
    */

    /* Setup the device qps info */
    device->qps = (mca_btl_openib_device_qp_t*)
        calloc(mca_btl_openib_component.num_qps,
               sizeof(mca_btl_openib_device_qp_t));
    if (NULL == device->qps) {
        BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (int qp_index = 0 ; qp_index < mca_btl_openib_component.num_qps ; qp_index++) {
        OBJ_CONSTRUCT(&device->qps[qp_index].send_free, ompi_free_list_t);
        OBJ_CONSTRUCT(&device->qps[qp_index].recv_free, ompi_free_list_t);
    }

#if OPAL_HAVE_THREADS
    if(mca_btl_openib_component.use_async_event_thread) {
        mca_btl_openib_async_cmd_t async_command;

        /* start the async event thread if it is not already started */
        if (start_async_event_thread() != OMPI_SUCCESS) {
            return OMPI_ERROR;
        }

        device->got_fatal_event = false;
        device->got_port_event = false;

        /* ask the async thread to start polling this device's async fd */
        async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD;
        async_command.fd = device->ib_dev_context->async_fd;
        if (write(mca_btl_openib_component.async_pipe[1],
                  &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){
            BTL_ERROR(("Failed to write to pipe [%d]",errno));
            return OMPI_ERROR;
        }
        /* wait for ok from thread */
        if (OMPI_SUCCESS !=
            btl_openib_async_command_done(device->ib_dev_context->async_fd)) {
            return OMPI_ERROR;
        }
    }
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* Prepare data for thread, but not starting it */
    OBJ_CONSTRUCT(&device->thread, opal_thread_t);
    device->thread.t_run = mca_btl_openib_progress_thread;
    device->thread.t_arg = device;
    device->progress = false;
#endif
#endif

#if HAVE_XRC
    /* if user configured to run with XRC qp and the device doesn't
     * support it - we should ignore this device. Maybe we have another
     * one that has XRC support
     */
    if (!(device->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
            MCA_BTL_XRC_ENABLED) {
        opal_show_help("help-mpi-btl-openib.txt",
                "XRC on device without XRC support", true,
                mca_btl_openib_component.num_xrc_qps,
                ibv_get_device_name(device->ib_dev),
                ompi_process_info.nodename);
        return OMPI_ERROR;
    }

    if (MCA_BTL_XRC_ENABLED) {
        if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
            BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
            return OMPI_ERROR;
        }
    }
#endif

    device->endpoints = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(device->endpoints, 10, INT_MAX, 10);
    opal_pointer_array_add(&mca_btl_openib_component.devices, device);
    if (mca_btl_openib_component.max_eager_rdma > 0 &&
        device->use_eager_rdma) {
        device->eager_rdma_buffers =
            (mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls,
                                                sizeof(mca_btl_openib_endpoint_t*));
        if(NULL == device->eager_rdma_buffers) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* init_data is handed to the free list as the context argument of
       mca_btl_openib_frag_init, so each list gets its own copy */
    init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
    if (NULL == init_data) {
        if (mca_btl_openib_component.max_eager_rdma > 0 &&
            device->use_eager_rdma) {
            /* cleanup */
            free (device->eager_rdma_buffers);
            device->eager_rdma_buffers = NULL;
        }
        BTL_ERROR(("Memory allocation fails"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    length = sizeof(mca_btl_openib_header_t) +
        sizeof(mca_btl_openib_footer_t) +
        sizeof(mca_btl_openib_eager_rdma_header_t);

    init_data->order = MCA_BTL_NO_ORDER;
    init_data->list = &device->send_free_control;

    rc = ompi_free_list_init_ex_new(&device->send_free_control,
                sizeof(mca_btl_openib_send_control_frag_t), opal_cache_line_size,
                OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
                mca_btl_openib_component.buffer_alignment,
                mca_btl_openib_component.ib_free_list_num, -1,
                mca_btl_openib_component.ib_free_list_inc,
                device->mpool, mca_btl_openib_frag_init,
                init_data);
    if (OMPI_SUCCESS != rc) {
        /* If we're "out of memory", this usually means that we ran
           out of registered memory, so show that error message */
        if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
            OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
            errno = ENOMEM;
            mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                           "ompi_free_list_init_ex_new",
                                           ibv_get_device_name(device->ib_dev));
        }
        return rc;
    }

    /* setup all the qps */
    for (int qp = 0 ; qp < mca_btl_openib_component.num_qps ; qp++) {
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Initialize pool of send fragments */
        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].send_free;

        rc = ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_send_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_send_frag_t), length,
                    mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data);
        if (OMPI_SUCCESS != rc) {
            /* If we're "out of memory", this usually means that we
               ran out of registered memory, so show that error
               message */
            if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
                OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
                errno = ENOMEM;
                mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                               "ompi_free_list_init_ex_new",
                                               ibv_get_device_name(device->ib_dev));
            }
            return OMPI_ERROR;
        }

        /* Initialize pool of receive fragments.  The allocation must be
           checked here as well: init_data is dereferenced right below. */
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].recv_free;

        if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_recv_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_recv_frag_t),
                    length, mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data)) {
            return OMPI_ERROR;
        }
    }

    /* everything is in place; later calls become no-ops */
    device->ready_for_use = true;

    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* add a proc to this btl module
|
||||
@ -732,6 +946,12 @@ int mca_btl_openib_add_procs(
|
||||
}
|
||||
#endif
|
||||
|
||||
rc = prepare_device_for_use (openib_btl->device);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("could not prepare openib device for use"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
|
||||
struct ompi_proc_t* ompi_proc = ompi_procs[i];
|
||||
mca_btl_openib_proc_t* ib_proc;
|
||||
|
@ -412,6 +412,8 @@ typedef struct mca_btl_openib_device_t {
|
||||
uint32_t max_inline_data;
|
||||
/* Registration limit and current count */
|
||||
uint64_t mem_reg_max, mem_reg_active;
|
||||
/* Device is ready for use */
|
||||
bool ready_for_use;
|
||||
} mca_btl_openib_device_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_device_t);
|
||||
|
||||
|
@ -1,8 +1,11 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -47,7 +50,7 @@ static int return_status = OMPI_ERROR;
|
||||
static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *hcas_poll);
|
||||
static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *hcas_poll, opal_list_t *ignore_qp_err_list);
|
||||
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *hcas_poll, int index,
|
||||
opal_list_t *ignore_qp_err_list);
|
||||
opal_list_t *ignore_qp_err_list);
|
||||
static const char *openib_event_to_str (enum ibv_event_type event);
|
||||
static int send_command_comp(int in);
|
||||
|
||||
@ -171,8 +174,8 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
|
||||
|
||||
BTL_VERBOSE(("Got cmd %d", cmd.a_cmd));
|
||||
if (OPENIB_ASYNC_CMD_FD_ADD == cmd.a_cmd) {
|
||||
fd = cmd.fd;
|
||||
BTL_VERBOSE(("Got fd %d", fd));
|
||||
fd = cmd.fd;
|
||||
BTL_VERBOSE(("Got fd %d", fd));
|
||||
BTL_VERBOSE(("Adding device [%d] to async event poll[%d]",
|
||||
fd, devices_poll->active_poll_size));
|
||||
flags = fcntl(fd, F_GETFL);
|
||||
@ -204,8 +207,8 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
|
||||
} else if (OPENIB_ASYNC_CMD_FD_REMOVE == cmd.a_cmd) {
|
||||
bool fd_found = false;
|
||||
|
||||
fd = cmd.fd;
|
||||
BTL_VERBOSE(("Got fd %d", fd));
|
||||
fd = cmd.fd;
|
||||
BTL_VERBOSE(("Got fd %d", fd));
|
||||
|
||||
/* Removing device from poll */
|
||||
BTL_VERBOSE(("Removing device [%d] from async event poll [%d]",
|
||||
@ -232,24 +235,24 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} else if (OPENIB_ASYNC_IGNORE_QP_ERR == cmd.a_cmd) {
|
||||
mca_btl_openib_qp_list *new_qp;
|
||||
new_qp = OBJ_NEW(mca_btl_openib_qp_list);
|
||||
BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp));
|
||||
new_qp->qp = cmd.qp;
|
||||
opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp);
|
||||
send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR);
|
||||
mca_btl_openib_qp_list *new_qp;
|
||||
new_qp = OBJ_NEW(mca_btl_openib_qp_list);
|
||||
BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp));
|
||||
new_qp->qp = cmd.qp;
|
||||
opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp);
|
||||
send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR);
|
||||
|
||||
} else if (OPENIB_ASYNC_THREAD_EXIT == cmd.a_cmd) {
|
||||
/* Got 0 - command to close the thread */
|
||||
opal_list_item_t *item;
|
||||
opal_list_item_t *item;
|
||||
BTL_VERBOSE(("Async event thread exit"));
|
||||
free(devices_poll->async_pollfd);
|
||||
return_status = OMPI_SUCCESS;
|
||||
|
||||
while ((item = opal_list_remove_first(ignore_qp_err_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(ignore_qp_err_list);
|
||||
while ((item = opal_list_remove_first(ignore_qp_err_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(ignore_qp_err_list);
|
||||
|
||||
pthread_exit(&return_status);
|
||||
}
|
||||
@ -315,7 +318,7 @@ srq_limit_event_exit:
|
||||
|
||||
/* Function handle async device events */
|
||||
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index,
|
||||
opal_list_t *ignore_qp_err_list)
|
||||
opal_list_t *ignore_qp_err_list)
|
||||
{
|
||||
int j;
|
||||
mca_btl_openib_device_t *device = NULL;
|
||||
@ -374,28 +377,28 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
|
||||
case IBV_EVENT_CQ_ERR:
|
||||
case IBV_EVENT_QP_FATAL:
|
||||
if (event_type == IBV_EVENT_QP_FATAL) {
|
||||
opal_list_item_t *item;
|
||||
mca_btl_openib_qp_list *qp_item;
|
||||
bool in_ignore_list = false;
|
||||
if (event_type == IBV_EVENT_QP_FATAL) {
|
||||
opal_list_item_t *item;
|
||||
mca_btl_openib_qp_list *qp_item;
|
||||
bool in_ignore_list = false;
|
||||
|
||||
BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));
|
||||
BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));
|
||||
|
||||
/* look through ignore list */
|
||||
for (item = opal_list_get_first(ignore_qp_err_list);
|
||||
item != opal_list_get_end(ignore_qp_err_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
qp_item = (mca_btl_openib_qp_list *)item;
|
||||
if (qp_item->qp == event.element.qp) {
|
||||
BTL_VERBOSE(("QP %p is in error ignore list",
|
||||
(void *)event.element.qp));
|
||||
in_ignore_list = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (in_ignore_list)
|
||||
break;
|
||||
}
|
||||
/* look through ignore list */
|
||||
for (item = opal_list_get_first(ignore_qp_err_list);
|
||||
item != opal_list_get_end(ignore_qp_err_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
qp_item = (mca_btl_openib_qp_list *)item;
|
||||
if (qp_item->qp == event.element.qp) {
|
||||
BTL_VERBOSE(("QP %p is in error ignore list",
|
||||
(void *)event.element.qp));
|
||||
in_ignore_list = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (in_ignore_list)
|
||||
break;
|
||||
}
|
||||
|
||||
case IBV_EVENT_QP_REQ_ERR:
|
||||
case IBV_EVENT_QP_ACCESS_ERR:
|
||||
@ -457,7 +460,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
/* This Async event thread is handling all async event of
|
||||
* all btls/devices in openib component
|
||||
*/
|
||||
void* btl_openib_async_thread(void * async)
|
||||
static void* btl_openib_async_thread(void * async)
|
||||
{
|
||||
int rc;
|
||||
int i;
|
||||
@ -499,7 +502,7 @@ void* btl_openib_async_thread(void * async)
|
||||
if (0 == i) {
|
||||
/* 0 poll we use for comunication with main thread */
|
||||
if (OMPI_SUCCESS != btl_openib_async_commandh(&devices_poll,
|
||||
&ignore_qp_err_list)) {
|
||||
&ignore_qp_err_list)) {
|
||||
free(devices_poll.async_pollfd);
|
||||
BTL_ERROR(("Failed to process async thread process. "
|
||||
"Fatal error, stoping asynch event thread"));
|
||||
@ -508,7 +511,7 @@ void* btl_openib_async_thread(void * async)
|
||||
} else {
|
||||
/* We get device event */
|
||||
if (btl_openib_async_deviceh(&devices_poll, i,
|
||||
&ignore_qp_err_list)) {
|
||||
&ignore_qp_err_list)) {
|
||||
free(devices_poll.async_pollfd);
|
||||
BTL_ERROR(("Failed to process async thread process. "
|
||||
"Fatal error, stoping asynch event thread"));
|
||||
@ -676,4 +679,36 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t
|
||||
}
|
||||
#endif
|
||||
|
||||
int start_async_event_thread(void)
|
||||
{
|
||||
if (0 != mca_btl_openib_component.async_thread) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set the error counter to zero */
|
||||
mca_btl_openib_component.error_counter = 0;
|
||||
|
||||
/* Create pipe for communication with async event thread */
|
||||
if (pipe(mca_btl_openib_component.async_pipe)) {
|
||||
BTL_ERROR(("Failed to create pipe for communication with "
|
||||
"async event thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (pipe(mca_btl_openib_component.async_comp_pipe)) {
|
||||
BTL_ERROR(("Failed to create comp pipe for communication with "
|
||||
"main thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Starting async event thread for the component */
|
||||
if (pthread_create(&mca_btl_openib_component.async_thread, NULL,
|
||||
(void*(*)(void*)) btl_openib_async_thread, NULL)) {
|
||||
BTL_ERROR(("Failed to create async event thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -13,7 +13,7 @@
|
||||
#define MCA_BTL_OPENIB_ASYNC_H
|
||||
#include "btl_openib_endpoint.h"
|
||||
|
||||
void* btl_openib_async_thread(void *one_hca);
|
||||
int start_async_event_thread(void);
|
||||
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
|
||||
int btl_openib_async_command_done(int exp);
|
||||
#if HAVE_XRC
|
||||
|
@ -660,36 +660,6 @@ static inline int param_register_uint(const char* param_name, unsigned int defau
|
||||
return *storage;
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_THREADS
|
||||
static int start_async_event_thread(void)
|
||||
{
|
||||
/* Set the error counter to zero */
|
||||
mca_btl_openib_component.error_counter = 0;
|
||||
|
||||
/* Create pipe for communication with async event thread */
|
||||
if(pipe(mca_btl_openib_component.async_pipe)) {
|
||||
BTL_ERROR(("Failed to create pipe for communication with "
|
||||
"async event thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if(pipe(mca_btl_openib_component.async_comp_pipe)) {
|
||||
BTL_ERROR(("Failed to create comp pipe for communication with "
|
||||
"main thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Starting async event thread for the component */
|
||||
if(pthread_create(&mca_btl_openib_component.async_thread, NULL,
|
||||
(void*(*)(void*))btl_openib_async_thread, NULL)) {
|
||||
BTL_ERROR(("Failed to create async event thread"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
uint8_t port_num, uint16_t pkey_index,
|
||||
struct ibv_port_attr *ib_port_attr)
|
||||
@ -928,6 +898,7 @@ static void device_construct(mca_btl_openib_device_t *device)
|
||||
OBJ_CONSTRUCT(&device->device_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&device->send_free_control, ompi_free_list_t);
|
||||
device->max_inline_data = 0;
|
||||
device->ready_for_use = false;
|
||||
}
|
||||
|
||||
static void device_destruct(mca_btl_openib_device_t *device)
|
||||
@ -1039,187 +1010,6 @@ device_error:
|
||||
OBJ_CLASS_INSTANCE(mca_btl_openib_device_t, opal_object_t, device_construct,
|
||||
device_destruct);
|
||||
|
||||
/**
 * Do the final initialization of an openib device.
 *
 * Hands the device's async fd to the async event thread (starting that
 * thread first if needed and enabled), opens the XRC domain (when XRC is
 * enabled), creates the endpoint array and initializes the control, send
 * and receive free lists.  device->qps is used by this function and is
 * not allocated here.
 *
 * @param device  device to prepare
 *
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if a memory
 *         allocation fails, the free-list error code if the control free
 *         list cannot be initialized, OMPI_ERROR for every other failure.
 */
static int prepare_device_for_use(mca_btl_openib_device_t *device)
{
    mca_btl_openib_frag_init_data_t *init_data;
    int rc, qp, length;

#if OPAL_HAVE_THREADS
    if(mca_btl_openib_component.use_async_event_thread) {
        mca_btl_openib_async_cmd_t async_command;
        if(0 == mca_btl_openib_component.async_thread) {
            /* async thread is not yet started, so start it here */
            if(start_async_event_thread() != OMPI_SUCCESS) {
                return OMPI_ERROR;
            }
        }
        device->got_fatal_event = false;
        device->got_port_event = false;

        /* ask the async thread to start polling this device's async fd */
        async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD;
        async_command.fd = device->ib_dev_context->async_fd;
        if (write(mca_btl_openib_component.async_pipe[1],
                    &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){
            BTL_ERROR(("Failed to write to pipe [%d]",errno));
            return OMPI_ERROR;
        }
        /* wait for ok from thread */
        if (OMPI_SUCCESS !=
            btl_openib_async_command_done(device->ib_dev_context->async_fd)) {
            return OMPI_ERROR;
        }
    }
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* Prepare data for thread, but not starting it */
    OBJ_CONSTRUCT(&device->thread, opal_thread_t);
    device->thread.t_run = mca_btl_openib_progress_thread;
    device->thread.t_arg = device;
    device->progress = false;
#endif
#endif

#if HAVE_XRC
    /* if user configured to run with XRC qp and the device doesn't
     * support it - we should ignore this device. Maybe we have another
     * one that has XRC support
     */
    if (!(device->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
            MCA_BTL_XRC_ENABLED) {
        opal_show_help("help-mpi-btl-openib.txt",
                "XRC on device without XRC support", true,
                mca_btl_openib_component.num_xrc_qps,
                ibv_get_device_name(device->ib_dev),
                ompi_process_info.nodename);
        return OMPI_ERROR;
    }

    if (MCA_BTL_XRC_ENABLED) {
        if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
            BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
            return OMPI_ERROR;
        }
    }
#endif

    device->endpoints = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(device->endpoints, 10, INT_MAX, 10);
    opal_pointer_array_add(&mca_btl_openib_component.devices, device);
    if (mca_btl_openib_component.max_eager_rdma > 0 &&
        device->use_eager_rdma) {
        device->eager_rdma_buffers =
            (mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls,
                                                sizeof(mca_btl_openib_endpoint_t*));
        if(NULL == device->eager_rdma_buffers) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* init_data is handed to the free list as the context argument of
       mca_btl_openib_frag_init, so each list gets its own copy */
    init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
    if (NULL == init_data) {
        if (mca_btl_openib_component.max_eager_rdma > 0 &&
            device->use_eager_rdma) {
            /* cleanup */
            free (device->eager_rdma_buffers);
            device->eager_rdma_buffers = NULL;
        }
        BTL_ERROR(("Memory allocation fails"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    length = sizeof(mca_btl_openib_header_t) +
        sizeof(mca_btl_openib_footer_t) +
        sizeof(mca_btl_openib_eager_rdma_header_t);

    init_data->order = MCA_BTL_NO_ORDER;
    init_data->list = &device->send_free_control;

    rc = ompi_free_list_init_ex_new(&device->send_free_control,
                sizeof(mca_btl_openib_send_control_frag_t), opal_cache_line_size,
                OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
                mca_btl_openib_component.buffer_alignment,
                mca_btl_openib_component.ib_free_list_num, -1,
                mca_btl_openib_component.ib_free_list_inc,
                device->mpool, mca_btl_openib_frag_init,
                init_data);
    if (OMPI_SUCCESS != rc) {
        /* If we're "out of memory", this usually means that we ran
           out of registered memory, so show that error message */
        if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
            OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
            errno = ENOMEM;
            mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                           "ompi_free_list_init_ex_new",
                                           ibv_get_device_name(device->ib_dev));
        }
        return rc;
    }

    /* setup all the qps */
    for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Initialize pool of send fragments */
        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].send_free;

        rc = ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_send_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_send_frag_t), length,
                    mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data);
        if (OMPI_SUCCESS != rc) {
            /* If we're "out of memory", this usually means that we
               ran out of registered memory, so show that error
               message */
            if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
                OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
                errno = ENOMEM;
                mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                               "ompi_free_list_init_ex_new",
                                               ibv_get_device_name(device->ib_dev));
            }
            return OMPI_ERROR;
        }

        /* Initialize pool of receive fragments.  The allocation must be
           checked here as well: init_data is dereferenced right below. */
        init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if (NULL == init_data) {
            BTL_ERROR(("Memory allocation fails"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &device->qps[qp].recv_free;

        if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_recv_frag_t), opal_cache_line_size,
                    OBJ_CLASS(mca_btl_openib_recv_frag_t),
                    length, mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    device->mpool, mca_btl_openib_frag_init,
                    init_data)) {
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
static int
|
||||
get_port_list(mca_btl_openib_device_t *device, int *allowed_ports)
|
||||
{
|
||||
@ -2922,44 +2712,6 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
goto no_btls;
|
||||
}
|
||||
++i;
|
||||
|
||||
/* For each btl module that we made - find every
|
||||
base device that doesn't have device->qps setup on it yet (remember
|
||||
that some modules may share the same device, so when going through
|
||||
to loop, we may hit a device that was already setup earlier in
|
||||
the loop).
|
||||
|
||||
We may to call for prepare_device_for_use() only after adding the btl
|
||||
to mca_btl_openib_component.openib_btls, since the prepare_device_for_use
|
||||
adds device to async thread that require access to
|
||||
mca_btl_openib_component.openib_btls.
|
||||
*/
|
||||
|
||||
if (NULL == device->qps) {
|
||||
/* Setup the device qps info */
|
||||
device->qps = (mca_btl_openib_device_qp_t*)
|
||||
calloc(mca_btl_openib_component.num_qps,
|
||||
sizeof(mca_btl_openib_device_qp_t));
|
||||
if (NULL == device->qps) {
|
||||
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
for (qp_index = 0; qp_index < mca_btl_openib_component.num_qps; qp_index++) {
|
||||
OBJ_CONSTRUCT(&device->qps[qp_index].send_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&device->qps[qp_index].recv_free, ompi_free_list_t);
|
||||
}
|
||||
|
||||
/* Do finial init on device */
|
||||
ret = prepare_device_for_use(device);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
goto no_btls;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If we got nothing, then error out */
|
||||
if (0 == i) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user