1
1

Merge pull request #2336 from larrystevenwise/master

openib btl: remove BTL_OPENIB_FAILOVER_ENABLED code
Этот коммит содержится в:
Jeff Squyres 2016-11-02 10:27:26 -04:00 коммит произвёл GitHub
родитель 479fe59d9e 7050969d47
Коммит 27fcd2d6ba
10 изменённых файлов: 0 добавлений и 959 удалений

Просмотреть файл

@ -68,13 +68,6 @@ sources = \
connect/btl_openib_connect_empty.h \ connect/btl_openib_connect_empty.h \
connect/connect.h connect/connect.h
# If we have failover support, build that file
if MCA_btl_openib_enable_failover
sources += \
btl_openib_failover.c \
btl_openib_failover.h
endif
# If we have rdmacm support, build that CPC # If we have rdmacm support, build that CPC
if MCA_btl_openib_have_rdmacm if MCA_btl_openib_have_rdmacm
sources += \ sources += \

Просмотреть файл

@ -1850,23 +1850,13 @@ int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
assert(max_data == payload_size); assert(max_data == payload_size);
} }
#if BTL_OPENIB_FAILOVER_ENABLED
send_signaled = 1;
#else
send_signaled = qp_need_signal(ep, qp, payload_size + header_size, do_rdma); send_signaled = qp_need_signal(ep, qp, payload_size + header_size, do_rdma);
#endif
ib_rc = post_send(ep, to_send_frag(item), do_rdma, send_signaled); ib_rc = post_send(ep, to_send_frag(item), do_rdma, send_signaled);
if (!ib_rc) { if (!ib_rc) {
if (0 == send_signaled) { if (0 == send_signaled) {
MCA_BTL_IB_FRAG_RETURN(frag); MCA_BTL_IB_FRAG_RETURN(frag);
} }
#if BTL_OPENIB_FAILOVER_ENABLED
else {
/* Return up in case needed for failover */
*descriptor = (struct mca_btl_base_descriptor_t *) frag;
}
#endif
OPAL_THREAD_UNLOCK(&ep->endpoint_lock); OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
return OPAL_SUCCESS; return OPAL_SUCCESS;

Просмотреть файл

@ -241,9 +241,6 @@ struct mca_btl_openib_component_t {
opal_event_base_t *async_evbase; /**< Async event base */ opal_event_base_t *async_evbase; /**< Async event base */
bool use_async_event_thread; /**< Use the async event handler */ bool use_async_event_thread; /**< Use the async event handler */
mca_btl_openib_srq_manager_t srq_manager; /**< Hash table for all BTL SRQs */ mca_btl_openib_srq_manager_t srq_manager; /**< Hash table for all BTL SRQs */
#if BTL_OPENIB_FAILOVER_ENABLED
bool port_error_failover; /**< Report port errors to speed up failover */
#endif
/* declare as an int instead of btl_openib_device_type_t since there is no /* declare as an int instead of btl_openib_device_type_t since there is no
guarantee about the size of an enum. this value will be registered as an guarantee about the size of an enum. this value will be registered as an
integer with the MCA variable system */ integer with the MCA variable system */
@ -310,9 +307,6 @@ struct mca_btl_openib_component_t {
int memory_registration_verbose_level; int memory_registration_verbose_level;
int memory_registration_verbose; int memory_registration_verbose;
int ignore_locality; int ignore_locality;
#if BTL_OPENIB_FAILOVER_ENABLED
int verbose_failover;
#endif
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
bool cuda_async_send; bool cuda_async_send;
bool cuda_async_recv; bool cuda_async_recv;

Просмотреть файл

@ -84,9 +84,6 @@
#include "btl_openib_ini.h" #include "btl_openib_ini.h"
#include "btl_openib_mca.h" #include "btl_openib_mca.h"
#include "btl_openib_xrc.h" #include "btl_openib_xrc.h"
#if BTL_OPENIB_FAILOVER_ENABLED
#include "btl_openib_failover.h"
#endif
#include "btl_openib_async.h" #include "btl_openib_async.h"
#include "connect/base.h" #include "connect/base.h"
#include "btl_openib_ip.h" #include "btl_openib_ip.h"
@ -504,12 +501,6 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
mca_btl_openib_endpoint_connected(ep); mca_btl_openib_endpoint_connected(ep);
} }
break; break;
#if BTL_OPENIB_FAILOVER_ENABLED
case MCA_BTL_OPENIB_CONTROL_EP_BROKEN:
case MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR:
btl_openib_handle_failover_control_messages(ctl_hdr, ep);
break;
#endif
default: default:
BTL_ERROR(("Unknown message type received by BTL")); BTL_ERROR(("Unknown message type received by BTL"));
break; break;
@ -3452,20 +3443,8 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
opal_list_item_t *i; opal_list_item_t *i;
while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
#if BTL_OPENIB_FAILOVER_ENABLED
/* The check for the callback flag is only needed when running
* with the failover case because there is a chance that a fragment
* generated from a sendi call (which does not set the flag) gets
* coalesced. In normal operation, this cannot happen as the sendi
* call will never queue up a fragment which could potentially become
* a coalesced fragment. It will revert to a regular send. */
if (to_base_frag(i)->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
#endif
to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint, to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint,
&to_base_frag(i)->base, OPAL_SUCCESS); &to_base_frag(i)->base, OPAL_SUCCESS);
#if BTL_OPENIB_FAILOVER_ENABLED
}
#endif
if( btl_ownership ) { if( btl_ownership ) {
mca_btl_openib_free(&openib_btl->super, &to_base_frag(i)->base); mca_btl_openib_free(&openib_btl->super, &to_base_frag(i)->base);
} }
@ -3590,14 +3569,9 @@ error:
} }
} }
#if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_handle_endpoint_error(openib_btl, des, qp,
remote_proc, endpoint);
#else
if(openib_btl) if(openib_btl)
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL, openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL,
(struct opal_proc_t*)remote_proc, NULL); (struct opal_proc_t*)remote_proc, NULL);
#endif
} }
static int poll_device(mca_btl_openib_device_t* device, int count) static int poll_device(mca_btl_openib_device_t* device, int count)
@ -3808,9 +3782,6 @@ error:
if(openib_btl->device->got_port_event) { if(openib_btl->device->got_port_event) {
/* These are non-fatal so just ignore it. */ /* These are non-fatal so just ignore it. */
openib_btl->device->got_port_event = false; openib_btl->device->got_port_event = false;
#if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_handle_btl_error(openib_btl);
#endif
} }
} }
return count; return count;

Просмотреть файл

@ -584,13 +584,6 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
BTL_OPENIB_FOOTER_HTON(*ftr); BTL_OPENIB_FOOTER_HTON(*ftr);
sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey; sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey;
#if BTL_OPENIB_FAILOVER_ENABLED
/* frag->ftr is unused on the sending fragment, so use it
* to indicate it is an eager fragment. A non-zero value
* indicates it is eager, and the value indicates the
* location in the eager RDMA array that it lives. */
frag->ftr = (mca_btl_openib_footer_t*)(long)(1 + head);
#endif
sr_desc->wr.rdma.remote_addr = sr_desc->wr.rdma.remote_addr =
ep->eager_rdma_remote.base.lval + ep->eager_rdma_remote.base.lval +
head * openib_btl->eager_rdma_frag_size + head * openib_btl->eager_rdma_frag_size +

Просмотреть файл

@ -1,790 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Functions specific to implementing failover support.
*
* This file is conditionally compiled into the BTL when one configures
* it in with --enable-openib-failover. When this file is compiled
* in, the multi-BTL configurations can handle errors. The
* requirement is that there needs to be more than one openib BTL in
* use so that all the traffic can move to the other BTL. This does
* not support failing over to a different BTL like TCP.
*/
#include "opal_config.h"
#include "opal_stdint.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_proc.h"
#include "btl_openib_failover.h"
static void error_out_all_pending_frags(mca_btl_base_endpoint_t *ep,
struct mca_btl_base_module_t* module,
bool errout);
static void mca_btl_openib_endpoint_notify(mca_btl_openib_endpoint_t *endpoint,
uint8_t type, int index);
/* debug functions that are normally not needed */
void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device);
void mca_btl_openib_dump_all_internal_queues(bool errout);
static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint);
/**
 * Handle an error reported on the completion event of a fragment.
 *
 * The action taken depends on the type of the fragment:
 *  - RECV on a non per-peer (shared receive queue) QP: nothing is done.
 *  - RECV on a per-peer QP, CONTROL or EAGER_RDMA fragments: the error
 *    callback is invoked with the NONFATAL flag, the endpoint is marked
 *    failed, the peer is notified and all pending fragments are errored
 *    out.  The fragment itself is dropped.
 *  - Everything else (expected to be SEND, SEND_USER or RECV_USER): the
 *    error callback is invoked with the NONFATAL flag, the endpoint is
 *    marked failed, the peer is notified, the fragment's completion
 *    callback (and those of any coalesced fragments) is invoked with
 *    OPAL_ERROR, send resources are returned and all pending fragments
 *    are errored out.
 *
 * Since this BTL supports failover, the error callback is always invoked
 * with the NONFATAL flag.  Per the original author's notes, an upper layer
 * with failover support maps out the endpoint and returns; one without it
 * aborts the job and control does not return here.
 *
 * @param openib_btl  Pointer to BTL that had the error
 * @param des         Pointer to descriptor that had the error
 * @param qp          Queue pair that had the error
 * @param remote_proc Pointer to process that had the error
 * @param endpoint    Pointer to endpoint that had the error
 */
void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
                                          mca_btl_base_descriptor_t *des,
                                          int qp,
                                          opal_proc_t* remote_proc,
                                          mca_btl_openib_endpoint_t* endpoint)
{
    char *btlname = NULL;
    int btl_ownership;
    bool is_send_frag = false;
    mca_btl_openib_footer_t *eager_ftr = NULL;

    /* Note: what needs to be done is based on the type of openib fragment
     * that got the error.  When wc->status != IBV_WC_SUCCESS only
     * wc->wr_id, wc->status, wc->vendor_err and wc->qp_num are valid, so
     * one cannot key off of wc->opcode; the important information has to
     * be read from the fragment. */

    /* Cannot issue a callback for SRQ errors because the shared receive
     * queue is not specific to a connection, and the wc->imm_data field
     * that would identify the message does not exist on error.  So there
     * is nothing to do here but return. */
    if ((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
        !BTL_OPENIB_QP_TYPE_PP(qp)) {
        opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
                            "SRQ RECV type=%d", openib_frag_type(des));
        /* Need to think about returning any shared resources of the
         * SRQ.  For now, we do nothing as we rarely see an error on
         * the SRQ. */
        return;
    }

    assert(NULL != remote_proc);

    /* Everything below dereferences the BTL (error callback, credits,
     * pending queues); without one there is nothing that can be reported
     * or cleaned up. */
    if (NULL == openib_btl) {
        return;
    }

    /* Create a nice string to help with debug.  On failure asprintf()
     * leaves the pointer indeterminate, so reset it and carry on without
     * a name. */
    if (asprintf(&btlname, "lid=%d:name=%s",
                 openib_btl->lid, openib_btl->device->ib_dev->name) < 0) {
        btlname = NULL;
    }

    /* The next set of errors are associated with an endpoint, but not
     * with a PML descriptor, because:
     *   A. It was a receive
     *   B. It was some type of openib specific control message.
     * Therefore, just drop the fragments and call up into the PML to
     * disable this endpoint for future communication. */
    if (((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) &&
         (BTL_OPENIB_QP_TYPE_PP(qp))) ||
        (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_CONTROL) ||
        (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_EAGER_RDMA)) {
        openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
                             remote_proc, btlname);
        /* Now that this connection has been mapped out at the PML layer,
         * change the state in the BTL layer as well.  The change in the
         * PML layer should prevent any further send on this BTL; if one
         * happens anyway it is an error case. */
        if (MCA_BTL_IB_FAILED != endpoint->endpoint_state) {
            endpoint->endpoint_state = MCA_BTL_IB_FAILED;
            mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0);
            error_out_all_pending_frags(endpoint, &openib_btl->super, true);
        }
        opal_output_verbose(60, mca_btl_openib_component.verbose_failover,
                            "MCA_BTL_OPENIB_FRAG=%d, "
                            "dropping since connection is broken (des=%lx)",
                            openib_frag_type(des), (long unsigned int) des);
        free(btlname);
        return;
    }

    /* These are RDMA read type fragments.  Just continue with processing */
    if (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV_USER) {
        OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
        opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
                            "OPENIB_FRAG_RECV_USER fragment, "
                            "btl=%lx, continue with callbacks",
                            (long unsigned int) &openib_btl->super);
    }

    /* At this point we have completed a send, RDMA read or RDMA write.
     * Call the PML callback function to map out this btl for further
     * sending.  This is done on every error even though it is not
     * necessary; subsequent calls with the same remote_proc argument
     * will not actually map anything out. */
    openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
                         remote_proc, btlname);
    free(btlname);

    /* Since we believe we have done a send, read or write, the
     * des_segments field should have valid data. */
    assert(des->des_segments != NULL);

    /* If the endpoint is not yet in the MCA_BTL_IB_FAILED state, change
     * the state.  In addition, send a message to the other end of the
     * connection letting it know that this side is now broken.  This is
     * needed in the case of a spurious error which may not cause the
     * remote side to detect the error. */
    if (MCA_BTL_IB_FAILED != endpoint->endpoint_state) {
        endpoint->endpoint_state = MCA_BTL_IB_FAILED;
        mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0);
    }

    /* Capture everything still needed from the descriptor BEFORE running
     * its completion callback: the callback or mca_btl_openib_free() below
     * may release the fragment, after which it must not be read again.
     * NOTE(review): as in the original, ftr is read through to_send_frag()
     * for every fragment type that reaches this point - confirm that this
     * is meaningful for SEND_USER/RECV_USER fragments. */
    is_send_frag = (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND);
    eager_ftr = to_send_frag(des)->ftr;

    /* In case fragments were coalesced, pull them apart and call the
     * callback function for each one. */
    if (is_send_frag) {
        opal_list_item_t *i;
        while (NULL != (i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
            btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
            to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint,
                                             &to_base_frag(i)->base, OPAL_ERROR);
            if (btl_ownership) {
                mca_btl_openib_free(&openib_btl->super, &to_base_frag(i)->base);
            }
        }
    }

    /* This must be a MCA_BTL_OPENIB_FRAG_SEND, MCA_BTL_OPENIB_FRAG_SEND_USER
     * or MCA_BTL_OPENIB_FRAG_RECV_USER. */
    btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
    des->des_cbfunc(&openib_btl->super, endpoint, des, OPAL_ERROR);
    if (btl_ownership) {
        mca_btl_openib_free(&openib_btl->super, des);
    }
    /* des must not be dereferenced from here on. */

    /* Send another control message to notify the remote side that we had
     * an error on an eager fragment.  A non-zero ftr value indicates an
     * eager RDMA fragment, and (value - 1) is its index in the eager RDMA
     * array.  This is needed in case the eager RDMA fragment after this
     * one actually made it successfully. */
    if (NULL != eager_ftr) {
        mca_btl_openib_endpoint_notify(endpoint,
                                       MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR,
                                       (long)eager_ftr - 1);
    }

    /* We know we have completed a send so return some resources even
     * though the connection is broken.  With SRQ, the resources are shared
     * so if we do not return the credits we may not be allowed to send
     * anymore. */
    qp_put_wqe(endpoint, qp);
    if (is_send_frag && !BTL_OPENIB_QP_TYPE_PP(qp)) {
        OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
    }

    /* Several queues associated with the endpoint may still hold unsent
     * fragments.  Remove them and call their callbacks with an error so
     * the PML can send them down a different path.  This only needs to
     * happen once per endpoint; later calls find empty queues and are
     * no-ops.  (The endpoint has already been dereferenced above, so no
     * NULL check is needed here.) */
    error_out_all_pending_frags(endpoint, &openib_btl->super, true);
}
/**
 * This function allows an error to map out the entire BTL.  First a
 * call is made up to the PML to map out all connections from this BTL.
 * Then a message is sent to all the endpoints connected to this BTL.
 * This function is enabled by the btl_openib_port_error_failover
 * MCA parameter.  If that parameter is not set, then this function
 * does not do anything.
 *
 * @param openib_btl Pointer to BTL that had the error
 */
void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl) {
    mca_btl_base_endpoint_t* endpoint;
    char *btlname = NULL;
    int i;

    /* Nothing to do unless the whole-BTL map out has been requested. */
    if (!mca_btl_openib_component.port_error_failover) {
        return;
    }

    /* Since we are not specifying a specific connection to bring down
     * (remote proc argument is NULL), the PML layer will map out the
     * entire BTL for future communication.  On failure asprintf() leaves
     * the pointer indeterminate, so reset it and report without a name. */
    if (asprintf(&btlname, "lid=%d:name=%s",
                 openib_btl->lid, openib_btl->device->ib_dev->name) < 0) {
        btlname = NULL;
    }
    openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
                         NULL, btlname);
    free(btlname);

    /* Now send out messages to all endpoints that we are disconnecting.
     * Only do this to endpoints that are connected.  Otherwise, the
     * remote side does not yet have the information on this endpoint. */
    for (i = 0; i < opal_pointer_array_get_size(openib_btl->device->endpoints); i++) {
        endpoint = (mca_btl_openib_endpoint_t*)
            opal_pointer_array_get_item(openib_btl->device->endpoints, i);
        if (NULL == endpoint) {
            continue;
        }
        if (MCA_BTL_IB_CONNECTED == endpoint->endpoint_state) {
            mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0);
            endpoint->endpoint_state = MCA_BTL_IB_FAILED;
            error_out_all_pending_frags(endpoint, &openib_btl->super, true);
        }
    }
}
/**
 * This function gets called when a control message is received that
 * is one of the following types:
 *   MCA_BTL_OPENIB_CONTROL_EP_BROKEN
 *   MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR
 * Note that the working connection is used to send information about
 * the broken connection.  That is why the LID, subnet ID and vpid in
 * the control message are compared against every endpoint of every
 * BTL to figure out which endpoint is broken; it is not the one the
 * message was received on.
 *
 * For a BROKEN message the remote side is notifying us that it has
 * brought down its half of the connection, so we need to bring our
 * half down as well: it has been observed that sometimes only one
 * side of the connection actually sees the error, which would leave
 * the two sides disagreeing about how many BTLs they have.  The first
 * matching endpoint is handled and the function returns.
 *
 * For an EAGER_RDMA_ERROR message the local eager RDMA head of the
 * matching endpoint is advanced by one if it still points at the index
 * carried in the message.
 *
 * @param ctl_hdr Pointer to the control header that was received
 * @param ep      Endpoint the message was received on; only its nbo
 *                flag is consulted, to decide whether the header needs
 *                byte swapping
 */
void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t *ctl_hdr,
                                                 mca_btl_openib_endpoint_t* ep)
{
    mca_btl_openib_broken_connection_header_t *bc_hdr =
        (mca_btl_openib_broken_connection_header_t*)ctl_hdr;
    int i;
    bool found = false;

    if (ep->nbo) {
        BTL_OPENIB_BROKEN_CONNECTION_HEADER_NTOH((*bc_hdr));
    }

    opal_output_verbose(30, mca_btl_openib_component.verbose_failover,
                        "IB: Control message received from %d: lid=%d,subnet=0x%" PRIx64 "",
                        bc_hdr->vpid, bc_hdr->lid, bc_hdr->subnet_id);

    /* Walk through all the endpoints on all the BTLs to find out which
     * one to map out. */
    for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
        mca_btl_openib_module_t* newbtl;
        int j;

        newbtl = mca_btl_openib_component.openib_btls[i];
        /* Now, find the endpoint associated with it */
        for (j = 0; j < opal_pointer_array_get_size(newbtl->device->endpoints); j++) {
            mca_btl_base_endpoint_t* newep;

            newep = (mca_btl_openib_endpoint_t*)
                opal_pointer_array_get_item(newbtl->device->endpoints, j);
            if (NULL == newep) {
                continue;
            }
            /* Compare the LID, subnet ID, and the vpid we received from
             * the remote side and try to match them to an endpoint. */
            if ((bc_hdr->lid != newep->rem_info.rem_lid) ||
                (bc_hdr->subnet_id != newep->rem_info.rem_subnet_id) ||
                (bc_hdr->vpid != newep->endpoint_proc->proc_opal->proc_name.vpid)) {
                continue;
            }

            opal_output_verbose(30, mca_btl_openib_component.verbose_failover,
                                "IB: Control message received from %d: "
                                "found match: lid=%d,"
                                "subnet=0x%" PRIx64 ",endpoint_state=%d",
                                newep->endpoint_proc->proc_opal->proc_name.vpid,
                                newep->rem_info.rem_lid,
                                newep->rem_info.rem_subnet_id,
                                newep->endpoint_state);
            found = true;

            /* At this point, we have found the endpoint.  Now decode the
             * message type and do the appropriate action. */
            if (MCA_BTL_OPENIB_CONTROL_EP_BROKEN == ctl_hdr->type) {
                char *btlname = NULL;
                opal_proc_t* remote_proc = NULL;

                /* If the endpoint is already in a failed state there is
                 * nothing left to do.  Otherwise notify the upper layer
                 * and error out any pending fragments. */
                if (MCA_BTL_IB_FAILED == newep->endpoint_state) {
                    return;
                }

                /* On failure asprintf() leaves the pointer indeterminate,
                 * so reset it and report without a name. */
                if (asprintf(&btlname, "lid=%d:name=%s",
                             newbtl->lid, newbtl->device->ib_dev->name) < 0) {
                    btlname = NULL;
                }

                remote_proc = newep->endpoint_proc->proc_opal;

                opal_output_verbose(10, mca_btl_openib_component.verbose_failover,
                                    "IB: Control message received from %d: "
                                    "bringing down connection,lid=%d,"
                                    "subnet=0x%" PRIx64 ",endpoint_state=%d",
                                    newep->endpoint_proc->proc_opal->proc_name.vpid,
                                    newep->rem_info.rem_lid,
                                    newep->rem_info.rem_subnet_id,
                                    newep->endpoint_state);
                newbtl->error_cb(&newbtl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
                                 remote_proc, btlname);
                free(btlname);

                error_out_all_pending_frags(newep, &newbtl->super, true);
                newep->endpoint_state = MCA_BTL_IB_FAILED;
                return;
            }

            /* MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR message:
             * if we are still pointing at the location where the remote
             * side detected an error, then bump the index by one. */
            if (newep->eager_rdma_local.head == (uint16_t)bc_hdr->index) {
                /* Adjust the local head by one just in case */
                MCA_BTL_OPENIB_RDMA_NEXT_INDEX(newep->eager_rdma_local.head);
                opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
                                    "IB: rank=%d, control message (remote=%d), "
                                    "moved local head by one (new=%d)",
                                    OPAL_PROC_MY_NAME.vpid,
                                    newep->endpoint_proc->proc_opal->proc_name.vpid,
                                    newep->eager_rdma_local.head);
            } else {
                opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
                                    "IB: rank=%d, control message (remote=%d), "
                                    "did not move local head by one (still=%d)",
                                    OPAL_PROC_MY_NAME.vpid,
                                    newep->endpoint_proc->proc_opal->proc_name.vpid,
                                    newep->eager_rdma_local.head);
            }
            /* Found the endpoint on this BTL; stop scanning its endpoint
             * list.  NOTE(review): this only leaves the inner loop, so the
             * remaining BTLs are still scanned - confirm that is intended. */
            break;
        }
    }

    if (!found) {
        opal_output_verbose(30, mca_btl_openib_component.verbose_failover,
                            "IB: Control message: no match found");
    }
}
/**
* This function will find all the pending fragments on an endpoint
* and call the callback function with OPAL_ERROR. It walks through
* each qp with each priority and looks for both no_credits_pending_frags
* and no_wqe_pending_frags. It then looks for any pending_lazy_frags,
* pending_put_frags, and pending_get_frags. This function is only
* called when running with failover support enabled. Note that
* the errout parameter allows the function to also be used as a
* debugging tool to see if there are any fragments on any of the
* queues.
* @param ep Pointer to endpoint that had error
* @param module Pointer to module that had error
* @param errout Boolean which says whether to error them out or not
*/
static void error_out_all_pending_frags(mca_btl_base_endpoint_t *ep,
struct mca_btl_base_module_t* module,
bool errout)
{
int qp, pri, len, total, btl_ownership;
opal_list_item_t *item;
mca_btl_openib_com_frag_t* frag;
mca_btl_base_descriptor_t *des;
int verbose = 10; /* Verbosity level unless debugging */
/* If debugging, drop verbosity level so we can see the output
* regardless of the level the program was run with. */
if (false == errout) {
verbose = 0;
}
total = 0;
/* Traverse all QPs and all priorities and move to other endpoint */
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
for (pri = 0; pri < 2; ++pri) {
/* All types of qp's have a no_wqe_pending_frags list */
len = opal_list_get_size(&ep->qps[qp].no_wqe_pending_frags[pri]);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for no_wqe_pending_frags qp=%d, "
"pri=%d, list size=%d",
qp, pri, len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&ep->qps[qp].
no_wqe_pending_frags[pri]))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
/* Error out any coalesced frags if they exist */
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {
opal_list_item_t *i;
while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Found coalesced frag in no_wqe_pending_frags");
btl_ownership = (to_base_frag(i)->base.des_flags &
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
to_base_frag(i)->base.des_cbfunc(module, ep,
&to_base_frag(i)->base, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, &to_base_frag(i)->base);
}
}
}
btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
des->des_cbfunc(module, ep, des, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, des);
}
}
}
}
if (BTL_OPENIB_QP_TYPE_PP(qp)) {
len = opal_list_get_size(&ep->qps[qp].no_credits_pending_frags[pri]);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for no_credits_pending_frags qp=%d, "
"pri=%d, list size=%d",
qp, pri, len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&ep->qps[qp].
no_credits_pending_frags[pri]))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
/* Error out any coalesced frags if they exist */
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {
opal_list_item_t *i;
while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Found coalesced frag in "
"no_credits_pending_frags");
btl_ownership = (to_base_frag(i)->base.des_flags &
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
to_base_frag(i)->base.des_cbfunc(module, ep,
&to_base_frag(i)->base, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, &to_base_frag(i)->base);
}
}
}
btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
des->des_cbfunc(module, ep, des, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, des);
}
}
}
}
} else if (BTL_OPENIB_QP_TYPE_SRQ(qp)) {
len = opal_list_get_size(&ep->endpoint_btl->qps[qp].u.srq_qp.pending_frags[pri]);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for srq pending_frags qp=%d, pri=%d, "
"list size=%d",
qp, pri, len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&ep->endpoint_btl->qps[qp].
u.srq_qp.pending_frags[pri]))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
/* Error out any coalesced frags if they exist */
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {
opal_list_item_t *i;
while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Found coalesced frag in SRQ pending_frags");
btl_ownership = (to_base_frag(i)->base.des_flags &
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
to_base_frag(i)->base.des_cbfunc(module, ep,
&to_base_frag(i)->base, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, &to_base_frag(i)->base);
}
}
}
btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
des->des_cbfunc(module, ep, des, OPAL_ERROR);
if( btl_ownership ) {
mca_btl_openib_free(module, des);
}
}
}
}
}
}
}
/* Check for any frags from a connection that was never made. Not sure if this
* can actually happen. */
len = opal_list_get_size(&ep->pending_lazy_frags);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for pending_lazy_frags, list size=%d", len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&(ep->pending_lazy_frags)))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
des->des_cbfunc(module, ep, des, OPAL_ERROR);
}
}
}
len = opal_list_get_size(&ep->pending_put_frags);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for pending_put_frags, list size=%d", len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&(ep->pending_put_frags)))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
des->des_cbfunc(module, ep, des, OPAL_ERROR);
}
}
}
len = opal_list_get_size(&ep->pending_get_frags);
if (len > 0) {
total += len;
opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover,
"IB: Checking for pending_get_frags, list size=%d", len);
if (true == errout) {
while (NULL != (item = opal_list_remove_first(&(ep->pending_put_frags)))) {
frag = (mca_btl_openib_com_frag_t *) item;
des = (mca_btl_base_descriptor_t *)frag;
des->des_cbfunc(module, ep, des, OPAL_ERROR);
}
}
}
opal_output_verbose(verbose + 30, mca_btl_openib_component.verbose_failover,
"IB: Finished checking for pending_frags, total moved=%d",
total);
}
/* Completion callback installed on failover control messages: the only
 * work is to hand the descriptor to MCA_BTL_IB_FRAG_RETURN.  The btl,
 * endpoint and status arguments are not used. */
static void mca_btl_openib_endpoint_notify_cb(mca_btl_base_module_t* btl,
                                              struct mca_btl_base_endpoint_t* endpoint,
                                              struct mca_btl_base_descriptor_t* descriptor,
                                              int status)
{
    (void) btl;
    (void) endpoint;
    (void) status;

    MCA_BTL_IB_FRAG_RETURN(descriptor);
}
/**
* This function is used to send a message to the remote side
* indicating the endpoint is broken and telling the remote side to
* bring its endpoint down as well. This is needed because there are
* cases where only one side of the connection determines that
* there was a problem.
* @param endpoint Pointer to endpoint with error
* @param type Type of message to be sent, can be one of two types
* @param index When sending RDMA error message, index is non zero
*/
static void mca_btl_openib_endpoint_notify(mca_btl_base_endpoint_t* endpoint, uint8_t type, int index)
{
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
mca_btl_openib_module_t* newbtl = NULL;
bool found = false;
mca_btl_openib_broken_connection_header_t *bc_hdr;
mca_btl_openib_send_control_frag_t* frag;
mca_btl_base_endpoint_t* newep;
int i, rc;
opal_proc_t* remote_proc = endpoint->endpoint_proc->proc_opal;
/* First, find a different BTL than this one that got the
* error to send the message over. */
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
if (mca_btl_openib_component.openib_btls[i] != openib_btl) {
newbtl = mca_btl_openib_component.openib_btls[i];
break;
}
}
if (NULL == newbtl) {
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
"IB: Endpoint Notify: No BTL found");
/* If we cannot find one, then just return. */
return;
}
/* Now, find the endpoint associated with it. The device
* associated with the BTL has the list of all the
* endpoints. */
for (i = 0; i < opal_pointer_array_get_size(newbtl->device->endpoints); i++) {
newep = (mca_btl_openib_endpoint_t*)
opal_pointer_array_get_item(newbtl->device->endpoints, i);
if (NULL == newep) {
continue;
}
if (newep->endpoint_proc->proc_opal == remote_proc) {
found = true;
break;
}
}
if (false == found) {
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
"IB: Endpoint Notify: No endpoint found");
/* If we cannot find a match, then just return. */
return;
}
frag = alloc_control_frag(newbtl);
if(NULL == frag) {
opal_output_verbose(20, mca_btl_openib_component.verbose_failover,
"IB: Endpoint Notify: No frag space");
/* If no frag available, then just return. */
return;
}
to_base_frag(frag)->base.des_cbfunc =
mca_btl_openib_endpoint_notify_cb;
to_base_frag(frag)->base.des_cbdata = NULL;
to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
to_base_frag(frag)->segment.seg_len =
sizeof(mca_btl_openib_broken_connection_header_t);
to_com_frag(frag)->endpoint = newep;
frag->hdr->tag = MCA_BTL_TAG_IB;
bc_hdr = (mca_btl_openib_broken_connection_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
bc_hdr->control.type = type;
bc_hdr->lid = endpoint->endpoint_btl->port_info.lid;
bc_hdr->subnet_id = endpoint->endpoint_btl->port_info.subnet_id;
bc_hdr->vpid = OPAL_PROC_MY_NAME.vpid;
bc_hdr->index = index;
if(newep->nbo) {
BTL_OPENIB_BROKEN_CONNECTION_HEADER_HTON((*bc_hdr));
}
rc = mca_btl_openib_endpoint_send(newep, frag);
if (OPAL_SUCCESS == rc || OPAL_ERR_RESOURCE_BUSY == rc) {
return;
}
MCA_BTL_IB_FRAG_RETURN(frag);
BTL_ERROR(("Error sending BROKEN CONNECTION buffer (%s)", strerror(errno)));
return;
}
/*
 * Debugging helper for eager RDMA problems.
 *
 * Prints the local eager RDMA head index of the endpoint and then one
 * line per local eager RDMA fragment.  For every fragment the header
 * pointer and the segment address are recomputed from the footer
 * (and written back into the fragment) before the fragment is
 * classified as a credit message or printed with its size, tag and
 * one footer byte.
 */
static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint) {
    mca_btl_openib_recv_frag_t *frag_array = endpoint->eager_rdma_local.frags;
    int slot;

    opal_output(0, "Head = %d", endpoint->eager_rdma_local.head);

    for (slot = 0; slot < mca_btl_openib_component.eager_rdma_num; ++slot) {
        mca_btl_openib_recv_frag_t *cur = frag_array + slot;
        int nbytes = MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(cur->ftr);
        char *ftr_bytes = (char*) cur->ftr;
        mca_btl_openib_control_header_t *ctl;

        /* Header location is derived from the footer address and the
         * size recorded in the footer. */
        cur->hdr = (mca_btl_openib_header_t*)(ftr_bytes - nbytes +
                                              sizeof(mca_btl_openib_footer_t));

        /* Payload starts right behind the header. */
        to_base_frag(cur)->segment.seg_addr.pval =
            ((unsigned char*) cur->hdr) + sizeof(mca_btl_openib_header_t);
        ctl = to_base_frag(cur)->segment.seg_addr.pval;

        if ((MCA_BTL_TAG_IB != cur->hdr->tag) ||
            (MCA_BTL_OPENIB_CONTROL_CREDITS != ctl->type)) {
            opal_output(0, "frag[%d] size=%d,tag=%d,ftr->u.buf=%d", slot, nbytes,
                        cur->hdr->tag, cur->ftr->u.buf[3]);
        } else {
            opal_output(0, "tag[%d] is credit message", slot);
        }
    }
}
/*
 * Debugging helper for eager RDMA problems.
 *
 * Prints the local rank and device name, then walks the device's
 * eager RDMA buffer table and dumps the local RDMA fragments of every
 * endpoint stored in it.  Empty table slots are skipped.
 */
void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device) {
    int slot;
    int num_slots = device->eager_rdma_buffers_count;

    opal_output(0, "rank=%d, device=%s", OPAL_PROC_MY_NAME.vpid, device->ib_dev->name);

    for (slot = 0; slot < num_slots; ++slot) {
        mca_btl_openib_endpoint_t *ep = device->eager_rdma_buffers[slot];

        if (NULL != ep) {
            dump_local_rdma_frags(ep);
        }
    }
}
/**
 * This function is a debugging tool.  If you notice a hang, you can
 * call this function from a debugger and see if there are any
 * messages stuck in any of the queues.  If you call it with
 * errout=true, then it will error them out.  Otherwise, it will
 * just print out the size of the queues with data in them.
 *
 * The function visits every openib BTL of the component and, for each
 * non-NULL endpoint in that BTL's device endpoint array, forwards to
 * error_out_all_pending_frags(), which does the actual reporting and
 * erroring out.
 *
 * @param errout  Passed through unchanged to
 *                error_out_all_pending_frags() for every endpoint.
 */
void mca_btl_openib_dump_all_internal_queues(bool errout) {
    int i, j, num_eps;
    mca_btl_openib_module_t* btl;
    mca_btl_base_endpoint_t* ep;
    struct mca_btl_base_module_t* module;

    for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
        btl = mca_btl_openib_component.openib_btls[i];
        module = &btl->super;
        num_eps = opal_pointer_array_get_size(btl->device->endpoints);

        /* Now, walk all endpoints associated with this BTL's device */
        for (j = 0; j < num_eps; j++) {
            ep = (mca_btl_openib_endpoint_t*)
                opal_pointer_array_get_item(btl->device->endpoints, j);
            if (NULL == ep) {
                /* Unused slot in the endpoint array. */
                continue;
            }
            error_out_all_pending_frags(ep, module, errout);
        }
    }
}

Просмотреть файл

@ -1,31 +0,0 @@
/*
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Functions called by BTL to handle error events
*/
/*
 * Include guard for the openib BTL failover declarations.
 *
 * NOTE(review): this header contains no #include lines, so the types
 * used in the prototypes below must already be declared by whoever
 * includes it.
 */
#ifndef MCA_BTL_IB_FAILOVER_H
#define MCA_BTL_IB_FAILOVER_H
BEGIN_C_DECLS
/*
 * Error handler for a single endpoint.
 *
 * NOTE(review): the definition is not visible from this header; the
 * parameter meanings below are read off the names only -- confirm
 * against the implementation.
 *   openib_btl   presumably the BTL module on which the error occurred
 *   des          presumably the descriptor that was in flight
 *   qp           presumably the index of the affected queue pair
 *   remote_proc  presumably the peer process of the failed endpoint
 *   endpoint     presumably the failed endpoint itself
 */
void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
mca_btl_base_descriptor_t *des,
int qp,
opal_proc_t* remote_proc,
mca_btl_openib_endpoint_t* endpoint);
/*
 * Error handler for a whole BTL module.
 * NOTE(review): exact behavior is defined elsewhere -- confirm.
 */
void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl);
/*
 * Entry point for failover control messages: takes a control header
 * and the endpoint it is associated with.
 * NOTE(review): presumably called on receipt of such a message and
 * dispatching on the header's type field -- confirm.
 */
void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t *ctl_hdr,
mca_btl_openib_endpoint_t* ep);
END_C_DECLS
#endif

Просмотреть файл

@ -190,10 +190,6 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
#define MCA_BTL_OPENIB_CONTROL_RDMA 1 #define MCA_BTL_OPENIB_CONTROL_RDMA 1
#define MCA_BTL_OPENIB_CONTROL_COALESCED 2 #define MCA_BTL_OPENIB_CONTROL_COALESCED 2
#define MCA_BTL_OPENIB_CONTROL_CTS 3 #define MCA_BTL_OPENIB_CONTROL_CTS 3
/*
 * Additional control message type codes, defined only when the build
 * has BTL_OPENIB_FAILOVER_ENABLED set.  They continue the numbering of
 * the MCA_BTL_OPENIB_CONTROL_* codes.
 * NOTE(review): presumably EP_BROKEN reports a broken endpoint and
 * EP_EAGER_RDMA_ERROR reports an eager RDMA error -- confirm against
 * the code that sends and receives them.
 */
#if BTL_OPENIB_FAILOVER_ENABLED
#define MCA_BTL_OPENIB_CONTROL_EP_BROKEN 4
#define MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR 5
#endif
struct mca_btl_openib_control_header_t { struct mca_btl_openib_control_header_t {
uint8_t type; uint8_t type;
@ -243,32 +239,6 @@ do { \
(h).rdma_credits = ntohs((h).rdma_credits); \ (h).rdma_credits = ntohs((h).rdma_credits); \
} while (0) } while (0)
/*
 * Header of the "broken connection" control message and its byte
 * order conversion macros.  Only available when the build has
 * BTL_OPENIB_FAILOVER_ENABLED set.
 */
#if BTL_OPENIB_FAILOVER_ENABLED
/*
 * Payload of a broken-connection control message.  The sender
 * (mca_btl_openib_endpoint_notify) fills it from the BTL whose
 * endpoint failed.
 */
struct mca_btl_openib_broken_connection_header_t {
/* Generic control header; its type field carries the message type. */
mca_btl_openib_control_header_t control;
/* port_info.lid of the BTL that owns the failed endpoint. */
uint32_t lid;
/* port_info.subnet_id of the BTL that owns the failed endpoint. */
uint64_t subnet_id;
/* vpid of the sending process (OPAL_PROC_MY_NAME.vpid). */
uint32_t vpid;
uint32_t index; /* for eager RDMA only */
};
typedef struct mca_btl_openib_broken_connection_header_t mca_btl_openib_broken_connection_header_t;
/*
 * Convert the lid, subnet_id, vpid and index fields of header h in
 * place with htonl/hton64.  The control member is not touched by this
 * macro.  h is evaluated several times, so pass an expression without
 * side effects.
 */
#define BTL_OPENIB_BROKEN_CONNECTION_HEADER_HTON(h) \
do { \
(h).lid = htonl((h).lid); \
(h).subnet_id = hton64((h).subnet_id); \
(h).vpid = htonl((h).vpid); \
(h).index = htonl((h).index); \
} while (0)
/*
 * Inverse of BTL_OPENIB_BROKEN_CONNECTION_HEADER_HTON: convert the
 * same four fields of header h in place with ntohl/ntoh64.  h is
 * evaluated several times.
 */
#define BTL_OPENIB_BROKEN_CONNECTION_HEADER_NTOH(h) \
do { \
(h).lid = ntohl((h).lid); \
(h).subnet_id = ntoh64((h).subnet_id); \
(h).vpid = ntohl((h).vpid); \
(h).index = ntohl((h).index); \
} while (0)
#endif
enum mca_btl_openib_frag_type_t { enum mca_btl_openib_frag_type_t {
MCA_BTL_OPENIB_FRAG_RECV, MCA_BTL_OPENIB_FRAG_RECV,
MCA_BTL_OPENIB_FRAG_RECV_USER, MCA_BTL_OPENIB_FRAG_RECV_USER,

Просмотреть файл

@ -89,11 +89,6 @@ static mca_base_var_enum_value_t device_type_values[] = {
static int btl_openib_cq_size; static int btl_openib_cq_size;
static bool btl_openib_have_fork_support = OPAL_HAVE_IBV_FORK_INIT; static bool btl_openib_have_fork_support = OPAL_HAVE_IBV_FORK_INIT;
/* File-local storage for the failover MCA parameters; only present
 * when the build has BTL_OPENIB_FAILOVER_ENABLED set. */
#if BTL_OPENIB_FAILOVER_ENABLED
/* Storage for the "verbose_failover" integer parameter; its value is
 * applied as the verbosity of the failover output stream. */
static int btl_openib_verbose_failover;
/* Storage for the "failover_enabled" variable, which is registered as
 * a constant-scope, default-only boolean so users can see that
 * failover was configured in. */
static bool btl_openib_failover_enabled = true;
#endif
/* /*
* utility routine for string parameter registration * utility routine for string parameter registration
*/ */
@ -473,30 +468,6 @@ int btl_openib_register_mca_params(void)
"If nonzero, use the thread that will handle InfiniBand asynchronous events", "If nonzero, use the thread that will handle InfiniBand asynchronous events",
true, &mca_btl_openib_component.use_async_event_thread)); true, &mca_btl_openib_component.use_async_event_thread));
#if BTL_OPENIB_FAILOVER_ENABLED
/* failover specific output */
CHECK(reg_int("verbose_failover", NULL,
"Output some verbose OpenIB BTL failover information "
"(0 = no output, nonzero = output)", 0, &btl_openib_verbose_failover, 0));
mca_btl_openib_component.verbose_failover = opal_output_open(NULL);
opal_output_set_verbosity(mca_btl_openib_component.verbose_failover, btl_openib_verbose_failover);
CHECK(reg_bool("port_error_failover", NULL,
"If nonzero, asynchronous port errors will trigger failover",
0, &mca_btl_openib_component.port_error_failover));
/* Make non writeable parameter that indicates failover is configured in. */
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"failover_enabled",
"openib failover is configured: run with bfo PML to support failover between openib BTLs",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_CONSTANT,
&btl_openib_failover_enabled);
if (0 > tmp) ret = tmp;
#endif
CHECK(reg_bool("enable_srq_resize", NULL, CHECK(reg_bool("enable_srq_resize", NULL,
"Enable/Disable on demand SRQ resize. " "Enable/Disable on demand SRQ resize. "
"(0 = without resizing, nonzero = with resizing)", 1, "(0 = without resizing, nonzero = with resizing)", 1,
@ -570,10 +541,6 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
MCA_BTL_FLAGS_SEND; MCA_BTL_FLAGS_SEND;
#if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
#endif
#if HAVE_DECL_IBV_ATOMIC_HCA #if HAVE_DECL_IBV_ATOMIC_HCA
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS; mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS;
mca_btl_openib_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; mca_btl_openib_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP;

Просмотреть файл

@ -104,22 +104,6 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[
AC_MSG_CHECKING([which openib btl cpcs will be built]) AC_MSG_CHECKING([which openib btl cpcs will be built])
AC_MSG_RESULT([$cpcs])]) AC_MSG_RESULT([$cpcs])])
# Enable openib device failover. It is disabled by default.
AC_MSG_CHECKING([whether openib failover is enabled])
AC_ARG_ENABLE([btl-openib-failover],
[AC_HELP_STRING([--enable-btl-openib-failover],
[enable openib BTL failover (default: disabled)])])
if test "$enable_btl_openib_failover" = "yes"; then
AC_MSG_RESULT([yes])
btl_openib_failover_enabled=1
else
AC_MSG_RESULT([no])
btl_openib_failover_enabled=0
fi
AC_DEFINE_UNQUOTED([BTL_OPENIB_FAILOVER_ENABLED], [$btl_openib_failover_enabled],
[enable openib BTL failover])
AM_CONDITIONAL([MCA_btl_openib_enable_failover], [test "x$btl_openib_failover_enabled" = "x1"])
# make sure that CUDA-aware checks have been done # make sure that CUDA-aware checks have been done
AC_REQUIRE([OPAL_CHECK_CUDA]) AC_REQUIRE([OPAL_CHECK_CUDA])