454fdff824
work on the configure.stub to correctly locate the ib libraries. This commit was SVN r6435.
786 строки
26 KiB
C
786 строки
26 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
|
|
#include "ompi_config.h"
|
|
#include <sys/time.h>
|
|
#include <time.h>
|
|
#include "include/types.h"
|
|
#include "mca/pml/base/pml_base_sendreq.h"
|
|
#include "mca/ns/base/base.h"
|
|
#include "mca/oob/base/base.h"
|
|
#include "mca/rml/rml.h"
|
|
#include "mca/errmgr/errmgr.h"
|
|
#include "dps/dps.h"
|
|
#include "btl_openib.h"
|
|
#include "btl_openib_endpoint.h"
|
|
#include "btl_openib_proc.h"
|
|
#include "btl_openib_frag.h"
|
|
#include "class/ompi_free_list.h"
|
|
|
|
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
|
|
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
|
|
|
|
int mca_btl_openib_endpoint_create_qp(
|
|
mca_btl_openib_module_t* openib_btl,
|
|
struct ibv_pd* pd,
|
|
struct ibv_cq* cq,
|
|
struct ibv_qp_attr* qp_attr,
|
|
struct ibv_qp** qp
|
|
);
|
|
|
|
|
|
|
|
int mca_btl_openib_endpoint_qp_init_query(
|
|
mca_btl_openib_module_t* openib_btl,
|
|
struct ibv_qp* qp,
|
|
struct ibv_qp_attr* attr,
|
|
uint32_t lcl_psn,
|
|
uint32_t rem_qp_num,
|
|
uint32_t rem_psn,
|
|
uint16_t rem_lid,
|
|
uint32_t port_num
|
|
);
|
|
|
|
static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* openib_btl,
|
|
mca_btl_openib_endpoint_t * endpoint,
|
|
mca_btl_openib_frag_t * frag)
|
|
{
|
|
|
|
struct ibv_qp* ib_qp;
|
|
struct ibv_send_wr *bad_wr;
|
|
|
|
frag->sr_desc.remote_qkey = 0;
|
|
frag->sg_entry.addr = (uintprt_t) frag->hdr;
|
|
|
|
if(frag->base.des_flags && MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= openib_btl->super.btl_eager_limit){
|
|
ib_qp = endpoint->lcl_qp_high;
|
|
} else {
|
|
ib_qp = endpoint->lcl_qp_low;
|
|
}
|
|
|
|
frag->sr_desc.opcode = IBV_WR_SEND;
|
|
frag->sr_desc.send_flags = IBV_SEND_SIGNALED;
|
|
|
|
frag->sg_entry.length = frag->segment.seg_len + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); /* sizeof(mca_btl_openib_header_t); */
|
|
|
|
/* TODO: should check if we can inline send,, but can't find
|
|
* inline send defined in openib verbs api.
|
|
* if(frag->sg_entry.len <= openib_btl->ib_inline_max) {
|
|
*/
|
|
if(ibv_post_send(ib_qp,
|
|
frag->sr_desc,
|
|
&bad_wr)) {
|
|
opal_output(0, "%s: error posting send request\n", __func__);
|
|
return OMPI_ERROR;
|
|
}
|
|
mca_btl_openib_endpoint_post_rr(endpoint, 1);
|
|
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(mca_btl_openib_endpoint_t,
|
|
opal_list_item_t, mca_btl_openib_endpoint_construct,
|
|
mca_btl_openib_endpoint_destruct);
|
|
|
|
/*
|
|
* Initialize state of the endpoint instance.
|
|
*
|
|
*/
|
|
|
|
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
|
{
|
|
endpoint->endpoint_btl = 0;
|
|
endpoint->endpoint_proc = 0;
|
|
endpoint->endpoint_tstamp = 0.0;
|
|
endpoint->endpoint_state = MCA_BTL_IB_CLOSED;
|
|
endpoint->endpoint_retries = 0;
|
|
OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, opal_mutex_t);
|
|
OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, opal_mutex_t);
|
|
OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
|
|
}
|
|
|
|
/*
|
|
* Destroy a endpoint
|
|
*
|
|
*/
|
|
|
|
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
|
{
|
|
}
|
|
|
|
/*
|
|
* Send connection information to remote endpoint using OOB
|
|
*
|
|
*/
|
|
|
|
static void mca_btl_openib_endpoint_send_cb(
|
|
int status,
|
|
orte_process_name_t* endpoint,
|
|
orte_buffer_t* buffer,
|
|
orte_rml_tag_t tag,
|
|
void* cbdata)
|
|
{
|
|
OBJ_RELEASE(buffer);
|
|
}
|
|
|
|
|
|
static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* endpoint)
|
|
{
|
|
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
|
|
int rc;
|
|
if(NULL == buffer) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
/* pack the info in the send buffer */
|
|
|
|
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_high.qp_num, 1, ORTE_UINT32);
|
|
if(rc != ORTE_SUCCESS) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_low.qp_num, 1, ORTE_UINT32);
|
|
if(rc != ORTE_SUCCESS) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
rc = orte_dps.pack(buffer, &endpoint->lcl_psn_high, 1, ORTE_UINT32);
|
|
if(rc != ORTE_SUCCESS) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
rc = orte_dps.pack(buffer, &endpoint->lcl_psn_low, 1, ORTE_UINT32);
|
|
if(rc != ORTE_SUCCESS) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
rc = orte_dps.pack(buffer, &endpoint->endpoint_btl->port.lid, 1, ORTE_UINT16);
|
|
if(rc != ORTE_SUCCESS) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
|
|
|
|
/* send to endpoint */
|
|
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
|
|
mca_btl_openib_endpoint_send_cb, NULL);
|
|
|
|
|
|
DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
|
endpoint->lcl_qp_prop_high.qp_num,
|
|
endpoint->lcl_qp_prop_low.qp_num,
|
|
endpoint->endpoint_btl->port.lid);
|
|
|
|
if(rc < 0) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Send connect ACK to remote endpoint
|
|
*
|
|
*/
|
|
|
|
static int mca_btl_openib_endpoint_send_connect_ack(mca_btl_base_endpoint_t* endpoint)
|
|
{
|
|
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
|
|
int rc;
|
|
uint32_t zero = 0;
|
|
|
|
/* pack the info in the send buffer */
|
|
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT16))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* send to endpoint */
|
|
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
|
|
mca_btl_openib_endpoint_send_cb, NULL);
|
|
if(rc < 0) {
|
|
ORTE_ERROR_LOG(rc);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
* Set remote connection info
|
|
*
|
|
* XXX: Currently size is unutilized, this shall change
|
|
* as soon as we add more info to be exchanged at connection
|
|
* setup.
|
|
*
|
|
*/
|
|
static int mca_btl_openib_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpoint, orte_buffer_t* buffer)
|
|
{
|
|
int rc;
|
|
|
|
|
|
|
|
size_t cnt = 1;
|
|
rc = orte_dps.unpack(buffer, &endpoint->rem_qp_num_high, &cnt, ORTE_UINT32);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
rc = orte_dps.unpack(buffer, &endpoint->rem_qp_num_low, &cnt, ORTE_UINT32);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
rc = orte_dps.unpack(buffer, &endpoint->rem_psn_high, &cnt, ORTE_UINT32);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
rc = orte_dps.unpack(buffer, &endpoint->rem_psn_low, &cnt, ORTE_UINT32);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
rc = orte_dps.unpack(buffer, &endpoint->rem_lid, &cnt, ORTE_UINT16);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
|
endpoint->rem_qp_num_high,
|
|
endpoint->rem_qp_num_low,
|
|
endpoint->rem_lid);
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* Start to connect to the endpoint. We send our Queue Pair
|
|
* information over the TCP OOB communication mechanism.
|
|
|
|
* On completion of our send, a send completion handler
|
|
* is called.
|
|
*
|
|
*/
|
|
|
|
static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoint)
|
|
{
|
|
int rc;
|
|
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) endpoint->endpoint_btl;
|
|
|
|
|
|
/* Create the High Priority Queue Pair */
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
|
|
openib_btl->ib_pd,
|
|
openib_btl->ib_cq_high,
|
|
endpoint->lcl_qp_attr_high,
|
|
&endpoint->lcl_qp_high))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
endpoint->lcl_psn_high = lrand48() & 0xffffff;
|
|
|
|
/* Create the Low Priority Queue Pair */
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
|
|
openib_btl->ib_pd,
|
|
openib_btl->ib_cq_low,
|
|
endpoint->lcl_qp_attr_low,
|
|
&endpoint->lcl_qp_low))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
|
|
|
DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
|
endpoint->lcl_qp_high->qp_num,
|
|
endpoint->lcl_qp_low.qp_num,
|
|
openib_btl->ib_port_attr->lid);
|
|
|
|
/* Send connection info over to remote endpoint */
|
|
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Reply to a `start - connect' message
|
|
*
|
|
*/
|
|
static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t *endpoint, orte_buffer_t* buffer)
|
|
{
|
|
int rc;
|
|
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) endpoint->endpoint_btl;
|
|
|
|
|
|
/* Create the High Priority Queue Pair */
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
|
|
openib_btl->ib_pd,
|
|
openib_btl->ib_cq,
|
|
endpoint->lcl_qp_attr_high,
|
|
&endpoint->lcl_qp_high))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
endpoint->lcl_psn_high = lrand48() & 0xffffff;
|
|
|
|
/* Create the Low Priority Queue Pair */
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
|
|
openib_btl->ib_pd,
|
|
openib_btl->ib_cq,
|
|
endpoint->lcl_qp_attr_low,
|
|
&endpoint->lcl_qp_low))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
|
|
|
DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
|
endpoint->lcl_qp_high->qp_num,
|
|
endpoint->lcl_qp_low.qp_num,
|
|
openib_btl->ib_port_attr->lid);
|
|
|
|
|
|
/* Set the remote side info */
|
|
mca_btl_openib_endpoint_set_remote_info(endpoint, buffer);
|
|
|
|
/* Connect to endpoint */
|
|
|
|
rc = mca_btl_openib_endpoint_connect(endpoint);
|
|
if(rc != OMPI_SUCCESS) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
|
|
/* Send connection info over to remote endpoint */
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
return rc;
|
|
}
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*/
|
|
|
|
static void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
|
|
{
|
|
endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
|
|
mca_btl_openib_progress_send_frags(endpoint);
|
|
}
|
|
|
|
/*
|
|
* Non blocking OOB recv callback.
|
|
* Read incoming QP and other info, and if this endpoint
|
|
* is trying to connect, reply with our QP info,
|
|
* otherwise try to modify QP's and establish
|
|
* reliable connection
|
|
*
|
|
*/
|
|
|
|
static void mca_btl_openib_endpoint_recv(
|
|
int status,
|
|
orte_process_name_t* endpoint,
|
|
orte_buffer_t* buffer,
|
|
orte_rml_tag_t tag,
|
|
void* cbdata)
|
|
{
|
|
mca_btl_openib_proc_t *ib_proc;
|
|
mca_btl_openib_endpoint_t *ib_endpoint;
|
|
int endpoint_state;
|
|
int rc;
|
|
|
|
for(ib_proc = (mca_btl_openib_proc_t*)
|
|
opal_list_get_first(&mca_btl_openib_component.ib_procs);
|
|
ib_proc != (mca_btl_openib_proc_t*)
|
|
opal_list_get_end(&mca_btl_openib_component.ib_procs);
|
|
ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
|
|
|
|
if(ib_proc->proc_guid.vpid == endpoint->vpid) {
|
|
|
|
/* Try to get the endpoint instance of this proc */
|
|
|
|
/* Limitation: Right now, we have only 1 endpoint
|
|
* for every process. Need several changes, some
|
|
* in PML/BTL interface to set this right */
|
|
ib_endpoint = ib_proc->proc_endpoints[0];
|
|
|
|
endpoint_state = ib_endpoint->endpoint_state;
|
|
|
|
/* Update status */
|
|
switch(endpoint_state) {
|
|
case MCA_BTL_IB_CLOSED :
|
|
/* We had this connection closed before.
|
|
* The endpoint is trying to connect. Move the
|
|
* status of this connection to CONNECTING,
|
|
* and then reply with our QP information */
|
|
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
break;
|
|
}
|
|
|
|
/* Setup state as connected */
|
|
ib_endpoint->endpoint_state = MCA_BTL_IB_CONNECT_ACK;
|
|
break;
|
|
|
|
case MCA_BTL_IB_CONNECTING :
|
|
|
|
mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer);
|
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) {
|
|
opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
|
|
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
|
|
break;
|
|
}
|
|
|
|
/* Setup state as connected */
|
|
mca_btl_openib_endpoint_connected(ib_endpoint);
|
|
|
|
/* Send him an ack */
|
|
mca_btl_openib_endpoint_send_connect_ack(ib_endpoint);
|
|
break;
|
|
|
|
case MCA_BTL_IB_CONNECT_ACK:
|
|
|
|
mca_btl_openib_endpoint_connected(ib_endpoint);
|
|
|
|
break;
|
|
|
|
case MCA_BTL_IB_CONNECTED :
|
|
break;
|
|
default :
|
|
opal_output(0, "Connected -> Connecting not possible.\n");
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Okay, now that we are done receiving,
|
|
* re-post the buffer */
|
|
mca_btl_openib_post_recv();
|
|
}
|
|
|
|
void mca_btl_openib_post_recv()
|
|
{
|
|
DEBUG_OUT("");
|
|
|
|
orte_rml.recv_buffer_nb(
|
|
ORTE_RML_NAME_ANY,
|
|
ORTE_RML_TAG_DYNAMIC-1,
|
|
0,
|
|
mca_btl_openib_endpoint_recv,
|
|
NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* Attempt to send a fragment using a given endpoint. If the endpoint is not
|
|
* connected, queue the fragment and start the connection as required.
|
|
*/
|
|
|
|
int mca_btl_openib_endpoint_send(
|
|
mca_btl_base_endpoint_t* endpoint,
|
|
mca_btl_openib_frag_t* frag
|
|
)
|
|
{
|
|
int rc;
|
|
mca_btl_openib_module_t *openib_btl;
|
|
|
|
OPAL_THREAD_LOCK(&endpoint->endpoint_send_lock);
|
|
|
|
switch(endpoint->endpoint_state) {
|
|
case MCA_BTL_IB_CONNECTING:
|
|
|
|
DEBUG_OUT("Queing because state is connecting");
|
|
|
|
opal_list_append(&endpoint->pending_send_frags,
|
|
(opal_list_item_t *)frag);
|
|
|
|
rc = OMPI_SUCCESS;
|
|
break;
|
|
|
|
case MCA_BTL_IB_CONNECT_ACK:
|
|
|
|
DEBUG_OUT("Queuing because waiting for ack");
|
|
|
|
opal_list_append(&endpoint->pending_send_frags,
|
|
(opal_list_item_t *)frag);
|
|
|
|
rc = OMPI_SUCCESS;
|
|
break;
|
|
|
|
case MCA_BTL_IB_CLOSED:
|
|
|
|
DEBUG_OUT("Connection to endpoint closed ... connecting ...");
|
|
|
|
opal_list_append(&endpoint->pending_send_frags,
|
|
(opal_list_item_t *)frag);
|
|
|
|
rc = mca_btl_openib_endpoint_start_connect(endpoint);
|
|
|
|
break;
|
|
|
|
case MCA_BTL_IB_FAILED:
|
|
|
|
rc = OMPI_ERR_UNREACH;
|
|
break;
|
|
|
|
case MCA_BTL_IB_CONNECTED:
|
|
{
|
|
openib_btl = endpoint->endpoint_btl;
|
|
|
|
|
|
DEBUG_OUT("Send to : %d, len : %d, frag : %p",
|
|
endpoint->endpoint_proc->proc_guid.vpid,
|
|
frag->ib_buf.desc.sg_entry.len,
|
|
frag);
|
|
|
|
rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag);
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
rc = OMPI_ERR_UNREACH;
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_send_lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t* endpoint)
|
|
{
|
|
opal_list_item_t *frag_item;
|
|
mca_btl_openib_frag_t *frag;
|
|
mca_btl_openib_module_t* openib_btl;
|
|
/*Check if endpoint is connected */
|
|
if(endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) {
|
|
|
|
return;
|
|
}
|
|
|
|
/* While there are frags in the list,
|
|
* process them */
|
|
|
|
while(!opal_list_is_empty(&(endpoint->pending_send_frags))) {
|
|
frag_item = opal_list_remove_first(&(endpoint->pending_send_frags));
|
|
frag = (mca_btl_openib_frag_t *) frag_item;
|
|
openib_btl = endpoint->endpoint_btl;
|
|
/* We need to post this one */
|
|
|
|
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag))
|
|
opal_output(0, "error in mca_btl_openib_endpoint_send");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* Complete connection to endpoint.
|
|
*/
|
|
|
|
int mca_btl_openib_endpoint_connect(
|
|
mca_btl_openib_endpoint_t *endpoint)
|
|
{
|
|
int rc;
|
|
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) endpoint->endpoint_btl;
|
|
|
|
/* Connection establishment RC */
|
|
rc = mca_btl_openib_endpoint_qp_init_query(
|
|
openib_btl,
|
|
endpoint->lcl_qp_high,
|
|
endpoint->lcl_qp_attr_high,
|
|
endpoint->lcl_psn_high,
|
|
endpoint->rem_qp_num_high,
|
|
endpoint->rem_psn_high,
|
|
endpoint->rem_lid,
|
|
openib_btl->port_num
|
|
);
|
|
|
|
|
|
|
|
if(rc != OMPI_SUCCESS) {
|
|
return rc;
|
|
}
|
|
rc = mca_btl_openib_endpoint_qp_init_query(
|
|
openib_btl,
|
|
endpoint->lcl_qp_low,
|
|
endpoint->lcl_qp_attr_low,
|
|
endpoint->lcl_psn_low,
|
|
endpoint->rem_qp_num_low,
|
|
endpoint->rem_psn_low,
|
|
endpoint->rem_lid,
|
|
openib_btl->port_num
|
|
);
|
|
|
|
|
|
|
|
if(rc != OMPI_SUCCESS) {
|
|
return rc;
|
|
}
|
|
|
|
mca_btl_openib_endpoint_post_rr(endpoint, 0);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
int mca_btl_openib_endpoint_create_qp(
|
|
mca_btl_openib_module_t* openib_btl,
|
|
struct ibv_pd* pd,
|
|
struct ibv_cq* cq,
|
|
struct ibv_qp_attr* qp_attr,
|
|
struct ibv_qp** qp
|
|
)
|
|
{
|
|
{
|
|
struct ibv_qp_init_attr qp_init_attr = {
|
|
.send_cq = cq,
|
|
.recv_cq = cq,
|
|
.cap = {
|
|
.max_send_wr = openib_btl->ib_wq_size,
|
|
.max_recv_wr = openib_btl->ib_wq_size,
|
|
.max_send_sge = openib_btl->ib_sg_list_size,
|
|
.max_recv_sge = openib_btl->ib_sg_list_size,
|
|
},
|
|
.qp_type = IBV_QPT_RC
|
|
};
|
|
|
|
(*qp) = ibv_create_qp(pd, &qp_init_attr);
|
|
|
|
if(NULL == (*qp)) {
|
|
opal_output(0, "%s: error creating qp \n", __func__);
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
openib_btl->ib_inline_max = qp_init_attr.cap.max_inline_data;
|
|
|
|
}
|
|
|
|
{
|
|
qp_attr->qp_state = IBV_QPS_INIT;
|
|
qp_attr->pkey_index = openib_btl->ib_pkey_ix;
|
|
qp_attr->qp_port_num = openib_btl->port_num;
|
|
qp_attr->qp_access_flags = 0;
|
|
|
|
if(ibv_modify_qp((*qp), qp_attr,
|
|
IBV_QP_STATE |
|
|
IBV_QP_PKEY_INDEX |
|
|
IBV_QP_PORT |
|
|
IBV_QP_ACCESS_FLAGS )) {
|
|
opal_output("%s: error modifying qp to INIT\n");
|
|
return OMPI_ERROR;
|
|
}
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
int mca_btl_openib_endpoint_qp_init_query(
|
|
mca_btl_openib_module_t* openib_btl,
|
|
struct ibv_qp* qp,
|
|
struct ibv_qp_attr* attr,
|
|
uint32_t lcl_psn,
|
|
uint32_t rem_qp_num,
|
|
uint32_t rem_psn,
|
|
uint16_t rem_lid,
|
|
uint32_t port_num
|
|
)
|
|
|
|
|
|
{
|
|
attr->qp_state = IBV_QPS_RTR;
|
|
attr->path_mth = openib_btl->ib_mtu;
|
|
attr->dest_qp_num = rem_qp_num;
|
|
attr->rq_psn = rem_psn;
|
|
attr->max_des_rd_atomic = openib_btl->ib_max_rdma_dst_ops;
|
|
attr->min_rnr_timer = openib_btl->ib_min_rnr_timer;
|
|
attr->ah_attr.is_global = 0;
|
|
attr->ah_attr.dlid = rem_lid;
|
|
attr->ah_attr.sl = openib_btl->ib_service_level;
|
|
attr->ah_attr.src_path_bits = openib_btl->ib_src_path_bits;
|
|
attr->ah_attr.port_num = port_num;
|
|
|
|
if(ibv_modify_qp(qp, attr,
|
|
IBV_QP_STATE |
|
|
IBV_QP_AV |
|
|
IBV_QP_PATH_MTU |
|
|
IBV_QP_DEST_QPN |
|
|
IBV_QP_RQ_PSN |
|
|
IBV_QP_MAX_DEST_RD_ATOMIC |
|
|
IBV_QP_MIN_RNR_TIMER)) {
|
|
opal_out(0, "%s: error modifing QP to RTR\n", __func__);
|
|
return OMPI_ERROR;
|
|
}
|
|
attr->qp_state = IBV_QPS_RTS;
|
|
attr->timeout = openib_btl->ib_timeout;
|
|
attr->retry_cnt = openib_btl->ib_retry_count;
|
|
attr->rnr_retry = openib_btl->ib_rnr_retry;
|
|
attr->sq_psn = my_psn;
|
|
attr->max_rd_atomic = openib_btl->ib_max_rdma_dst_ops;
|
|
if (ibv_modify_qp(qp, attr,
|
|
IBV_QP_STATE |
|
|
IBV_QP_TIMEOUT |
|
|
IBV_QP_RETRY_CNT |
|
|
IBV_QP_RNR_RETRY |
|
|
IBV_QP_SQ_PSN |
|
|
IBV_QP_MAX_QP_RD_ATOMIC)) {
|
|
opal_output(0, "%s: error modifying QP to RTS\n", __func__);
|
|
return OMPI_FAILURE;
|
|
}
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|