2006-06-09 00:13:45 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
|
|
|
* reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <time.h>
|
2006-06-22 18:06:55 +00:00
|
|
|
#include "opal/prefetch.h"
|
2006-06-09 00:13:45 +00:00
|
|
|
#include "ompi/types.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
|
|
|
#include "orte/mca/ns/base/base.h"
|
|
|
|
#include "orte/mca/oob/base/base.h"
|
|
|
|
#include "orte/mca/rml/rml.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
#include "orte/dss/dss.h"
|
|
|
|
#include "btl_ud.h"
|
|
|
|
#include "btl_ud_endpoint.h"
|
|
|
|
#include "btl_ud_proc.h"
|
|
|
|
#include "btl_ud_frag.h"
|
|
|
|
#include "ompi/class/ompi_free_list.h"
|
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
|
|
/* Constructor/destructor hooks for mca_btl_ud_endpoint_t; both are defined
 * further down in this file. */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t *endpoint);
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t *endpoint);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* post a send to the work queue
|
|
|
|
*/
|
|
|
|
inline int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
|
|
|
|
mca_btl_ud_endpoint_t * endpoint,
|
|
|
|
mca_btl_ud_frag_t * frag)
|
|
|
|
{
|
|
|
|
struct ibv_qp* ib_qp;
|
|
|
|
struct ibv_send_wr* bad_wr;
|
|
|
|
|
|
|
|
/* Have to be careful here - UD adds a 40 byte header, but it is not
|
|
|
|
included on the sending side. */
|
|
|
|
frag->sg_entry.length = frag->segment.seg_len + sizeof(mca_btl_ud_header_t);
|
|
|
|
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
|
|
|
|
|
|
|
|
if(frag->size == ud_btl->super.btl_eager_limit) {
|
2006-06-22 18:06:55 +00:00
|
|
|
if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, -1) < 0)) {
|
2006-06-09 00:13:45 +00:00
|
|
|
OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, 1);
|
|
|
|
opal_list_append(&ud_btl->pending_frags_hp,
|
|
|
|
(opal_list_item_t*)frag);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
ib_qp = ud_btl->qp_hp;
|
|
|
|
frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_hp;
|
|
|
|
frag->wr_desc.sr_desc.wr.ud.remote_qpn =
|
2006-06-23 16:50:50 +00:00
|
|
|
endpoint->rem_addr.qp_num_hp;
|
2006-06-09 00:13:45 +00:00
|
|
|
|
|
|
|
if(frag->sg_entry.length <= ud_btl->ib_inline_max) {
|
|
|
|
frag->wr_desc.sr_desc.send_flags =
|
|
|
|
IBV_SEND_SIGNALED|IBV_SEND_INLINE;
|
|
|
|
}
|
|
|
|
} else {
|
2006-06-22 18:06:55 +00:00
|
|
|
if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, -1) < 0)) {
|
2006-06-09 00:13:45 +00:00
|
|
|
OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, 1);
|
|
|
|
opal_list_append(&ud_btl->pending_frags_lp,
|
|
|
|
(opal_list_item_t*)frag);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
ib_qp = ud_btl->qp_lp;
|
|
|
|
frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_lp;
|
|
|
|
frag->wr_desc.sr_desc.wr.ud.remote_qpn =
|
2006-06-23 16:50:50 +00:00
|
|
|
endpoint->rem_addr.qp_num_lp;
|
2006-06-09 00:13:45 +00:00
|
|
|
}
|
|
|
|
|
2006-06-23 16:50:50 +00:00
|
|
|
/*OPAL_OUTPUT((0, "Send to LID %d QP %d, len: %d %d %d, frag: %p",
|
|
|
|
endpoint->rem_addr.lid,
|
|
|
|
frag->wr_desc.sr_desc.wr.ud.remote_qpn,
|
2006-06-09 00:13:45 +00:00
|
|
|
frag->sg_entry.length, frag->segment.seg_len,
|
2006-06-23 16:50:50 +00:00
|
|
|
ud_btl->ib_inline_max, frag));*/
|
2006-06-09 00:13:45 +00:00
|
|
|
|
|
|
|
#if MCA_BTL_UD_ENABLE_PROFILE
|
|
|
|
frag->tm = opal_sys_timer_get_cycles();
|
|
|
|
#endif
|
|
|
|
|
|
|
|
MCA_BTL_UD_START_TIME(ibv_post_send);
|
2006-06-23 16:50:50 +00:00
|
|
|
if(OPAL_UNLIKELY(ibv_post_send(ib_qp, &frag->wr_desc.sr_desc, &bad_wr))) {
|
2006-06-09 00:13:45 +00:00
|
|
|
BTL_ERROR(("error posting send request errno says %d %s\n",
|
|
|
|
errno, strerror(errno)));
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
MCA_BTL_UD_END_TIME(ibv_post_send);
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Class instance for mca_btl_ud_endpoint_t, wired to opal_list_item_t and to
 * the construct/destruct functions defined in this file. */
OBJ_CLASS_INSTANCE(mca_btl_ud_endpoint_t, opal_list_item_t,
                   mca_btl_ud_endpoint_construct,
                   mca_btl_ud_endpoint_destruct);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize state of the endpoint instance.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
|
|
|
{
|
2006-06-23 16:50:50 +00:00
|
|
|
/*OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);*/
|
2006-06-09 00:13:45 +00:00
|
|
|
|
2006-06-23 16:50:50 +00:00
|
|
|
memset(&endpoint->rem_addr, 0, sizeof(struct mca_btl_ud_addr_t));
|
2006-06-09 00:13:45 +00:00
|
|
|
}
|
|
|
|
|
2006-06-23 16:50:50 +00:00
|
|
|
|
2006-06-09 00:13:45 +00:00
|
|
|
/*
|
|
|
|
* Destroy a endpoint
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the queue pair note that this is just the initial
|
|
|
|
* queue pair creation and we need to get the remote queue pair
|
|
|
|
* info from the peer before the qp is usable,
|
|
|
|
*/
|
|
|
|
|
|
|
|
int mca_btl_ud_endpoint_init_qp(
|
|
|
|
mca_btl_base_module_t* btl,
|
|
|
|
struct ibv_cq* cq,
|
|
|
|
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
|
|
|
struct ibv_srq* srq,
|
|
|
|
#endif
|
|
|
|
struct ibv_qp** qp,
|
|
|
|
uint32_t lcl_psn
|
|
|
|
)
|
|
|
|
{
|
|
|
|
mca_btl_ud_module_t* ud_btl = (mca_btl_ud_module_t*)btl;
|
|
|
|
struct ibv_qp* my_qp;
|
|
|
|
struct ibv_qp_attr qp_attr;
|
|
|
|
struct ibv_qp_init_attr qp_init_attr;
|
|
|
|
|
|
|
|
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
|
|
|
|
|
|
|
|
qp_init_attr.send_cq = cq;
|
|
|
|
qp_init_attr.recv_cq = cq;
|
|
|
|
qp_init_attr.cap.max_send_wr = mca_btl_ud_component.rd_num;
|
|
|
|
qp_init_attr.cap.max_recv_wr = mca_btl_ud_component.rd_num;
|
|
|
|
qp_init_attr.cap.max_send_sge = mca_btl_ud_component.ib_sg_list_size;
|
|
|
|
qp_init_attr.cap.max_recv_sge = mca_btl_ud_component.ib_sg_list_size;
|
|
|
|
qp_init_attr.qp_type = IBV_QPT_UD;
|
|
|
|
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
|
|
|
if(mca_btl_ud_component.use_srq) {
|
|
|
|
qp_init_attr.srq = srq;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
my_qp = ibv_create_qp(ud_btl->ib_pd, &qp_init_attr);
|
|
|
|
|
|
|
|
if(NULL == my_qp) {
|
|
|
|
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
(*qp) = my_qp;
|
|
|
|
if(0 == (ud_btl->ib_inline_max = qp_init_attr.cap.max_inline_data)) {
|
|
|
|
BTL_ERROR(("ibv_create_qp: returned 0 byte(s) for max inline data"));
|
|
|
|
}
|
|
|
|
|
|
|
|
qp_attr.qp_state = IBV_QPS_INIT;
|
|
|
|
qp_attr.pkey_index = mca_btl_ud_component.ib_pkey_ix;
|
|
|
|
qp_attr.qkey = mca_btl_ud_component.ib_qkey;
|
|
|
|
qp_attr.port_num = ud_btl->port_num;
|
|
|
|
|
|
|
|
if(ibv_modify_qp(*qp, &qp_attr,
|
|
|
|
IBV_QP_STATE |
|
|
|
|
IBV_QP_PKEY_INDEX |
|
|
|
|
IBV_QP_PORT |
|
|
|
|
IBV_QP_QKEY)) {
|
|
|
|
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
qp_attr.qp_state = IBV_QPS_RTR;
|
|
|
|
if(ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE)) {
|
|
|
|
BTL_ERROR(("error modifing QP to RTR errno says %s", strerror(errno)));
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
qp_attr.qp_state = IBV_QPS_RTS;
|
|
|
|
qp_attr.sq_psn = lcl_psn;
|
|
|
|
if (ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) {
|
|
|
|
BTL_ERROR(("error modifying QP to RTS errno says %s", strerror(errno)));
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|