/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006      Sandia National Laboratories. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"
/* NOTE(review): the next two directives had lost their header names (bare
 * `#include` tokens). <sys/time.h> and <time.h> are an assumption - confirm
 * against the upstream file. */
#include <sys/time.h>
#include <time.h>
#include "opal/prefetch.h"
#include "ompi/types.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "btl_ud.h"
#include "btl_ud_endpoint.h"
#include "btl_ud_proc.h"
#include "btl_ud_frag.h"
#include "ompi/class/ompi_free_list.h"
/* NOTE(review): these two directives had also lost their header names.
 * errno, strerror() and memset() are used in this file, so <errno.h> and
 * <string.h> are required - confirm they match upstream. */
#include <errno.h>
#include <string.h>

static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);

/*
 * Post a send to the work queue.
 *
 * A fragment whose size equals the BTL eager limit is sent through the "hp"
 * resources (qp_hp, rmt_ah_hp, qp_num_hp, sd_wqe_hp); every other fragment
 * uses the matching "lp" resources.  Before posting, the selected sd_wqe
 * counter is decremented; if that takes it below zero the decrement is undone
 * and the fragment is appended to the matching pending_frags list instead of
 * being posted.
 *
 * ud_btl    module owning the queue pairs, counters and pending lists
 * endpoint  destination; supplies the address handle and remote QP number
 * frag      fragment to send; its scatter/gather length, send flags and UD
 *           addressing fields are filled in here
 *
 * Returns OMPI_SUCCESS if the fragment was posted or queued as pending, and
 * OMPI_ERROR if ibv_post_send() failed.  On failure the sd_wqe decrement is
 * rolled back, since the work request was never handed to the QP.
 */
inline int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
                                         mca_btl_ud_endpoint_t * endpoint,
                                         mca_btl_ud_frag_t * frag)
{
    struct ibv_qp* ib_qp;
    struct ibv_send_wr* bad_wr;
    int use_hp;
    int rc;

    /* Have to be careful here - UD adds a 40 byte header, but it is not
       included on the sending side. */
    frag->sg_entry.length = frag->segment.seg_len + sizeof(mca_btl_ud_header_t);
    frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;

    use_hp = (frag->size == ud_btl->super.btl_eager_limit);
    if(use_hp) {
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, -1) < 0)) {
            /* counter went negative: undo and park the fragment */
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, 1);
            opal_list_append(&ud_btl->pending_frags_hp,
                             (opal_list_item_t*)frag);
            return OMPI_SUCCESS;
        }

        ib_qp = ud_btl->qp_hp;
        frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_hp;
        frag->wr_desc.sr_desc.wr.ud.remote_qpn = endpoint->rem_addr.qp_num_hp;

        /* payloads no larger than ib_inline_max are sent inline */
        if(frag->sg_entry.length <= ud_btl->ib_inline_max) {
            frag->wr_desc.sr_desc.send_flags =
                IBV_SEND_SIGNALED|IBV_SEND_INLINE;
        }
    } else {
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, -1) < 0)) {
            /* counter went negative: undo and park the fragment */
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, 1);
            opal_list_append(&ud_btl->pending_frags_lp,
                             (opal_list_item_t*)frag);
            return OMPI_SUCCESS;
        }

        ib_qp = ud_btl->qp_lp;
        frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_lp;
        frag->wr_desc.sr_desc.wr.ud.remote_qpn = endpoint->rem_addr.qp_num_lp;
    }

    /*OPAL_OUTPUT((0, "Send to LID %d QP %d, len: %d %d %d, frag: %p",
            endpoint->rem_addr.lid,
            frag->wr_desc.sr_desc.wr.ud.remote_qpn,
            frag->sg_entry.length, frag->segment.seg_len,
            ud_btl->ib_inline_max, frag));*/

#if MCA_BTL_UD_ENABLE_PROFILE
    frag->tm = opal_sys_timer_get_cycles();
#endif

    MCA_BTL_UD_START_TIME(ibv_post_send);
    rc = ibv_post_send(ib_qp, &frag->wr_desc.sr_desc, &bad_wr);
    if(OPAL_UNLIKELY(0 != rc)) {
        /* ibv_post_send() reports the failure reason through its return
           value; fall back to errno only if that is not a positive code. */
        int err = (rc > 0) ? rc : errno;

        /* The request never reached the QP, so nothing will hand the
           reserved slot back later - restore it here. */
        if(use_hp) {
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, 1);
        } else {
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, 1);
        }

        BTL_ERROR(("error posting send request errno says %d %s\n",
                   err, strerror(err)));
        return OMPI_ERROR;
    }
    MCA_BTL_UD_END_TIME(ibv_post_send);

    return OMPI_SUCCESS;
}


OBJ_CLASS_INSTANCE(mca_btl_ud_endpoint_t,
                   opal_list_item_t,
                   mca_btl_ud_endpoint_construct,
                   mca_btl_ud_endpoint_destruct);

/*
 * Initialize state of the endpoint instance.
* */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    /*OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);*/

    /* start with an all-zero remote address */
    memset(&endpoint->rem_addr, 0, sizeof(struct mca_btl_ud_addr_t));
}


/*
 * Destroy an endpoint.  Nothing is released here.
 */
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
}


/*
 * Turn a verbs return code into a value suitable for strerror().
 * ibv_modify_qp() returns the errno value on failure; errno itself is only
 * consulted when the return code is not a positive error number.  Must be
 * called right after the failing call, before errno can be overwritten.
 */
static int mca_btl_ud_verbs_errno(int rc)
{
    return (rc > 0) ? rc : errno;
}


/*
 * Create a UD queue pair and bring it to the ready-to-send state.
 *
 * The QP is created on ud_btl->ib_pd with `cq` as both send and receive
 * completion queue, then moved through INIT, RTR and RTS.
 *
 * btl      module (cast to mca_btl_ud_module_t); supplies the protection
 *          domain and port number, and receives ib_inline_max
 * cq       completion queue used for both send and receive
 * srq      (only when OMPI_MCA_BTL_OPENIB_HAVE_SRQ is defined) attached to
 *          the QP if mca_btl_ud_component.use_srq is set
 * qp       out: receives the created queue pair
 * lcl_psn  send-queue PSN applied on the transition to RTS
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if creation or any state transition
 * fails.
 *
 * NOTE: when a state transition fails, the already-created QP is left stored
 * in *qp and is not destroyed here.
 * NOTE: every call overwrites ud_btl->ib_inline_max with the value reported
 * for the QP just created.
 */
int mca_btl_ud_endpoint_init_qp(
        mca_btl_base_module_t* btl,
        struct ibv_cq* cq,
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
        struct ibv_srq* srq,
#endif
        struct ibv_qp** qp,
        uint32_t lcl_psn
        )
{
    mca_btl_ud_module_t* ud_btl = (mca_btl_ud_module_t*)btl;
    struct ibv_qp* my_qp;
    struct ibv_qp_attr qp_attr;
    struct ibv_qp_init_attr qp_init_attr;
    int rc;

    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
    /* zero the attribute block too, so no field is ever read uninitialized */
    memset(&qp_attr, 0, sizeof(struct ibv_qp_attr));

    qp_init_attr.send_cq = cq;
    qp_init_attr.recv_cq = cq;
    qp_init_attr.cap.max_send_wr = mca_btl_ud_component.rd_num;
    qp_init_attr.cap.max_recv_wr = mca_btl_ud_component.rd_num;
    qp_init_attr.cap.max_send_sge = mca_btl_ud_component.ib_sg_list_size;
    qp_init_attr.cap.max_recv_sge = mca_btl_ud_component.ib_sg_list_size;
    qp_init_attr.qp_type = IBV_QPT_UD;
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    if(mca_btl_ud_component.use_srq) {
        qp_init_attr.srq = srq;
    }
#endif

    my_qp = ibv_create_qp(ud_btl->ib_pd, &qp_init_attr);
    if(NULL == my_qp) {
        BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
        return OMPI_ERROR;
    }
    (*qp) = my_qp;

    /* ibv_create_qp() writes the granted capabilities back into
       qp_init_attr.cap; record the inline limit for the send path. */
    ud_btl->ib_inline_max = qp_init_attr.cap.max_inline_data;
    if(0 == ud_btl->ib_inline_max) {
        /* reported but not treated as fatal */
        BTL_ERROR(("ibv_create_qp: returned 0 byte(s) for max inline data"));
    }

    /* RESET -> INIT */
    qp_attr.qp_state = IBV_QPS_INIT;
    qp_attr.pkey_index = mca_btl_ud_component.ib_pkey_ix;
    qp_attr.qkey = mca_btl_ud_component.ib_qkey;
    qp_attr.port_num = ud_btl->port_num;

    rc = ibv_modify_qp(*qp, &qp_attr,
                       IBV_QP_STATE | IBV_QP_PKEY_INDEX |
                       IBV_QP_PORT | IBV_QP_QKEY);
    if(0 != rc) {
        BTL_ERROR(("error modifying qp to INIT errno says %s",
                   strerror(mca_btl_ud_verbs_errno(rc))));
        return OMPI_ERROR;
    }

    /* INIT -> RTR */
    qp_attr.qp_state = IBV_QPS_RTR;
    rc = ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE);
    if(0 != rc) {
        BTL_ERROR(("error modifing QP to RTR errno says %s",
                   strerror(mca_btl_ud_verbs_errno(rc))));
        return OMPI_ERROR;
    }

    /* RTR -> RTS */
    qp_attr.qp_state = IBV_QPS_RTS;
    qp_attr.sq_psn = lcl_psn;
    rc = ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
    if(0 != rc) {
        BTL_ERROR(("error modifying QP to RTS errno says %s",
                   strerror(mca_btl_ud_verbs_errno(rc))));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}