1
1
openmpi/ompi/mca/btl/ofud/btl_ofud_endpoint.c
Ralph Castain 9613b3176c Effectively revert the orte_output system and return to direct use of opal_output at all levels. Retain the orte_show_help subsystem to allow aggregation of show_help messages at the HNP.
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach.

I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive.

This commit was SVN r18619.
2008-06-09 14:53:58 +00:00

132 строки
4.5 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2007 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <errno.h>
#include <string.h>
#include "ompi_config.h"
#include "opal/prefetch.h"
#include "ompi/types.h"
#include "ompi/class/ompi_free_list.h"
#include "btl_ofud.h"
#include "btl_ofud_endpoint.h"
#include "btl_ofud_frag.h"
/* Forward declarations of the file-local constructor/destructor hooks;
   they are referenced by the OBJ_CLASS_INSTANCE() invocation for
   mca_btl_ud_endpoint_t before their definitions appear. */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
/*
 * CHECK_FRAG_QUEUES(sd_wqe, lock, queue, frag)
 *
 * Try to take one unit from the send down-counter `sd_wqe`.
 *
 *  - If the counter is still non-negative after the decrement, the macro
 *    falls through and the caller goes on to post the send.
 *  - If the decrement drives the counter below zero, the decrement is
 *    undone, `frag` is appended to `queue` while holding `lock`, and the
 *    ENCLOSING FUNCTION returns OMPI_SUCCESS (the fragment has been
 *    queued rather than posted).
 *
 * The macro is generic over which counter/lock/queue it is handed; in this
 * file it is invoked with the BTL-wide sd_wqe, ud_lock and pending_frags.
 *
 * Note: the expansion deliberately has no trailing semicolon, so that
 * `CHECK_FRAG_QUEUES(...);` is exactly one statement and remains safe
 * inside an unbraced if/else.
 */
#define CHECK_FRAG_QUEUES(sd_wqe, lock, queue, frag)                    \
    do {                                                                \
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&(sd_wqe), -1) < 0)) {       \
            OPAL_THREAD_ADD32(&(sd_wqe), 1);                            \
            OPAL_THREAD_LOCK(&(lock));                                  \
            opal_list_append(&(queue),                                  \
                             (opal_list_item_t*)(frag));                \
            OPAL_THREAD_UNLOCK(&(lock));                                \
            return OMPI_SUCCESS;                                        \
        }                                                               \
    } while(0)
/*
 * Post a send to the work queue.
 *
 * ud_btl  BTL module whose send credits (sd_wqe), pending queue and
 *         queue pairs are used.
 * frag    Fragment to send; its endpoint supplies the address handle and
 *         remote QP number.
 *
 * Returns OMPI_SUCCESS when the fragment was either posted or, because no
 * send credit was available, queued on ud_btl->pending_frags.  Returns
 * OMPI_ERROR when ibv_post_send() rejects the work request; in that case
 * the send credit taken for this fragment is handed back, since no work
 * request was posted to consume it.
 */
int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
                                  mca_btl_ud_frag_t* frag)
{
    struct ibv_qp* ib_qp;
    struct ibv_send_wr* bad_wr;
    struct ibv_send_wr* wr = &frag->wr_desc.sr_desc;
    mca_btl_ud_endpoint_t* endpoint = frag->endpoint;
    int ret;

    /* Have to be careful here - UD adds a 40 byte header, but it is not
       included on the sending side.  The posted length is therefore just
       the payload plus our own BTL header. */
    frag->sg_entry.length = frag->segment.seg_len + sizeof(mca_btl_ud_header_t);
    wr->send_flags = IBV_SEND_SIGNALED;

    /* May queue the fragment and return OMPI_SUCCESS from this function
       when the BTL has no send credit left. */
    CHECK_FRAG_QUEUES(ud_btl->sd_wqe,
                      ud_btl->ud_lock, ud_btl->pending_frags, frag);

    /* We avoid locking here by allowing our stripe counter to count
       until it wraps around uint32_t.  This keeps the mod operation
       out of the critical section, allowing us to use OPAL_THREAD_ADD32
       instead of a full mutex. */
    ib_qp = ud_btl->ib_qp[ud_btl->ib_qp_next % MCA_BTL_UD_NUM_QP];
    OPAL_THREAD_ADD32(((int32_t*)&ud_btl->ib_qp_next), 1);

    wr->wr.ud.ah = endpoint->rmt_ah;
    wr->wr.ud.remote_qpn = endpoint->rem_addr.qp_num;

    /* Small enough messages are additionally sent inline. */
    if(frag->sg_entry.length <= ud_btl->ib_inline_max) {
        wr->send_flags = IBV_SEND_SIGNALED|IBV_SEND_INLINE;
    }

    MCA_BTL_UD_START_TIME(ibv_post_send);
    ret = ibv_post_send(ib_qp, wr, &bad_wr);
    if(OPAL_UNLIKELY(0 != ret)) {
        /* ibv_post_send() reports failure via an errno-style return. */
        BTL_ERROR(("error posting send request: %d %s\n", ret, strerror(ret)));

        /* Nothing was posted: give back the credit taken above. */
        OPAL_THREAD_ADD32(&ud_btl->sd_wqe, 1);
    }
    MCA_BTL_UD_END_TIME(ibv_post_send);

    /* Do not report a failed post as success to the caller. */
    return (0 == ret) ? OMPI_SUCCESS : OMPI_ERROR;
}
/* Class instance definition for mca_btl_ud_endpoint_t: the parent class is
   opal_list_item_t, and the construct/destruct hooks are the two static
   functions defined in this file. */
OBJ_CLASS_INSTANCE(mca_btl_ud_endpoint_t,
opal_list_item_t, mca_btl_ud_endpoint_construct,
mca_btl_ud_endpoint_destruct);
/*
* Construct/destruct an endpoint structure.
*/
/*
 * Endpoint constructor hook.
 *
 * In debug builds (OMPI_ENABLE_DEBUG) the remote address record of the
 * endpoint is zero-filled; in other builds this function does nothing.
 */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
#if OMPI_ENABLE_DEBUG
    struct mca_btl_ud_addr_t* remote = &endpoint->rem_addr;

    memset(remote, 0, sizeof(*remote));
#endif
}
/*
 * Endpoint destructor hook.  Intentionally a no-op: it performs no
 * cleanup on the endpoint.
 */
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
    (void)endpoint;
}