9613b3176c
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach. I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive. This commit was SVN r18619.
132 строки
4.5 KiB
C
132 строки
4.5 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
|
* reserved.
|
|
* Copyright (c) 2007 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
|
|
#include "ompi_config.h"
|
|
#include "opal/prefetch.h"
|
|
#include "ompi/types.h"
|
|
#include "ompi/class/ompi_free_list.h"
|
|
|
|
#include "btl_ofud.h"
|
|
#include "btl_ofud_endpoint.h"
|
|
#include "btl_ofud_frag.h"
|
|
|
|
|
|
/* Endpoint constructor/destructor; both are defined at the end of this file
 * and are handed to OBJ_CLASS_INSTANCE for mca_btl_ud_endpoint_t. */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t *endpoint);
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t *endpoint);
|
|
|
|
|
|
/*
 * Try to reserve one send credit from the counter `sd_wqe`.
 *
 * The counter is atomically decremented.  If the result is negative there
 * was no credit left: the decrement is undone, `frag` is appended to `queue`
 * while `lock` is held, and the ENCLOSING FUNCTION returns OMPI_SUCCESS.
 * Otherwise execution falls through and the caller goes on to post the send.
 *
 * Beware of the hidden `return`: the macro may only be used inside a
 * function whose return type can carry OMPI_SUCCESS.
 *
 * The expansion intentionally carries no trailing semicolon, so that
 * `CHECK_FRAG_QUEUES(...);` is exactly one statement and remains safe inside
 * an unbraced if/else.
 */
#define CHECK_FRAG_QUEUES(sd_wqe, lock, queue, frag)                    \
    do {                                                                \
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&(sd_wqe), -1) < 0)) {       \
            OPAL_THREAD_ADD32(&(sd_wqe), 1);                            \
            OPAL_THREAD_LOCK(&(lock));                                  \
            opal_list_append(&(queue),                                  \
                             (opal_list_item_t*)(frag));                \
            OPAL_THREAD_UNLOCK(&(lock));                                \
            return OMPI_SUCCESS;                                        \
        }                                                               \
    } while(0)
|
|
|
|
|
|
/*
|
|
* Post a send to the work queue
|
|
*/
|
|
|
|
int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
|
|
mca_btl_ud_frag_t* frag)
|
|
{
|
|
struct ibv_qp* ib_qp;
|
|
struct ibv_send_wr* bad_wr;
|
|
struct ibv_send_wr* wr = &frag->wr_desc.sr_desc;
|
|
mca_btl_ud_endpoint_t* endpoint = frag->endpoint;
|
|
int ret;
|
|
|
|
/* Have to be careful here - UD adds a 40 byte header, but it is not
|
|
included on the sending side. */
|
|
frag->sg_entry.length = frag->segment.seg_len + sizeof(mca_btl_ud_header_t);
|
|
wr->send_flags = IBV_SEND_SIGNALED;
|
|
|
|
CHECK_FRAG_QUEUES(ud_btl->sd_wqe,
|
|
ud_btl->ud_lock, ud_btl->pending_frags, frag);
|
|
|
|
/* We avoid locking here by allowing our stripe counter to count
|
|
until it wraps around uint32_t. This keeps the mod operation
|
|
out of the critical section, allowing us to use OPAL_THREAD_ADD32
|
|
instead of a full mutex. */
|
|
ib_qp = ud_btl->ib_qp[ud_btl->ib_qp_next % MCA_BTL_UD_NUM_QP];
|
|
OPAL_THREAD_ADD32(((int32_t*)&ud_btl->ib_qp_next), 1);
|
|
|
|
wr->wr.ud.ah = endpoint->rmt_ah;
|
|
wr->wr.ud.remote_qpn = endpoint->rem_addr.qp_num;
|
|
|
|
if(frag->sg_entry.length <= ud_btl->ib_inline_max) {
|
|
wr->send_flags =
|
|
IBV_SEND_SIGNALED|IBV_SEND_INLINE;
|
|
}
|
|
|
|
/*frag->hdr->src_qpnum = ud_btl->addr.qp_num;*/
|
|
|
|
MCA_BTL_UD_START_TIME(ibv_post_send);
|
|
if(OPAL_UNLIKELY((ret = ibv_post_send(ib_qp, wr, &bad_wr)))) {
|
|
#if 0
|
|
opal_output(0, "ep->sd_wqe %d btl->sd_wqe %d len %d ib_qp_next %d",
|
|
endpoint->sd_wqe, ud_btl->sd_wqe,
|
|
frag->sg_entry.length, ud_btl->ib_qp_next);
|
|
#endif
|
|
BTL_ERROR(("error posting send request: %d %s\n", ret, strerror(ret)));
|
|
|
|
}
|
|
MCA_BTL_UD_END_TIME(ibv_post_send);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
/* Class instance for mca_btl_ud_endpoint_t: parent type opal_list_item_t,
 * with the constructor and destructor defined in this file. */
OBJ_CLASS_INSTANCE(mca_btl_ud_endpoint_t, opal_list_item_t,
                   mca_btl_ud_endpoint_construct,
                   mca_btl_ud_endpoint_destruct);
|
|
|
|
/*
|
|
* Construct/destruct an endpoint structure.
|
|
*/
|
|
|
|
/* Endpoint constructor.  In builds with OMPI_ENABLE_DEBUG set, the remote
 * address of the new endpoint is cleared to all zero bytes; otherwise the
 * endpoint is left untouched. */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t *endpoint)
{
#if OMPI_ENABLE_DEBUG
    void *remote_addr = &endpoint->rem_addr;

    memset(remote_addr, 0, sizeof(struct mca_btl_ud_addr_t));
#endif
}
|
|
|
|
/* Endpoint destructor.  Intentionally empty: this function releases
 * nothing on behalf of the endpoint. */
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t *endpoint)
{
    /* nothing to do */
}
|
|
|