1
1
The UD BTL isn't gone - the latest version is in my afriedle-ud branch.  This version on the trunk was very old, ompi_ignore'd, lacked performance, and probably contained bugs.  The maintained version on my branch is working solidly, and will eventually come back, but not for v1.2.

This commit was SVN r12144.
Этот коммит содержится в:
Andrew Friedley 2006-10-17 18:59:21 +00:00
родитель 338000edf2
Коммит 16769e64fe
14 изменённых файлов: 0 добавлений и 2914 удалений

Просмотреть файл

Просмотреть файл

@ -1,2 +0,0 @@
afriedle
afriedl

Просмотреть файл

@ -1,68 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Sandia National Laboratories. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
AM_CPPFLAGS=$(btl_ud_CPPFLAGS)
# Source files shared by both the DSO and the static builds of this component.
sources = \
btl_ud.c \
btl_ud.h \
btl_ud_component.c \
btl_ud_endpoint.c \
btl_ud_endpoint.h \
btl_ud_frag.c \
btl_ud_frag.h \
btl_ud_proc.c \
btl_ud_proc.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_btl_ud_DSO
lib =
lib_sources =
component = mca_btl_ud.la
component_sources = $(sources)
else
lib = libmca_btl_ud.la
lib_sources = $(sources)
component =
component_sources =
endif
# DSO build: install the loadable component into the MCA component dir.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component)
mca_btl_ud_la_SOURCES = $(component_sources)
mca_btl_ud_la_LDFLAGS = -module -avoid-version $(btl_ud_LDFLAGS)
mca_btl_ud_la_LIBADD = \
$(btl_ud_LIBS) \
$(top_ompi_builddir)/ompi/libmpi.la \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
# Static build: a noinst libtool convenience library linked into libmpi.
noinst_LTLIBRARIES = $(lib)
libmca_btl_ud_la_SOURCES = $(lib_sources)
libmca_btl_ud_la_LDFLAGS= -module -avoid-version $(btl_ud_LDFLAGS)
libmca_btl_ud_la_LIBADD=$(btl_ud_LIBS)

Просмотреть файл

@ -1,694 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <inttypes.h>
#include "opal/prefetch.h"
#include "opal/util/output.h"
#include "opal/util/if.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_ud.h"
#include "btl_ud_frag.h"
#include "btl_ud_proc.h"
#include "btl_ud_endpoint.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#include <errno.h>
#include <string.h>
#include <math.h>
/*
 * Template instance for UD BTL modules.  The numeric limits below are
 * all zero here; presumably they are filled in from MCA parameters at
 * component open/init time -- TODO confirm in btl_ud_component.c.
 * Entries left NULL (prepare_dst, put, get) are operations this
 * send-only BTL does not implement (flags = MCA_BTL_FLAGS_SEND).
 */
mca_btl_ud_module_t mca_btl_ud_module = {
{
&mca_btl_ud_component.super,
0, /* max size of first fragment */
0, /* min send fragment size */
0, /* max send fragment size */
0, /* min rdma fragment size */
0, /* max rdma fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
MCA_BTL_FLAGS_SEND,
mca_btl_ud_add_procs,
mca_btl_ud_del_procs,
mca_btl_ud_register,
mca_btl_ud_finalize,
/* we need alloc free, pack */
mca_btl_ud_alloc,
mca_btl_ud_free,
mca_btl_ud_prepare_src,
NULL, /*mca_btl_ud_prepare_dst */
mca_btl_ud_send,
NULL, /*mca_btl_ud_put */
NULL, /*mca_btl_ud_get */
mca_btl_ud_dump,
NULL, /* mpool */
NULL /* register error */
}
};
/*
* add a proc to this btl module
* creates an endpoint that is setup on the
* first send to the endpoint
*/
/*
 * Add procs to this BTL module.
 *
 * For each proc, create (or look up) the shared proc structure, attach a
 * new endpoint to it, and build the two UD address handles (high/low
 * priority) used to send to the peer.  Procs we cannot reach are simply
 * skipped (their bit in 'reachable' stays clear); only allocation
 * failures abort the whole call.
 *
 * @param btl        (IN)  this BTL module
 * @param nprocs     (IN)  number of procs in the arrays below
 * @param ompi_procs (IN)  procs to consider
 * @param peers      (OUT) endpoint per reachable proc
 * @param reachable  (OUT) bitmap of procs reachable via this BTL
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE on allocation failure
 */
int mca_btl_ud_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **ompi_procs,
    struct mca_btl_base_endpoint_t** peers,
    ompi_bitmap_t* reachable)
{
    mca_btl_ud_module_t* ud_btl = (mca_btl_ud_module_t*)btl;
    struct ibv_ah_attr ah_attr;
    int i, rc;

    for(i = 0; i < (int) nprocs; i++) {
        struct ompi_proc_t* ompi_proc = ompi_procs[i];
        mca_btl_ud_proc_t* ib_proc;
        mca_btl_base_endpoint_t* ib_peer;

        if(NULL == (ib_proc = mca_btl_ud_proc_create(ompi_proc))) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        OPAL_THREAD_LOCK(&ib_proc->proc_lock);

        /* The btl_proc datastructure is shared by all IB BTL instances
         * that are trying to reach this destination; cache the peer
         * endpoint instance on it. */
        ib_peer = OBJ_NEW(mca_btl_ud_endpoint_t);
        if(NULL == ib_peer) {
            OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Insert fails if the peer did not export a matching address;
         * in that case this proc is unreachable via this BTL. */
        rc = mca_btl_ud_proc_insert(ib_proc, ib_peer);
        if(rc != OMPI_SUCCESS) {
            OBJ_RELEASE(ib_peer);
            OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
            continue;
        }

        BTL_VERBOSE(("modex_recv HP QP num %d, LP QP num %d, LID = %d",
                     ib_peer->rem_addr.qp_num_hp,
                     ib_peer->rem_addr.qp_num_lp,
                     ib_peer->rem_addr.lid));

        /* Set up IB address handles for the endpoint.  Zero ah_attr
         * first: only a subset of its fields is filled in below and the
         * remainder (static_rate, grh, ...) must not be stack garbage. */
        memset(&ah_attr, 0, sizeof(ah_attr));
        ah_attr.is_global = 0;
        ah_attr.dlid = ib_peer->rem_addr.lid;
        ah_attr.sl = mca_btl_ud_component.ib_service_level;
        ah_attr.src_path_bits = mca_btl_ud_component.ib_src_path_bits;
        ah_attr.port_num = ud_btl->port_num;

        ib_peer->rmt_ah_hp = ibv_create_ah(ud_btl->ib_pd, &ah_attr);
        if(NULL == ib_peer->rmt_ah_hp) {
            BTL_ERROR(("error creating address handle errno says %s\n",
                       strerror(errno)));
            OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
            continue;
        }

        ib_peer->rmt_ah_lp = ibv_create_ah(ud_btl->ib_pd, &ah_attr);
        /* BUG FIX: the original tested 'NULL == ib_peer' here (already
         * known non-NULL), so a failed low-priority AH creation went
         * undetected and the peer was marked reachable with a NULL AH.
         * NOTE(review): on this failure path rmt_ah_hp is leaked until
         * endpoint destruction -- confirm the endpoint destructor
         * releases it. */
        if(NULL == ib_peer->rmt_ah_lp) {
            BTL_ERROR(("error creating address handle errno says %s\n",
                       strerror(errno)));
            OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
            continue;
        }

        ompi_bitmap_set_bit(reachable, i);
        OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
        peers[i] = ib_peer;
    }

#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    /* In SRQ mode, scale the receive-descriptor count with log2 of the
     * number of peers, capped at srq_rd_max. */
    if(mca_btl_ud_component.use_srq) {
        ud_btl->rd_num = mca_btl_ud_component.rd_num +
            log2(nprocs) * mca_btl_ud_component.srq_rd_per_peer;
        if(ud_btl->rd_num > mca_btl_ud_component.srq_rd_max)
            ud_btl->rd_num = mca_btl_ud_component.srq_rd_max;
    }
#endif
    return OMPI_SUCCESS;
}
/*
* delete the proc as reachable from this btl module
*/
/*
 * PML->BTL notification that procs are going away.  Endpoint teardown
 * was never implemented for this BTL (see the TODO debug message); the
 * call is a successful no-op and any endpoint resources are leaked
 * until finalize/exit.
 */
int mca_btl_ud_del_procs(struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t ** peers)
{
BTL_DEBUG(("TODO\n"));
return OMPI_SUCCESS;
}
/*
*Register callback function to support send/recv semantics
*/
/*
 * Register a receive callback for a given tag (send/recv semantics).
 * The (cbfunc, cbdata) pair is stored in the module's per-tag table
 * under the module lock.
 */
int mca_btl_ud_register(struct mca_btl_base_module_t* btl,
                        mca_btl_base_tag_t tag,
                        mca_btl_base_module_recv_cb_fn_t cbfunc,
                        void* cbdata)
{
    mca_btl_ud_module_t* module = (mca_btl_ud_module_t*)btl;

    OPAL_THREAD_LOCK(&module->ib_lock);
    module->ib_reg[tag].cbdata = cbdata;
    module->ib_reg[tag].cbfunc = cbfunc;
    OPAL_THREAD_UNLOCK(&module->ib_lock);

    return OMPI_SUCCESS;
}
/**
* Allocate a segment.
*
* @param btl (IN) BTL module
* @param size (IN) Request segment size.
*
* When allocating a segment we pull a pre-alllocated segment
* from one of two free lists, an eager list and a max list
*/
/**
 * Allocate a descriptor.
 *
 * @param btl  (IN) BTL module
 * @param size (IN) requested segment size
 *
 * Pulls a pre-allocated fragment from the eager free list when the
 * request fits under the eager limit, otherwise from the max list;
 * requests larger than the max send size cannot be satisfied.
 */
mca_btl_base_descriptor_t* mca_btl_ud_alloc(
    struct mca_btl_base_module_t* btl,
    size_t size)
{
    int rc;
    mca_btl_ud_frag_t* frag;

    if(size <= mca_btl_ud_component.eager_limit) {
        MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
    } else if(size <= mca_btl_ud_component.max_send_size) {
        MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
    } else {
        /* larger than any preallocated buffer class */
        return NULL;
    }

    frag->segment.seg_len = size;
    return (mca_btl_base_descriptor_t*)frag;
}
/**
* Return a segment
*
* Return the segment to the appropriate
* preallocated segment list
*/
/**
 * Return a descriptor to the free list it came from.
 *
 * Frags of size 0 wrap user memory registered on the fly: release our
 * reference on the registration and return the bare frag.  Otherwise
 * the frag's size identifies whether it belongs to the max or the
 * eager pool.
 */
int mca_btl_ud_free(struct mca_btl_base_module_t* btl,
                    mca_btl_base_descriptor_t* des)
{
    mca_btl_ud_frag_t* frag = (mca_btl_ud_frag_t*)des;

    if(0 == frag->size) {
        /* user-memory frag: drop the mpool registration first */
        btl->btl_mpool->mpool_release(btl->btl_mpool,
            (mca_mpool_base_registration_t*)frag->ud_reg);
        MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag);
    } else if(mca_btl_ud_component.max_send_size == frag->size) {
        MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag);
    } else if(mca_btl_ud_component.eager_limit == frag->size) {
        MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
    } else {
        BTL_ERROR(("invalid descriptor"));
    }

    return OMPI_SUCCESS;
}
/**
* register user buffer or pack
* data into pre-registered buffer and return a
* descriptor that can be
* used for send/put.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*
* prepare source's behavior depends on the following:
* Has a valid memory registration been passed to prepare_src?
* if so we attempt to use the pre-registred user-buffer, if the memory registration
* is to small (only a portion of the user buffer) then we must reregister the user buffer
* Has the user requested the memory to be left pinned?
* if so we insert the memory registration into a memory tree for later lookup, we
* may also remove a previous registration if a MRU (most recently used) list of
* registions is full, this prevents resources from being exhausted.
* Is the requested size larger than the btl's max send size?
* if so and we aren't asked to leave the registration pinned than we register the memory if
* the users buffer is contiguous
* Otherwise we choose from two free lists of pre-registered memory in which to pack the data into.
*
*/
/*
 * Pack data (or wrap an already-registered user buffer) and return a
 * descriptor usable for send.  Four cases, tried in order:
 *   1. caller-supplied registration + contiguous data: send from the
 *      pinned user buffer in place, retaining the registration;
 *   2. contiguous data larger than max_send_size with no reserve:
 *      register the user buffer on the fly;
 *   3. fits in an eager frag: copy into a preregistered eager buffer;
 *   4. otherwise: copy (truncated to max_send_size - reserve if needed)
 *      into a preregistered max-sized buffer.
 * On success *size is updated to the number of user bytes consumed.
 */
mca_btl_base_descriptor_t* mca_btl_ud_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
)
{
mca_btl_ud_module_t* ud_btl;
mca_btl_ud_frag_t* frag;
mca_mpool_openib_registration_t * ud_reg;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int32_t free_after;
int rc;
ud_btl = (mca_btl_ud_module_t*) btl;
ud_reg = (mca_mpool_openib_registration_t*) registration;
/* Case 1: caller handed us a registration and the data is contiguous. */
if(OPAL_UNLIKELY(NULL != ud_reg &&
0 == ompi_convertor_need_buffers(convertor))) {
/* the memory is already pinned and we have contiguous user data */
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
/* iov_base == NULL asks the convertor for the address of the
 * contiguous user data rather than copying it. */
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor,
&iov, &iov_count, &max_data, &free_after);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
/*frag->sg_entry.length = max_data;*/
frag->sg_entry.lkey = ud_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long)iov.iov_base;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->ud_reg = ud_reg;
/* keep the caller's registration alive until mca_btl_ud_free */
btl->btl_mpool->mpool_retain(btl->btl_mpool, registration);
return &frag->base;
/* Case 2: contiguous oversized send with no header reserve --
 * register the user buffer on the fly. */
} else if(OPAL_UNLIKELY(max_data > btl->btl_max_send_size &&
ompi_convertor_need_buffers(convertor) == 0 && reserve == 0)) {
/* The user buffer is contigous and we are asked to send more than
the max send size. */
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor,
&iov, &iov_count, &max_data, &free_after);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->base.des_flags = 0;
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, iov.iov_base,
max_data, 0, (mca_mpool_base_registration_t**) &ud_reg);
if(OMPI_SUCCESS != rc || NULL == ud_reg) {
BTL_ERROR(("mpool_register(%p,%lu) failed",
iov.iov_base, max_data));
MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag);
return NULL;
}
/*frag->sg_entry.length = max_data;*/
frag->sg_entry.lkey = ud_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long) iov.iov_base;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->ud_reg = ud_reg;
return &frag->base;
/* Case 3: small enough for an eager frag -- copy into the
 * preregistered buffer, after the reserved header space. */
} else
if (max_data + reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
either we received no prepinned memory or leave pinned is
not set */
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
if(OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor,
&iov, &iov_count, &max_data, &free_after);
if(OPAL_UNLIKELY(rc < 0)) {
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->sg_entry.length = max_data + reserve + sizeof(mca_btl_ud_header_t);
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
*size = max_data;
return &frag->base;
/* Case 4: copy into a max-sized frag, clamping the payload so that
 * payload + reserve fits within the max send size. */
} else {
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
if(OPAL_UNLIKELY(max_data + reserve > btl->btl_max_send_size)) {
max_data = btl->btl_max_send_size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor,
&iov, &iov_count, &max_data, &free_after);
if(OPAL_UNLIKELY(rc < 0)) {
MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags=0;
*size = max_data;
return &frag->base;
}
/* not reachable: every branch above returns */
return NULL;
}
/*
 * Cleanup any resources held by the BTL.  Nothing is actually torn
 * down here -- NOTE(review): the PD, CQs, QPs and free lists created in
 * mca_btl_ud_module_init are never destroyed; presumably reclaimed at
 * process exit.
 */
int mca_btl_ud_finalize(struct mca_btl_base_module_t* btl)
{
return OMPI_SUCCESS;
}
/*
* Initiate a send.
*/
/*
 * Initiate a send: tag the fragment and hand it to the endpoint-level
 * post routine.  The endpoint locking is commented out -- NOTE(review):
 * presumably mca_btl_ud_endpoint_post_send does its own locking;
 * confirm before re-enabling.
 */
int mca_btl_ud_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag)
{
int rc;
mca_btl_ud_frag_t* frag = (mca_btl_ud_frag_t*)descriptor;
/* frag is written to immediately below and on the post path */
OPAL_PREFETCH(frag, 1, 1);
MCA_BTL_UD_START_TIME(post_send);
frag->endpoint = endpoint;
frag->hdr->tag = tag;
/*OPAL_THREAD_LOCK(&endpoint->endpoint_lock);*/
rc = mca_btl_ud_endpoint_post_send(
(mca_btl_ud_module_t*)btl, endpoint, frag);
/*OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);*/
MCA_BTL_UD_END_TIME(post_send);
return rc;
}
/*
* Initialize the btl module by allocating a protection domain
* and creating both the high and low priority completion queues
*/
/*
 * Initialize the BTL module: allocate a protection domain and memory
 * pool, optionally create SRQs, create the high/low priority completion
 * queues and QPs, set up the free lists, and pre-post rd_num receive
 * descriptors on each QP.
 *
 * @param ud_btl (IN/OUT) module to initialize (ib_dev/ib_dev_context
 *                        must already be set)
 * @return OMPI_SUCCESS or OMPI_ERROR
 */
int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl)
{
    struct mca_mpool_base_resources_t mpool_resources;
    struct ibv_context *ctx = ud_btl->ib_dev_context;
    struct ibv_recv_wr* bad_wr;
    mca_btl_ud_frag_t* frag;
    ompi_free_list_item_t* item;
    uint32_t length;
    int32_t rc, i;

    /* Allocate a protection domain covering all of this module's
     * memory registrations and queues. */
    ud_btl->ib_pd = ibv_alloc_pd(ctx);
    if(NULL == ud_btl->ib_pd) {
        BTL_ERROR(("error allocating pd for %s errno says %s\n",
                    ibv_get_device_name(ud_btl->ib_dev), strerror(errno)));
        return OMPI_ERROR;
    }

    mpool_resources.ib_pd = ud_btl->ib_pd;
    ud_btl->super.btl_mpool =
        mca_mpool_base_module_create(mca_btl_ud_component.ib_mpool_name,
                &ud_btl->super, &mpool_resources);
    if(NULL == ud_btl->super.btl_mpool) {
        BTL_ERROR(("error creating openib memory pool! aborting ud btl initialization"));
        return OMPI_ERROR;
    }

#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    if(mca_btl_ud_component.use_srq) {
        struct ibv_srq_init_attr attr;
        /* Zero the whole attr struct; only max_wr/max_sge are set
         * below and fields such as srq_limit must not be garbage. */
        memset(&attr, 0, sizeof(attr));
        attr.attr.max_wr = mca_btl_ud_component.srq_rd_max;
        attr.attr.max_sge = mca_btl_ud_component.ib_sg_list_size;

        ud_btl->srd_posted_hp = 0;
        ud_btl->srd_posted_lp = 0;

        ud_btl->srq_hp = ibv_create_srq(ud_btl->ib_pd, &attr);
        if(NULL == ud_btl->srq_hp) {
            BTL_ERROR(("error in ibv_create_srq\n"));
            return OMPI_ERROR;
        }

        ud_btl->srq_lp = ibv_create_srq(ud_btl->ib_pd, &attr);
        /* BUG FIX: the original re-checked srq_hp here, so a failed
         * low-priority SRQ creation went undetected. */
        if(NULL == ud_btl->srq_lp) {
            BTL_ERROR(("error in ibv_create_srq\n"));
            return OMPI_ERROR;
        }
    } else {
        ud_btl->srq_hp = NULL;
        ud_btl->srq_lp = NULL;
    }
#endif

    /* Create the low and high priority completion queues */
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    ud_btl->ib_cq_lp =
        ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL);
    ud_btl->ib_cq_hp =
        ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL);
#else
    ud_btl->ib_cq_lp = ibv_create_cq(ctx,
            mca_btl_ud_component.ib_cq_size, NULL, NULL, 0);
    ud_btl->ib_cq_hp = ibv_create_cq(ctx,
            mca_btl_ud_component.ib_cq_size, NULL, NULL, 0);
#endif
    if(NULL == ud_btl->ib_cq_lp) {
        BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
                    ibv_get_device_name(ud_btl->ib_dev), strerror(errno)));
        return OMPI_ERROR;
    }
    if(NULL == ud_btl->ib_cq_hp) {
        BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
                    ibv_get_device_name(ud_btl->ib_dev), strerror(errno)));
        return OMPI_ERROR;
    }

    /* Set up our packet sequence numbers (24-bit PSNs) */
    ud_btl->addr.psn_hp = lrand48() & 0xffffff;
    ud_btl->addr.psn_lp = lrand48() & 0xffffff;

    /* Set up the QPs for this BTL */
    if(OMPI_SUCCESS != mca_btl_ud_endpoint_init_qp(&ud_btl->super,
                ud_btl->ib_cq_hp,
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
                ud_btl->srq_hp,
#endif
                &ud_btl->qp_hp,
                ud_btl->addr.psn_hp)) {
        return OMPI_ERROR;
    }
    if(OMPI_SUCCESS != mca_btl_ud_endpoint_init_qp(&ud_btl->super,
                ud_btl->ib_cq_lp,
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
                ud_btl->srq_lp,
#endif
                &ud_btl->qp_lp,
                ud_btl->addr.psn_lp)) {
        return OMPI_ERROR;
    }

    /* Place our QP numbers in our local address information */
    ud_btl->addr.qp_num_hp = ud_btl->qp_hp->qp_num;
    ud_btl->addr.qp_num_lp = ud_btl->qp_lp->qp_num;

    OBJ_CONSTRUCT(&ud_btl->ib_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ud_btl->send_free_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&ud_btl->send_free_max, ompi_free_list_t);
    OBJ_CONSTRUCT(&ud_btl->send_free_frag, ompi_free_list_t);
    OBJ_CONSTRUCT(&ud_btl->recv_free_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&ud_btl->recv_free_max, ompi_free_list_t);
    OBJ_CONSTRUCT(&ud_btl->pending_frags_hp, opal_list_t);
    OBJ_CONSTRUCT(&ud_btl->pending_frags_lp, opal_list_t);

    /* Initialize pool of send fragments.  Receive frags are the same
     * size plus room for the GRH (mca_btl_ud_ib_header_t). */
    length = sizeof(mca_btl_ud_frag_t) + sizeof(mca_btl_ud_header_t) +
        ud_btl->super.btl_eager_limit + 2*MCA_BTL_IB_FRAG_ALIGN;
    ompi_free_list_init(&ud_btl->send_free_eager,
            length,
            OBJ_CLASS(mca_btl_ud_send_frag_eager_t),
            mca_btl_ud_component.ib_free_list_num,
            mca_btl_ud_component.ib_free_list_max,
            mca_btl_ud_component.ib_free_list_inc,
            ud_btl->super.btl_mpool);
    ompi_free_list_init(&ud_btl->recv_free_eager,
            length + sizeof(mca_btl_ud_ib_header_t),
            OBJ_CLASS(mca_btl_ud_recv_frag_eager_t),
            mca_btl_ud_component.ib_free_list_num,
            mca_btl_ud_component.ib_free_list_max,
            mca_btl_ud_component.ib_free_list_inc,
            ud_btl->super.btl_mpool);

    length = sizeof(mca_btl_ud_frag_t) + sizeof(mca_btl_ud_header_t) +
        ud_btl->super.btl_max_send_size + 2*MCA_BTL_IB_FRAG_ALIGN;
    ompi_free_list_init(&ud_btl->send_free_max,
            length,
            OBJ_CLASS(mca_btl_ud_send_frag_max_t),
            mca_btl_ud_component.ib_free_list_num,
            mca_btl_ud_component.ib_free_list_max,
            mca_btl_ud_component.ib_free_list_inc,
            ud_btl->super.btl_mpool);

    /* Initialize pool of receive fragments */
    ompi_free_list_init (&ud_btl->recv_free_max,
            length + sizeof(mca_btl_ud_ib_header_t),
            OBJ_CLASS (mca_btl_ud_recv_frag_max_t),
            mca_btl_ud_component.ib_free_list_num,
            mca_btl_ud_component.ib_free_list_max,
            mca_btl_ud_component.ib_free_list_inc,
            ud_btl->super.btl_mpool);

    /* Headers-only frags, used when sending directly from user memory */
    length = sizeof(mca_btl_ud_frag_t) +
        sizeof(mca_btl_ud_header_t) + 2*MCA_BTL_IB_FRAG_ALIGN;
    ompi_free_list_init(&ud_btl->send_free_frag,
            length,
            OBJ_CLASS(mca_btl_ud_send_frag_frag_t),
            mca_btl_ud_component.ib_free_list_num,
            mca_btl_ud_component.ib_free_list_max,
            mca_btl_ud_component.ib_free_list_inc,
            ud_btl->super.btl_mpool);

    /* Post receive descriptors on both QPs */
    for(i = 0; i < ud_btl->rd_num; i++) {
        /* High Priority (eager) */
        OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_eager, item, rc);
        frag = (mca_btl_ud_frag_t*)item;
        frag->sg_entry.length = frag->size +
            sizeof(mca_btl_ud_header_t) + sizeof(mca_btl_ud_ib_header_t);
        if(ibv_post_recv(ud_btl->qp_hp,
                    &frag->wr_desc.rd_desc, &bad_wr)) {
            BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
            return OMPI_ERROR;
        }

        /* Low Priority (max) */
        OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_max, item, rc);
        frag = (mca_btl_ud_frag_t*)item;
        frag->sg_entry.length = frag->size +
            sizeof(mca_btl_ud_header_t) + sizeof(mca_btl_ud_ib_header_t);
        if(ibv_post_recv(ud_btl->qp_lp,
                    &frag->wr_desc.rd_desc, &bad_wr)) {
            BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
            return OMPI_ERROR;
        }
    }
    return OMPI_SUCCESS;
}
/*
* Dump profiling information
*/
/*
 * Dump profiling/debug information by delegating to the base BTL
 * dump routine.
 */
void mca_btl_ud_dump(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
int verbose)
{
mca_btl_base_dump(btl, endpoint, verbose);
}

Просмотреть файл

@ -1,603 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_UD_H
#define MCA_PTL_UD_H
/* Standard system includes */
#include <sys/types.h>
#include <string.h>
/* Open MPI includes */
#include "ompi/class/ompi_free_list.h"
#include "ompi/class/ompi_bitmap.h"
#include "orte/class/orte_pointer_array.h"
#include "opal/class/opal_value_array.h"
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "opal/util/output.h"
#include "opal/sys/timer.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "btl_ud_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MCA_BTL_IB_LEAVE_PINNED 1
/**
* UD Infiniband (IB) BTL component.
*/
struct mca_btl_ud_component_t {
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
uint32_t ib_max_btls;
/**< maximum number of hcas available to the IB component */
uint32_t ib_num_btls;
/**< number of hcas available to the IB component */
struct mca_btl_ud_module_t *ud_btls;
/**< array of available PTLs */
int ib_free_list_num;
/**< initial size of free lists */
int ib_free_list_max;
/**< maximum size of free lists */
int ib_free_list_inc;
/**< number of elements to alloc when growing free lists */
opal_list_t ib_procs;
/**< list of ib proc structures */
opal_mutex_t ib_lock;
/**< lock for accessing module state */
char* ib_mpool_name;
/**< name of ib memory pool */
int32_t sd_num; /**< maximum number of send descriptors to post to a QP */
int32_t rd_num; /**< number of receive descriptors to post to each QP */
int32_t srq_rd_max; /* maximum number of receive descriptors posted */
int32_t srq_rd_per_peer; /* number of receive descriptors to post per log2(peers) in SRQ mode */
int32_t srq_sd_max; /* maximum number of send descriptors posted */
size_t eager_limit; /**< largest message allocated from the eager (high-priority) pool */
size_t max_send_size; /**< largest message allocated from the max (low-priority) pool */
uint32_t reg_mru_len; /**< NOTE(review): presumably length of the registration MRU list; unused in code visible here */
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
uint32_t use_srq; /**< nonzero to use shared receive queues */
#endif
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
uint32_t ib_pkey_ix; /**< IB partition key index */
uint32_t ib_qkey; /**< NOTE(review): presumably the Q_Key shared by all UD QPs -- confirm in endpoint code */
uint32_t ib_service_level; /**< IB service level used when creating address handles */
uint32_t ib_src_path_bits; /**< source path bits used when creating address handles */
}; typedef struct mca_btl_ud_component_t mca_btl_ud_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_ud_component_t mca_btl_ud_component;
typedef mca_btl_base_recv_reg_t mca_btl_ud_recv_reg_t;
/**
* Profiling variables
*/
#if OMPI_ENABLE_DEBUG
#define MCA_BTL_UD_ENABLE_PROFILE 0
#else
#define MCA_BTL_UD_ENABLE_PROFILE 0
#endif
#if MCA_BTL_UD_ENABLE_PROFILE
#define MCA_BTL_UD_PROFILE_VAR(var) \
opal_timer_t avg_ ## var; \
opal_timer_t cnt_ ## var; \
opal_timer_t tmp_ ## var
struct mca_btl_ud_profile_t
{
MCA_BTL_UD_PROFILE_VAR(post_send);
MCA_BTL_UD_PROFILE_VAR(endpoint_send_conn);
MCA_BTL_UD_PROFILE_VAR(ibv_post_send);
MCA_BTL_UD_PROFILE_VAR(full_send);
};
typedef struct mca_btl_ud_profile_t mca_btl_ud_profile_t;
extern mca_btl_ud_profile_t mca_btl_ud_profile;
#endif
/**
* IB PTL Interface
*/
struct mca_btl_ud_module_t {
mca_btl_base_module_t super; /**< base PTL interface */
mca_btl_ud_recv_reg_t ib_reg[256]; /**< per-tag receive callbacks (see mca_btl_ud_register) */
uint8_t port_num; /**< ID of the PORT */
struct ibv_device* ib_dev; /* the ib device */
struct ibv_context* ib_dev_context;
struct ibv_pd* ib_pd; /* protection domain allocated in module_init */
struct ibv_cq* ib_cq_hp; /* high priority completion queue */
struct ibv_cq* ib_cq_lp; /* low priority completion queue */
struct mca_btl_ud_addr_t addr;
/**< local address information */
ompi_free_list_t send_free_eager;
/**< free list of eager buffer descriptors */
ompi_free_list_t send_free_max;
/**< free list of max buffer descriptors */
ompi_free_list_t send_free_frag;
/**< free list of frags only... used for pining memory */
ompi_free_list_t recv_free_eager;
/**< High priority free list of buffer descriptors */
ompi_free_list_t recv_free_max;
/**< Low priority free list of buffer descriptors */
opal_list_t pending_frags_hp;
/**< list of pending high priority frags */
opal_list_t pending_frags_lp;
/**< list of pending low priority frags */
opal_mutex_t ib_lock; /**< module level lock */
size_t ib_inline_max; /**< max size of inline send*/
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
struct ibv_srq *srq_hp;
struct ibv_srq *srq_lp;
int32_t srd_posted_hp;
int32_t srd_posted_lp;
#endif
int32_t rd_num; /**< receive descriptors posted per QP; scaled up by add_procs in SRQ mode */
#if 0
int32_t rd_posted_hp; /**< number of high priority descriptors posted */
int32_t rd_posted_lp; /**< number of low priority descriptors posted */
#endif
int32_t sd_wqe_hp; /**< number of available send wqe entries */
int32_t sd_wqe_lp; /**< number of available send wqe entries */
struct ibv_qp* qp_hp; /* high priority UD queue pair */
struct ibv_qp* qp_lp; /* low priority UD queue pair */
/* Local QP (Low and High) */
}; typedef struct mca_btl_ud_module_t mca_btl_ud_module_t;
struct mca_btl_ud_frag_t;
extern mca_btl_ud_module_t mca_btl_ud_module;
/**
* Register IB component parameters with the MCA framework
*/
extern int mca_btl_ud_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_ud_component_close(void);
/**
* IB component initialization.
*
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
* @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE)
* @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE)
*
* (1) read interface list from kernel and compare against component parameters
* then create a BTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) publish BTL addressing info
*
*/
extern mca_btl_base_module_t** mca_btl_ud_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* IB component progress.
*/
extern int mca_btl_ud_component_progress(void);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param btl (IN) BTL module
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BTL of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer.
*/
int mca_btl_ud_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata
);
/**
* Cleanup any resources held by the BTL.
*
* @param btl BTL instance.
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_ud_finalize(
struct mca_btl_base_module_t* btl
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN)
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (OUT) Set of (optional) peer addressing info.
* @param peers (IN/OUT) Set of processes that are reachable via this BTL.
* @return OMPI_SUCCESS or error status on failure.
*
*/
extern int mca_btl_ud_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
ompi_bitmap_t* reachable
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN) BTL instance
* @param nproc (IN) Number of processes.
* @param procs (IN) Set of processes.
* @param peers (IN) Set of peer data structures.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_btl_ud_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
* PML->BTL Initiate a send of the specified size.
*
* @param btl (IN) BTL instance
* @param btl_base_peer (IN) BTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting BTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
*/
extern int mca_btl_ud_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
* Allocate a descriptor.
*
* @param btl (IN) BTL module
* @param size (IN) Requested descriptor size.
*/
extern mca_btl_base_descriptor_t* mca_btl_ud_alloc(
struct mca_btl_base_module_t* btl,
size_t size);
/**
* Return a segment allocated by this BTL.
*
* @param btl (IN) BTL module
* @param descriptor (IN) Allocated descriptor.
*/
extern int mca_btl_ud_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des);
/**
* Pack data and return a descriptor that can be
* used for send/put.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
mca_btl_base_descriptor_t* mca_btl_ud_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
);
/**
* Return a send fragment to the modules free list.
*
* @param btl (IN) BTL instance
* @param frag (IN) IB send fragment
*
*/
extern void mca_btl_ud_send_frag_return(
struct mca_btl_base_module_t* btl,
struct mca_btl_ud_frag_t*
);
int mca_btl_ud_module_init(mca_btl_ud_module_t* ud_btl);
void mca_btl_ud_dump(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
int verbose);
/*
* Profiling stuff
*/
#if MCA_BTL_UD_ENABLE_PROFILE
#define MCA_BTL_UD_START_TIME(var) \
((mca_btl_ud_profile.tmp_ ## var) = opal_sys_timer_get_cycles())
#define MCA_BTL_UD_END_TIME(var) \
do { \
mca_btl_ud_profile.avg_ ## var += \
opal_sys_timer_get_cycles() - mca_btl_ud_profile.tmp_ ## var; \
mca_btl_ud_profile.cnt_ ## var++; \
} while(0)
#define MCA_BTL_UD_SHOW_TIME(var) \
BTL_VERBOSE((" " #var " avg %lu cnt %lu", \
(mca_btl_ud_profile.avg_ ## var) / (mca_btl_ud_profile.cnt_ ## var), \
mca_btl_ud_profile.cnt_ ## var));
#else
#define MCA_BTL_UD_START_TIME(var)
#define MCA_BTL_UD_END_TIME(var)
#define MCA_BTL_UD_SHOW_TIME(var)
#endif
/*
* Post non-SRQ receive buffers
*/
#define MCA_BTL_UD_ENDPOINT_POST_RR_HIGH(ud_btl, \
additional) \
{ \
do { \
OPAL_THREAD_LOCK(&ud_btl->ib_lock); \
if(ud_btl->rd_posted_hp <= mca_btl_ud_component.rd_low+additional && \
ud_btl->rd_posted_hp < ud_btl->rd_num) { \
MCA_BTL_UD_ENDPOINT_POST_RR_SUB(ud_btl->rd_num - \
ud_btl->rd_posted_hp, \
&ud_btl->recv_free_eager, \
ud_btl->rd_posted_hp, \
ud_btl->qp_hp); \
} \
OPAL_THREAD_UNLOCK(&ud_btl->ib_lock); \
} while(0); \
}
#define MCA_BTL_UD_ENDPOINT_POST_RR_LOW(ud_btl, \
additional) { \
do { \
OPAL_THREAD_LOCK(&ud_btl->ib_lock); \
if(ud_btl->rd_posted_lp <= mca_btl_ud_component.rd_low+additional && \
ud_btl->rd_posted_lp < ud_btl->rd_num){ \
MCA_BTL_UD_ENDPOINT_POST_RR_SUB(ud_btl->rd_num - \
ud_btl->rd_posted_lp, \
&ud_btl->recv_free_max, \
ud_btl->rd_posted_lp, \
ud_btl->qp_lp \
); } \
OPAL_THREAD_UNLOCK(&ud_btl->ib_lock); \
} while(0); \
}
/*
 * Worker for the POST_RR_* macros: pull 'cnt' fragments off 'frag_list',
 * size each SGE for payload + BTL header + the 40-byte GRH the HCA will
 * deposit, and post them one at a time to 'qp'.
 *
 * NOTE: expands a `return OMPI_ERROR;` on ibv_post_recv() failure, so it
 * may only be used inside a function returning int.
 *
 * Fixed: removed the trailing `;` after while(0) so the macro composes
 * safely with the caller's own semicolon.
 */
#define MCA_BTL_UD_ENDPOINT_POST_RR_SUB(cnt, \
                                        frag_list, \
                                        rd_posted, \
                                        qp) \
do { \
    int32_t i; \
    int rc; \
    int32_t num_post = cnt; \
    struct ibv_recv_wr* bad_wr; \
    for(i = 0; i < num_post; i++) { \
        opal_list_item_t* item; \
        mca_btl_ud_frag_t* frag; \
        OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
        frag = (mca_btl_ud_frag_t*) item; \
        frag->sg_entry.length = frag->size + \
            sizeof(mca_btl_ud_header_t) + sizeof(mca_btl_ud_ib_header_t); \
        if(ibv_post_recv(qp, \
                &frag->wr_desc.rd_desc, \
                &bad_wr)) { \
            BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
            return OMPI_ERROR; \
        } \
    } \
    OPAL_THREAD_ADD32(&(rd_posted), num_post); \
} while(0)
/* NOTE(review): disabled alternative implementation that chains all the
   receive WRs together and posts them with a single ibv_post_recv()
   call.  Kept for reference only.  If ever re-enabled: it passes the
   chain tail (prev_wr) as ibv_post_recv()'s bad_wr output argument,
   which clobbers the tail pointer -- use a separate bad_wr variable. */
#if 0
#define MCA_BTL_UD_ENDPOINT_POST_RR_SUB(cnt, \
frag_list, \
rd_posted, \
qp ) \
do { \
int32_t i; \
int rc; \
int32_t num_post = cnt; \
struct ibv_recv_wr* head_wr; \
struct ibv_recv_wr* prev_wr; \
opal_list_item_t* item; \
mca_btl_ud_frag_t* frag; \
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
frag = (mca_btl_ud_frag_t*)item; \
head_wr = &frag->wr_desc.rd_desc; \
prev_wr = head_wr; \
OPAL_OUTPUT((0, "posting %d recvs\n", num_post)); \
for(i = 1; i < num_post; i++) { \
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
frag = (mca_btl_ud_frag_t*) item; \
prev_wr->next = &frag->wr_desc.rd_desc; \
prev_wr = prev_wr->next; \
}\
prev_wr->next = NULL; \
if(ibv_post_recv(qp, head_wr, &prev_wr)) { \
BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
return OMPI_ERROR; \
}\
OPAL_THREAD_ADD32(&(rd_posted), num_post); \
} while(0);
#endif
/* Queue 'frag' on a pending list (under 'lock') and return its send
   token.  Name retained from the openib BTL this code derives from.
   Fixed: dropped the trailing `;` after while(0) so the macro is safe
   in unbraced if/else bodies. */
#define BTL_OPENIB_INSERT_PENDING(frag, frag_list, tokens, lock) \
do { \
    OPAL_THREAD_LOCK(&lock); \
    opal_list_append(&frag_list, (opal_list_item_t *)frag); \
    OPAL_THREAD_UNLOCK(&lock); \
    OPAL_THREAD_ADD32(&tokens, 1); \
} while(0)
/*
* Post SRQ receive buffers
*/
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
/* Refill the high-priority shared receive queue when the posted count
   hits the low watermark.  Fixed the `{ do{...}while(0); }` wrapper
   (stray semicolon at call sites) into a plain do/while(0). */
#define MCA_BTL_UD_POST_SRR_HIGH(ud_btl, additional) \
do { \
    OPAL_THREAD_LOCK(&ud_btl->ib_lock); \
    if(ud_btl->srd_posted_hp <= ud_btl->rd_low+additional && \
            ud_btl->srd_posted_hp < ud_btl->rd_num) { \
        MCA_BTL_UD_POST_SRR_SUB(ud_btl->rd_num - \
                ud_btl->srd_posted_hp, \
                ud_btl, \
                &ud_btl->recv_free_eager, \
                &ud_btl->srd_posted_hp, \
                ud_btl->srq_hp); \
    } \
    OPAL_THREAD_UNLOCK(&ud_btl->ib_lock); \
} while(0)
/* Refill the low-priority shared receive queue; mirror of
   MCA_BTL_UD_POST_SRR_HIGH, with the same do/while(0) fix. */
#define MCA_BTL_UD_POST_SRR_LOW(ud_btl, additional) \
do { \
    OPAL_THREAD_LOCK(&ud_btl->ib_lock); \
    if(ud_btl->srd_posted_lp <= ud_btl->rd_low+additional && \
            ud_btl->srd_posted_lp < ud_btl->rd_num) { \
        MCA_BTL_UD_POST_SRR_SUB(ud_btl->rd_num - \
                ud_btl->srd_posted_lp, \
                ud_btl, \
                &ud_btl->recv_free_max, \
                &ud_btl->srd_posted_lp, \
                ud_btl->srq_lp); \
    } \
    OPAL_THREAD_UNLOCK(&ud_btl->ib_lock); \
} while(0)
/*
 * Worker for the POST_SRR_* macros: pull 'cnt' fragments from
 * 'frag_list' and post them to the shared receive queue 'srq'.  The SGE
 * length covers the payload plus the gap between the BTL header and the
 * segment start (i.e. includes the BTL header).
 *
 * NOTE: expands a `return OMPI_ERROR;` on failure, so it may only be
 * used inside a function returning int.
 *
 * Fixed: `{ do{...}while(0); }` reduced to a plain do/while(0) so the
 * macro composes with the caller's semicolon.
 */
#define MCA_BTL_UD_POST_SRR_SUB(cnt, \
                                ud_btl, \
                                frag_list, \
                                srd_posted, \
                                srq) \
do { \
    int32_t i; \
    int32_t num_post = cnt; \
    opal_list_item_t* item = NULL; \
    mca_btl_ud_frag_t* frag = NULL; \
    struct ibv_recv_wr *bad_wr; \
    int32_t rc; \
    for(i = 0; i < num_post; i++) { \
        OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
        frag = (mca_btl_ud_frag_t*) item; \
        frag->sg_entry.length = frag->size + \
            ((unsigned char*) frag->segment.seg_addr.pval- \
             (unsigned char*) frag->hdr); \
        if(ibv_post_srq_recv(srq, &frag->wr_desc.rd_desc, &bad_wr)) { \
            BTL_ERROR(("error posting receive descriptors to shared receive queue: %s",\
                strerror(errno))); \
            return OMPI_ERROR; \
        } \
    } \
    OPAL_THREAD_ADD32(srd_posted, num_post); \
} while(0)
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,608 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/prefetch.h"
#include "opal/event/event.h"
#include "opal/util/if.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "opal/sys/timer.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/mca/mpool/base/base.h"
#include "btl_ud.h"
#include "btl_ud_frag.h"
#include "btl_ud_endpoint.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/datatype/convertor.h"
#include "ompi/mca/mpool/mpool.h"
#include <sysfs/libsysfs.h>
#include <infiniband/verbs.h>
#include <errno.h>
#include <string.h> /* for strerror()*/
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
/* Global component descriptor: registers the open/close/init/progress
   entry points with the MCA framework. */
mca_btl_ud_component_t mca_btl_ud_component = {
    {
        /* First, the mca_base_component_t struct containing meta information
           about the component itself */
        {
            /* Indicate that we are a btl v1.0.1 component (which also
               implies a specific MCA version) */
            MCA_BTL_BASE_VERSION_1_0_1,
            "ud", /* MCA component name */
            OMPI_MAJOR_VERSION, /* MCA component major version */
            OMPI_MINOR_VERSION, /* MCA component minor version */
            OMPI_RELEASE_VERSION, /* MCA component release version */
            mca_btl_ud_component_open, /* component open */
            mca_btl_ud_component_close /* component close */
        },
        /* Next the MCA v1.0.0 component meta data */
        {
            /* Whether the component is checkpointable or not */
            false
        },
        mca_btl_ud_component_init,
        mca_btl_ud_component_progress,
    }
};
/*
 * Profiling information
 */
#if MCA_BTL_UD_ENABLE_PROFILE
/* Global accumulator read/written by the MCA_BTL_UD_*_TIME macros. */
mca_btl_ud_profile_t mca_btl_ud_profile = {0};
#endif
/*
 * Utility routine: register a string-valued MCA parameter scoped to
 * this component and hand back its (possibly user-overridden) value.
 */
static inline void mca_btl_ud_param_register_string(
    const char* name, const char* desc,
    const char* dflt, char** value_out)
{
    mca_base_param_reg_string(&mca_btl_ud_component.super.btl_version,
                              name, desc,
                              false, false, /* not internal, not read-only */
                              dflt, value_out);
}
/*
 * Utility routine: register an integer-valued MCA parameter scoped to
 * this component and hand back its (possibly user-overridden) value.
 */
static inline void mca_btl_ud_param_register_int(
    const char* name, const char* desc,
    int dflt, int* value_out)
{
    mca_base_param_reg_int(&mca_btl_ud_component.super.btl_version,
                           name, desc,
                           false, false, /* not internal, not read-only */
                           dflt, value_out);
}
/*
 * Called by MCA framework to open the component, registers
 * component parameters.
 *
 * Registers every MCA parameter this BTL understands, then derives the
 * component-wide eager/max send limits (the registered values are
 * wire sizes; the usable payload limits have the BTL header
 * subtracted below).  Always returns OMPI_SUCCESS.
 */
int mca_btl_ud_component_open(void)
{
    int val;

    /* initialize state */
    mca_btl_ud_component.ib_num_btls=0;
    mca_btl_ud_component.ud_btls=NULL;

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_btl_ud_component.ib_procs, opal_list_t);

    /* register IB component parameters */
    mca_btl_ud_param_register_int ("max_btls", "maximum number of HCAs/ports to use",
        4, (int*)&mca_btl_ud_component.ib_max_btls);
    mca_btl_ud_param_register_int ("free_list_num", "intial size of free lists",
        8, &mca_btl_ud_component.ib_free_list_num);
    mca_btl_ud_param_register_int ("free_list_max", "maximum size of free lists",
        -1, &mca_btl_ud_component.ib_free_list_max);
    mca_btl_ud_param_register_int ("free_list_inc", "increment size of free lists",
        32, &mca_btl_ud_component.ib_free_list_inc);
    mca_btl_ud_param_register_string("mpool", "name of the memory pool to be used",
        "openib", &mca_btl_ud_component.ib_mpool_name);
    mca_btl_ud_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
        16, (int*) &mca_btl_ud_component.reg_mru_len);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    mca_btl_ud_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors",
        0, (int*) &mca_btl_ud_component.use_srq);
#endif
    mca_btl_ud_param_register_int("ib_cq_size", "size of the IB completion queue",
        2000, (int*) &mca_btl_ud_component.ib_cq_size);
    mca_btl_ud_param_register_int("ib_sg_list_size", "size of IB segment list",
        4, (int*) &mca_btl_ud_component.ib_sg_list_size);
    mca_btl_ud_param_register_int("ib_pkey_ix", "IB pkey index",
        0, (int*) &mca_btl_ud_component.ib_pkey_ix);
    mca_btl_ud_param_register_int("ib_qkey", "IB qkey",
        0x01330133, (int*) &mca_btl_ud_component.ib_qkey);
    mca_btl_ud_param_register_int("ib_service_level", "IB service level",
        0, (int*) &mca_btl_ud_component.ib_service_level);
    mca_btl_ud_param_register_int("ib_src_path_bits", "IB source path bits",
        0, (int*) &mca_btl_ud_component.ib_src_path_bits);
    mca_btl_ud_param_register_int ("exclusivity", "BTL exclusivity",
        MCA_BTL_EXCLUSIVITY_DEFAULT, (int*) &mca_btl_ud_module.super.btl_exclusivity);
    mca_btl_ud_param_register_int("sd_num", "maximum descriptors to post to a QP",
        16, (int*) &mca_btl_ud_component.sd_num);
    mca_btl_ud_param_register_int("rd_num", "number of receive descriptors to post to a QP",
        500, (int*) &mca_btl_ud_component.rd_num);
    mca_btl_ud_param_register_int("srq_rd_max", "Max number of receive descriptors posted per SRQ.",
        1000, (int*) &mca_btl_ud_component.srq_rd_max);
    mca_btl_ud_param_register_int("srq_rd_per_peer", "Number of receive descriptors posted per peer. (SRQ)",
        16, (int*) &mca_btl_ud_component.srq_rd_per_peer);
    mca_btl_ud_param_register_int("srq_sd_max", "Maximum number of send descriptors posted. (SRQ)",
        8, &mca_btl_ud_component.srq_sd_max);

    /* TODO - this assumes a 2k UD MTU - should query/do something more intelligent */
    mca_btl_ud_param_register_int ("eager_limit", "eager send limit",
        2047, &val);
    mca_btl_ud_module.super.btl_eager_limit = val;
    mca_btl_ud_param_register_int ("min_send_size", "minimum send size",
        2048, &val);
    mca_btl_ud_module.super.btl_min_send_size = val;
    mca_btl_ud_param_register_int ("max_send_size", "maximum send size",
        2048, &val);
    mca_btl_ud_module.super.btl_max_send_size = val;
    mca_btl_ud_param_register_int("bandwidth", "Approximate maximum bandwidth of interconnect",
        800, (int*) &mca_btl_ud_module.super.btl_bandwidth);

    /* The registered limits are on-the-wire sizes; the payload limits
       visible to the PML exclude our own header. */
    mca_btl_ud_module.super.btl_eager_limit -= sizeof(mca_btl_ud_header_t);
    mca_btl_ud_module.super.btl_max_send_size -= sizeof(mca_btl_ud_header_t);
    /* Cache the adjusted limits where the frag constructors can see them. */
    mca_btl_ud_component.max_send_size = mca_btl_ud_module.super.btl_max_send_size;
    mca_btl_ud_component.eager_limit = mca_btl_ud_module.super.btl_eager_limit;
    return OMPI_SUCCESS;
}
/*
 * Component cleanup.
 *
 * Emits the accumulated profiling averages; the SHOW_TIME macros are
 * no-ops unless MCA_BTL_UD_ENABLE_PROFILE is set.  Always returns
 * OMPI_SUCCESS.
 */
int mca_btl_ud_component_close(void)
{
    /* Calculate and print profiling numbers */
    MCA_BTL_UD_SHOW_TIME(post_send);
    MCA_BTL_UD_SHOW_TIME(endpoint_send_conn);
    MCA_BTL_UD_SHOW_TIME(ibv_post_send);
    MCA_BTL_UD_SHOW_TIME(full_send);
    return OMPI_SUCCESS;
}
/*
 * Publish our UD address information through the modex so every peer
 * can reach us.  One mca_btl_ud_addr_t (HP/LP QP numbers, LID, ...) is
 * sent per local BTL module; with zero modules an empty blob is sent so
 * peers still complete their exchange.
 */
static int
mca_btl_ud_modex_send(void)
{
    int rc;
    mca_btl_ud_addr_t* addr_tbl = NULL;
    size_t tbl_bytes =
        mca_btl_ud_component.ib_num_btls * sizeof(mca_btl_ud_addr_t);

    if(0 != tbl_bytes) {
        size_t idx;

        addr_tbl = (mca_btl_ud_addr_t*)malloc(tbl_bytes);
        if(NULL == addr_tbl) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Copy each module's address block into the outgoing table. */
        for(idx = 0; idx < mca_btl_ud_component.ib_num_btls; idx++) {
            mca_btl_ud_module_t* module = &mca_btl_ud_component.ud_btls[idx];
            addr_tbl[idx] = module->addr;
            BTL_VERBOSE(("modex_send HP QP num %d, LP QP num %d, LID = %d",
                addr_tbl[idx].qp_num_hp,
                addr_tbl[idx].qp_num_lp,
                addr_tbl[idx].lid));
        }
    }

    rc = mca_pml_base_modex_send(&mca_btl_ud_component.super.btl_version,
                                 addr_tbl, tbl_bytes);
    free(addr_tbl); /* free(NULL) is a no-op */
    return rc;
}
/*
 * UD component initialization:
 * (1) read interface list from kernel and compare against component parameters
 *     then create a BTL instance for selected interfaces
 * (2) post OOB receive for incoming connection attempts
 * (3) register BTL parameters with the MCA
 *
 * Returns a malloc'ed array of module pointers (caller-owned) and sets
 * *num_btl_modules, or NULL on failure / no usable ports.
 *
 * NOTE(review): several error paths return NULL without closing the
 * opened device contexts or freeing ib_devs/btl_list contents -- a
 * one-shot init leak, but worth fixing if this code is revived.
 */
mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
                                                  bool enable_progress_threads,
                                                  bool enable_mpi_threads)
{
    struct ibv_device **ib_devs;
    struct ibv_device* ib_dev;
    int32_t num_devs;
    mca_btl_base_module_t** btls;
    uint32_t i, j;
    opal_list_t btl_list;
    mca_btl_ud_module_t* ud_btl;
    mca_btl_base_selected_module_t* ib_selected;
    opal_list_item_t* item;
    unsigned short seedv[3];

    /* initialization */
    *num_btl_modules = 0;
    num_devs = 0;

    /* Seed the PRNG from our vpid plus the cycle counter (used for
       initial PSNs elsewhere in the BTL). */
    seedv[0] = orte_process_info.my_name->vpid;
    seedv[1] = opal_sys_timer_get_cycles();
    seedv[2] = opal_sys_timer_get_cycles();
    seed48(seedv);

#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
    ib_devs = ibv_get_device_list(&num_devs);
#else
    /* Determine the number of hca's available on the host */
    dev_list = ibv_get_devices();
    if (NULL == dev_list) {
        mca_btl_base_error_no_nics("OpenIB", "HCA");
        mca_btl_ud_component.ib_num_btls = 0;
        /* Still participate in the modex so peers don't hang. */
        mca_btl_ud_modex_send();
        return NULL;
    }
    dlist_start(dev_list);
    dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
        num_devs++;
#endif
    if(0 == num_devs) {
        mca_btl_base_error_no_nics("OpenIB", "HCA");
        mca_btl_ud_modex_send();
        return NULL;
    }

#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST == 0
    /* Allocate space for the ib devices */
    /* NOTE(review): sizeof(struct ibv_dev*) looks like a typo for
       sizeof(struct ibv_device*); harmless since all object pointers
       are the same size, but should be corrected. */
    ib_devs = (struct ibv_device**) malloc(num_devs * sizeof(struct ibv_dev*));
    if(NULL == ib_devs) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return NULL;
    }
    dlist_start(dev_list);
    i = 0;
    dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
        ib_devs[i++] = ib_dev;
#endif

    /** We must loop through all the hca id's, get their handles and
        for each hca we query the number of ports on the hca and set up
        a distinct btl module for each hca port */
    OBJ_CONSTRUCT(&btl_list, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_ud_component.ib_lock, opal_mutex_t);

    for(i = 0; (int32_t)i < num_devs
            && mca_btl_ud_component.ib_num_btls < mca_btl_ud_component.ib_max_btls; i++){
        struct ibv_device_attr ib_dev_attr;
        struct ibv_context* ib_dev_context;

        ib_dev = ib_devs[i];
        ib_dev_context = ibv_open_device(ib_dev);
        if(!ib_dev_context) {
            BTL_ERROR((" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
            return NULL;
        }
        if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
            BTL_ERROR(("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
            return NULL;
        }

        /* Note ports are 1 based hence j = 1 */
        for(j = 1; j <= ib_dev_attr.phys_port_cnt; j++) {
            struct ibv_port_attr ib_port_attr;
            if(ibv_query_port(ib_dev_context, (uint8_t)j, &ib_port_attr)) {
                BTL_ERROR(("error getting port attributes for device %s port number %d errno says %s",
                    ibv_get_device_name(ib_dev), j, strerror(errno)));
                return NULL;
            }

            /* One BTL module per ACTIVE port. */
            if(IBV_PORT_ACTIVE == ib_port_attr.state) {
                /* NOTE(review): malloc result unchecked before memcpy. */
                ud_btl =
                    (mca_btl_ud_module_t*)malloc(sizeof(mca_btl_ud_module_t));
                memcpy(ud_btl, &mca_btl_ud_module, sizeof(mca_btl_ud_module_t));
                ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
                ib_selected->btl_module = (mca_btl_base_module_t*)ud_btl;
                ud_btl->ib_dev = ib_dev;
                ud_btl->ib_dev_context = ib_dev_context;
                ud_btl->port_num = (uint8_t)j;
                /* NOTE(review): 'subnet' is filled with the SM LID here,
                   not a subnet prefix -- confirm downstream users expect
                   that. */
                ud_btl->addr.subnet = ib_port_attr.sm_lid;
                ud_btl->addr.lid = ib_port_attr.lid;
                opal_list_append(&btl_list, (opal_list_item_t*) ib_selected);
                if(++mca_btl_ud_component.ib_num_btls >=
                        mca_btl_ud_component.ib_max_btls)
                    break;
            }
        }
    }

    /* Allocate space for btl modules */
    mca_btl_ud_component.ud_btls = (mca_btl_ud_module_t*)
        malloc(sizeof(mca_btl_ud_module_t) * mca_btl_ud_component.ib_num_btls);
    if(NULL == mca_btl_ud_component.ud_btls) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return NULL;
    }
    btls = (struct mca_btl_base_module_t**)
        malloc(mca_btl_ud_component.ib_num_btls * sizeof(mca_btl_ud_module_t*));
    if(NULL == btls) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return NULL;
    }

    /* Move the selected modules out of the temporary list into the
       component's contiguous array, then initialize each one. */
    for(i = 0; i < mca_btl_ud_component.ib_num_btls; i++){
        item = opal_list_remove_first(&btl_list);
        ib_selected = (mca_btl_base_selected_module_t*)item;
        ud_btl = (mca_btl_ud_module_t*)ib_selected->btl_module;
        memcpy(&(mca_btl_ud_component.ud_btls[i]),
            ud_btl, sizeof(mca_btl_ud_module_t));
        free(ib_selected);
        free(ud_btl);
        ud_btl = &mca_btl_ud_component.ud_btls[i];
        ud_btl->rd_num = mca_btl_ud_component.rd_num;
        ud_btl->sd_wqe_lp = mca_btl_ud_component.sd_num;
        ud_btl->sd_wqe_hp = mca_btl_ud_component.sd_num;

        /* Initialize module state */
        if(mca_btl_ud_module_init(ud_btl) != OMPI_SUCCESS) {
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
            ibv_free_device_list(ib_devs);
#else
            free(ib_devs);
#endif
            return NULL;
        }
        btls[i] = &ud_btl->super;
    }

    /* Publish our addresses and report how many modules we built. */
    mca_btl_ud_modex_send();

    *num_btl_modules = mca_btl_ud_component.ib_num_btls;
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
    ibv_free_device_list(ib_devs);
#else
    free(ib_devs);
#endif
    return btls;
}
/*
 * IB component progress.
 *
 * Drains up to MCA_BTL_UD_NUM_WC work completions from the high- then
 * low-priority CQ of every module.  Send completions return a WQE
 * credit and drain the corresponding pending-frag queue; receive
 * completions are delivered to the registered callback and the buffers
 * are chained and reposted in a single ibv_post_recv() per CQ.
 * Returns the number of completions processed, or OMPI_ERROR.
 */
/* Max completions drained per CQ per progress invocation. */
#define MCA_BTL_UD_NUM_WC 64

int mca_btl_ud_component_progress()
{
    uint32_t i;
    int count = 0, ne, j;
    mca_btl_ud_frag_t* frag;
    struct ibv_recv_wr* bad_wr;
    struct ibv_recv_wr* head_wr;
    mca_btl_ud_module_t* ud_btl;
    mca_btl_base_recv_reg_t* reg;
    struct ibv_wc wc[MCA_BTL_UD_NUM_WC];
    struct ibv_wc* cwc;

    /* Poll for completions */
    for(i = 0; i < mca_btl_ud_component.ib_num_btls; i++) {
        ud_btl = &mca_btl_ud_component.ud_btls[i];

        /* --- high-priority (eager) CQ --- */
        ne = ibv_poll_cq(ud_btl->ib_cq_hp, MCA_BTL_UD_NUM_WC, wc);
        if(OPAL_UNLIKELY(ne < 0)) {
            BTL_ERROR(("error polling HP CQ with %d errno says %s\n",
                ne, strerror(errno)));
            return OMPI_ERROR;
        }

        head_wr = NULL;  /* chain of recv buffers to repost */
        for(j = 0; j < ne; j++) {
            cwc = &wc[j];
            if(OPAL_UNLIKELY(cwc->status != IBV_WC_SUCCESS)) {
                BTL_ERROR(("error polling HP CQ with status %d for wr_id %llu opcode %d\n",
                    cwc->status, cwc->wr_id, cwc->opcode));
                return OMPI_ERROR;
            }

            /* Handle work completions; wr_id carries the frag pointer. */
            switch(cwc->opcode) {
            case IBV_WC_SEND :
                frag = (mca_btl_ud_frag_t*)(unsigned long)cwc->wr_id;
                frag->base.des_cbfunc(&ud_btl->super,
                    frag->endpoint, &frag->base, OMPI_SUCCESS);

                /* Increment send counter, post if any sends are queued */
                OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, 1);
                if(OPAL_UNLIKELY(!opal_list_is_empty(&ud_btl->pending_frags_hp))) {
                    frag = (mca_btl_ud_frag_t*)
                        opal_list_remove_first(&ud_btl->pending_frags_hp);
                    mca_btl_ud_endpoint_post_send(ud_btl, frag->endpoint, frag);
                }
                break;
            case IBV_WC_RECV:
                /* Payload starts after the BTL header; byte_len also
                   counts our header plus the 40-byte GRH, so strip both. */
                frag = (mca_btl_ud_frag_t*)(unsigned long)cwc->wr_id;
                reg = &ud_btl->ib_reg[frag->hdr->tag];
                frag->segment.seg_addr.pval = frag->hdr + 1;
                frag->segment.seg_len = cwc->byte_len -
                    sizeof(mca_btl_ud_header_t) -
                    sizeof(mca_btl_ud_ib_header_t);
                reg->cbfunc(&ud_btl->super,
                    frag->hdr->tag, &frag->base, reg->cbdata);

                /* Add recv to linked list for reposting */
                frag->wr_desc.rd_desc.next = head_wr;
                head_wr = &frag->wr_desc.rd_desc;
                break;
            default:
                BTL_ERROR(("Unhandled work completion opcode is %d", cwc->opcode));
                break;
            }
        }
        count += ne;

        /* Repost any HP recv buffers all at once */
        if(OPAL_LIKELY(head_wr)) {
            if(OPAL_UNLIKELY(ibv_post_recv(ud_btl->qp_hp, head_wr, &bad_wr))) {
                BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
                return OMPI_ERROR;
            }
            head_wr = NULL;
        }

        /* --- low-priority (max/frag) CQ: same flow as above --- */
        ne = ibv_poll_cq(ud_btl->ib_cq_lp, MCA_BTL_UD_NUM_WC, wc);
        if(OPAL_UNLIKELY(ne < 0)){
            BTL_ERROR(("error polling LP CQ with %d errno says %s",
                ne, strerror(errno)));
            return OMPI_ERROR;
        }
        for(j = 0; j < ne; j++) {
            cwc = &wc[j];
            if(OPAL_UNLIKELY(cwc->status != IBV_WC_SUCCESS)) {
                BTL_ERROR(("error polling LP CQ with status %d for wr_id %llu opcode %d",
                    cwc->status, cwc->wr_id, cwc->opcode));
                return OMPI_ERROR;
            }

            /* Handle n/w completions */
            switch(cwc->opcode) {
            case IBV_WC_SEND:
                frag = (mca_btl_ud_frag_t*) (unsigned long) cwc->wr_id;
                frag->base.des_cbfunc(&ud_btl->super,
                    frag->endpoint, &frag->base, OMPI_SUCCESS);

                /* Increment send counter, post if any sends are queued */
                OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, 1);
                if(OPAL_UNLIKELY(!opal_list_is_empty(&ud_btl->pending_frags_lp))) {
                    frag = (mca_btl_ud_frag_t*)
                        opal_list_remove_first(&ud_btl->pending_frags_lp);
                    mca_btl_ud_endpoint_post_send(ud_btl, frag->endpoint, frag);
                }
                break;
            case IBV_WC_RECV:
                /* Process a RECV */
                frag = (mca_btl_ud_frag_t*) (unsigned long) cwc->wr_id;
                reg = &ud_btl->ib_reg[frag->hdr->tag];
                frag->segment.seg_addr.pval = frag->hdr + 1;
                frag->segment.seg_len =
                    cwc->byte_len - sizeof(mca_btl_ud_header_t) -
                    sizeof(mca_btl_ud_ib_header_t);

                /* call registered callback */
                reg->cbfunc(&ud_btl->super,
                    frag->hdr->tag, &frag->base, reg->cbdata);

                /* Add recv to linked list for reposting */
                frag->wr_desc.rd_desc.next = head_wr;
                head_wr = &frag->wr_desc.rd_desc;
                break;
            default:
                BTL_ERROR(("Unhandled work completion opcode %d", cwc->opcode));
                break;
            }
        }
        count += ne;

        /* Repost any LP recv buffers all at once */
        if(OPAL_LIKELY(head_wr)) {
            if(OPAL_UNLIKELY(ibv_post_recv(ud_btl->qp_lp, head_wr, &bad_wr))) {
                BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
                return OMPI_ERROR;
            }
        }
    }
    return count;
}

Просмотреть файл

@ -1,218 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "opal/prefetch.h"
#include "ompi/types.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "btl_ud.h"
#include "btl_ud_endpoint.h"
#include "btl_ud_proc.h"
#include "btl_ud_frag.h"
#include "ompi/class/ompi_free_list.h"
#include <errno.h>
#include <string.h>
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
/*
 * Post a send to the work queue.
 *
 * Selects the high-priority QP for eager-sized fragments and the
 * low-priority QP otherwise.  A send-WQE credit is taken with an atomic
 * decrement; if none is available the frag is queued on the module's
 * pending list (drained from component_progress()) and OMPI_SUCCESS is
 * returned -- queuing is not an error.  Small messages are sent inline
 * when they fit under ib_inline_max.
 */
inline int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
                                         mca_btl_ud_endpoint_t * endpoint,
                                         mca_btl_ud_frag_t * frag)
{
    struct ibv_qp* ib_qp;
    struct ibv_send_wr* bad_wr;

    /* Have to be careful here - UD adds a 40 byte header, but it is not
       included on the sending side. */
    frag->sg_entry.length = frag->segment.seg_len + sizeof(mca_btl_ud_header_t);
    frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;

    if(frag->size == ud_btl->super.btl_eager_limit) {
        /* Take an HP send credit; on underflow, give it back and queue. */
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, -1) < 0)) {
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_hp, 1);
            opal_list_append(&ud_btl->pending_frags_hp,
                (opal_list_item_t*)frag);
            return OMPI_SUCCESS;
        }

        ib_qp = ud_btl->qp_hp;
        frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_hp;
        frag->wr_desc.sr_desc.wr.ud.remote_qpn =
            endpoint->rem_addr.qp_num_hp;

        /* Inline the payload when the HCA allows it. */
        if(frag->sg_entry.length <= ud_btl->ib_inline_max) {
            frag->wr_desc.sr_desc.send_flags =
                IBV_SEND_SIGNALED|IBV_SEND_INLINE;
        }
    } else {
        /* Take an LP send credit; on underflow, give it back and queue. */
        if(OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, -1) < 0)) {
            OPAL_THREAD_ADD32(&ud_btl->sd_wqe_lp, 1);
            opal_list_append(&ud_btl->pending_frags_lp,
                (opal_list_item_t*)frag);
            return OMPI_SUCCESS;
        }

        ib_qp = ud_btl->qp_lp;
        frag->wr_desc.sr_desc.wr.ud.ah = endpoint->rmt_ah_lp;
        frag->wr_desc.sr_desc.wr.ud.remote_qpn =
            endpoint->rem_addr.qp_num_lp;
    }

    /*OPAL_OUTPUT((0, "Send to LID %d QP %d, len: %d %d %d, frag: %p",
        endpoint->rem_addr.lid,
        frag->wr_desc.sr_desc.wr.ud.remote_qpn,
        frag->sg_entry.length, frag->segment.seg_len,
        ud_btl->ib_inline_max, frag));*/

#if MCA_BTL_UD_ENABLE_PROFILE
    frag->tm = opal_sys_timer_get_cycles();
#endif

    MCA_BTL_UD_START_TIME(ibv_post_send);
    if(OPAL_UNLIKELY(ibv_post_send(ib_qp, &frag->wr_desc.sr_desc, &bad_wr))) {
        BTL_ERROR(("error posting send request errno says %d %s\n",
            errno, strerror(errno)));
        return OMPI_ERROR;
    }
    MCA_BTL_UD_END_TIME(ibv_post_send);

    return OMPI_SUCCESS;
}
/* Register the endpoint class with OPAL's object system, wiring up the
   constructor/destructor defined below. */
OBJ_CLASS_INSTANCE(mca_btl_ud_endpoint_t,
    opal_list_item_t, mca_btl_ud_endpoint_construct,
    mca_btl_ud_endpoint_destruct);
/*
 * Initialize a freshly-allocated endpoint.  UD is connectionless, so
 * the only state to set up is the remote address block, which is
 * cleared here and later populated from the modex.
 */
static void mca_btl_ud_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    /*OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);*/
    memset(&endpoint->rem_addr, 0, sizeof(endpoint->rem_addr));
}
/*
 * Tear down an endpoint.  Intentionally empty: the constructor
 * allocates no resources, so there is nothing to release.
 */
static void mca_btl_ud_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
}
/*
 * Create a UD queue pair and drive it through the INIT -> RTR -> RTS
 * state transitions.  Unlike the RC comment this was copied from, a UD
 * QP is fully usable once in RTS: peers only need our QP number
 * (published through the modex) to address us.
 *
 * @param btl     (IN)  UD BTL module owning the QP
 * @param cq      (IN)  completion queue for send and recv work
 * @param srq     (IN)  shared receive queue (SRQ builds only)
 * @param qp      (OUT) created queue pair; NULL on failure
 * @param lcl_psn (IN)  initial send packet sequence number
 *
 * Fixes vs. the previous version: qp_attr is zeroed before use (fields
 * outside the attr_mask were previously indeterminate), and a failed
 * state transition now destroys the QP instead of leaking it and
 * leaving *qp pointing at a half-initialized object.
 */
int mca_btl_ud_endpoint_init_qp(
    mca_btl_base_module_t* btl,
    struct ibv_cq* cq,
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    struct ibv_srq* srq,
#endif
    struct ibv_qp** qp,
    uint32_t lcl_psn
    )
{
    mca_btl_ud_module_t* ud_btl = (mca_btl_ud_module_t*)btl;
    struct ibv_qp* my_qp;
    struct ibv_qp_attr qp_attr;
    struct ibv_qp_init_attr qp_init_attr;

    memset(&qp_attr, 0, sizeof(struct ibv_qp_attr));
    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));

    qp_init_attr.send_cq = cq;
    qp_init_attr.recv_cq = cq;
    qp_init_attr.cap.max_send_wr = mca_btl_ud_component.rd_num;
    qp_init_attr.cap.max_recv_wr = mca_btl_ud_component.rd_num;
    qp_init_attr.cap.max_send_sge = mca_btl_ud_component.ib_sg_list_size;
    qp_init_attr.cap.max_recv_sge = mca_btl_ud_component.ib_sg_list_size;
    qp_init_attr.qp_type = IBV_QPT_UD;

#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    if(mca_btl_ud_component.use_srq) {
        qp_init_attr.srq = srq;
    }
#endif

    my_qp = ibv_create_qp(ud_btl->ib_pd, &qp_init_attr);
    if(NULL == my_qp) {
        BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
        return OMPI_ERROR;
    }
    (*qp) = my_qp;

    /* max_inline_data is an output of ibv_create_qp() */
    if(0 == (ud_btl->ib_inline_max = qp_init_attr.cap.max_inline_data)) {
        BTL_ERROR(("ibv_create_qp: returned 0 byte(s) for max inline data"));
    }

    qp_attr.qp_state = IBV_QPS_INIT;
    qp_attr.pkey_index = mca_btl_ud_component.ib_pkey_ix;
    qp_attr.qkey = mca_btl_ud_component.ib_qkey;
    qp_attr.port_num = ud_btl->port_num;

    if(ibv_modify_qp(*qp, &qp_attr,
            IBV_QP_STATE |
            IBV_QP_PKEY_INDEX |
            IBV_QP_PORT |
            IBV_QP_QKEY)) {
        BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
        goto destroy_qp;
    }

    qp_attr.qp_state = IBV_QPS_RTR;
    if(ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE)) {
        BTL_ERROR(("error modifing QP to RTR errno says %s", strerror(errno)));
        goto destroy_qp;
    }

    qp_attr.qp_state = IBV_QPS_RTS;
    qp_attr.sq_psn = lcl_psn;
    if (ibv_modify_qp(*qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) {
        BTL_ERROR(("error modifying QP to RTS errno says %s", strerror(errno)));
        goto destroy_qp;
    }

    return OMPI_SUCCESS;

destroy_qp:
    /* Don't leak the QP (or hand back a dangling pointer) when a state
       transition fails. */
    ibv_destroy_qp(my_qp);
    *qp = NULL;
    return OMPI_ERROR;
}

Просмотреть файл

@ -1,100 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_IB_ENDPOINT_H
#define MCA_BTL_IB_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "btl_ud_frag.h"
#include "btl_ud.h"
#include <errno.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_btl_ud_frag_t;
/* Per-module address record exchanged through the modex: since UD is
   connectionless this is everything a peer needs to address us. */
struct mca_btl_ud_addr_t {
    uint32_t qp_num_hp;
    uint32_t qp_num_lp;
    /* QP number (Low and High priority) */

    uint32_t psn_hp;
    uint32_t psn_lp;
    /* Port sequence number (Low and High) */

    uint16_t lid;
    /* Local Identifier */
    uint16_t subnet;
    /* NOTE(review): populated from ib_port_attr.sm_lid in
       component_init, i.e. the subnet manager's LID rather than a
       subnet prefix -- verify before relying on the name. */
};
typedef struct mca_btl_ud_addr_t mca_btl_ud_addr_t;
/**
 * An abstraction that represents a connection to a endpoint process.
 * An instance of mca_btl_base_endpoint_t is associated w/ each process
 * and BTL pair and address information is exchanged at startup.
 * The UD BTL is connectionless, so no connection is ever established.
 */
struct mca_btl_base_endpoint_t {
    opal_list_item_t super;

    /*opal_mutex_t endpoint_lock;*/
    /**< lock for concurrent access to endpoint state */

    mca_btl_ud_addr_t rem_addr;
    /**< Remote address information (filled in from the modex) */

    struct ibv_ah* rmt_ah_hp;
    struct ibv_ah* rmt_ah_lp;
    /**< Remote Address Handle (Low and High priority QPs) */
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_ud_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_endpoint_t);
/* Post a send WQE for 'frag' toward 'endpoint', choosing the HP or LP
   QP by fragment size; queues the frag when no send credit is free.
   NOTE(review): declared bare 'inline' with the definition in
   btl_ud_endpoint.c -- dubious under strict C99 inline semantics;
   consider 'extern'/plain declaration if this code is revived. */
inline int mca_btl_ud_endpoint_post_send(struct mca_btl_ud_module_t* ud_btl,
    mca_btl_ud_endpoint_t * endpoint,
    struct mca_btl_ud_frag_t * frag);

/* Create a UD QP and transition it INIT->RTR->RTS (btl_ud_endpoint.c). */
int mca_btl_ud_endpoint_init_qp(
    mca_btl_base_module_t* btl,
    struct ibv_cq* cq,
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
    struct ibv_srq* srq,
#endif
    struct ibv_qp** qp,
    uint32_t lcl_psn);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,154 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_ud.h"
#include "btl_ud_frag.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
/* Setup shared by every fragment flavor: cache the mpool registration
   the free list stashed in user_data and prime the SGE's lkey from its
   memory region. */
static inline void mca_btl_ud_frag_common_constructor( mca_btl_ud_frag_t* frag)
{
    frag->ud_reg = (mca_mpool_openib_registration_t*)frag->base.super.user_data;
    frag->sg_entry.lkey = frag->ud_reg->mr->lkey;
    frag->base.des_flags = 0;
}
/* Size-independent setup for SEND fragments.
   Buffer layout: [frag struct][BTL header][payload]; the 40-byte GRH is
   NOT present on the send side -- the HCA generates it on the wire. */
static void mca_btl_ud_send_frag_common_constructor(mca_btl_ud_frag_t* frag)
{
    mca_btl_ud_frag_common_constructor(frag);
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;

    /* We do not include the mca_btl_ud_ib_header_t data when sending */
    frag->hdr = (mca_btl_ud_header_t*)(frag+1);
    frag->segment.seg_addr.pval = frag->hdr + 1;
    frag->sg_entry.addr = (unsigned long)frag->hdr;

    /* Pre-build the constant parts of the send work request. */
    frag->wr_desc.sr_desc.wr_id = (unsigned long) frag;
    frag->wr_desc.sr_desc.sg_list = &frag->sg_entry;
    frag->wr_desc.sr_desc.num_sge = 1;
    frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
    frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
    frag->wr_desc.sr_desc.next = NULL;
    frag->wr_desc.sr_desc.wr.ud.remote_qkey = mca_btl_ud_component.ib_qkey;
}
/* Size-independent setup for RECV fragments.
   Buffer layout: [frag struct][40-byte GRH][BTL header][payload]; the
   HCA deposits the GRH at the start of the posted buffer (frag + 1). */
static void mca_btl_ud_recv_frag_common_constructor(mca_btl_ud_frag_t* frag)
{
    mca_btl_ud_frag_common_constructor(frag);
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;

    /* Receive frag headers start 40 bytes later.
       BUG FIX: the old expression cast to mca_btl_ud_header_t* BEFORE
       adding the sizeof() offsets, so the offsets were scaled by
       sizeof(mca_btl_ud_header_t) instead of being byte offsets.  Do
       the arithmetic in bytes and cast the final address. */
    frag->hdr = (mca_btl_ud_header_t*)((unsigned char*)frag +
        sizeof(mca_btl_ud_frag_t) + sizeof(mca_btl_ud_ib_header_t));
    frag->segment.seg_addr.pval = frag->hdr + 1;
    frag->sg_entry.addr = (unsigned long)(frag + 1);

    frag->segment.seg_len = frag->size;
    /* The SGE must also cover the GRH and BTL header the HCA writes. */
    frag->sg_entry.length = frag->size +
        sizeof(mca_btl_ud_ib_header_t) + sizeof(mca_btl_ud_header_t);

    frag->wr_desc.rd_desc.wr_id = (unsigned long) frag;
    frag->wr_desc.rd_desc.sg_list = &frag->sg_entry;
    frag->wr_desc.rd_desc.num_sge = 1;
    frag->wr_desc.rd_desc.next = NULL;
}
static void mca_btl_ud_send_frag_eager_constructor(mca_btl_ud_frag_t* frag)
{
frag->size = mca_btl_ud_component.eager_limit;
mca_btl_ud_send_frag_common_constructor(frag);
}
static void mca_btl_ud_send_frag_max_constructor(mca_btl_ud_frag_t* frag)
{
frag->size = mca_btl_ud_component.max_send_size;
mca_btl_ud_send_frag_common_constructor(frag);
}
static void mca_btl_ud_recv_frag_max_constructor(mca_btl_ud_frag_t* frag)
{
frag->size = mca_btl_ud_component.max_send_size;
mca_btl_ud_recv_frag_common_constructor(frag);
}
static void mca_btl_ud_recv_frag_eager_constructor(mca_btl_ud_frag_t* frag)
{
frag->size = mca_btl_ud_component.eager_limit;
mca_btl_ud_recv_frag_common_constructor(frag);
}
/* Constructor for zero-copy ("frag") send frags: no internal payload
   buffer (size 0); the user buffer is attached at send time. */
static void mca_btl_ud_send_frag_frag_constructor(mca_btl_ud_frag_t* frag)
{
frag->size = 0;
mca_btl_ud_send_frag_common_constructor(frag);
}
/* OPAL class instances for each frag flavor.  All flavors share the same
   underlying struct (mca_btl_ud_frag_t); they differ only in which
   constructor runs, which fixes the buffer size and send/recv layout.
   The base class frag type itself gets no constructor - it is never
   allocated directly from a free list. */
OBJ_CLASS_INSTANCE(
mca_btl_ud_frag_t,
mca_btl_base_descriptor_t,
NULL,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_ud_send_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_ud_send_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_ud_send_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_ud_send_frag_max_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_ud_send_frag_frag_t,
mca_btl_base_descriptor_t,
mca_btl_ud_send_frag_frag_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_ud_recv_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_ud_recv_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_ud_recv_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_ud_recv_frag_max_constructor,
NULL);

Просмотреть файл

@ -1,148 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_UD_FRAG_H
#define MCA_BTL_UD_FRAG_H
#define MCA_BTL_IB_FRAG_ALIGN (8)
#include "ompi_config.h"
#include "btl_ud.h"
#include <infiniband/verbs.h>
#include "ompi/mca/mpool/openib/mpool_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* UD adds a 40 byte Global Routing Header (GRH) */
/* This works in strange ways - the sending side does not need to explicitly
include this data in sg lists. Then, on the receiving side, the extra 40
bytes magically appear (the HCA writes the GRH at the head of every UD
receive buffer). */
struct mca_btl_ud_ib_header_t {
uint8_t ib_grh[40];
};
typedef struct mca_btl_ud_ib_header_t mca_btl_ud_ib_header_t;
/* BTL-level header carried on every message: just the base tag used to
   dispatch to the registered upper-layer callback. */
struct mca_btl_ud_header_t {
mca_btl_base_tag_t tag;
};
typedef struct mca_btl_ud_header_t mca_btl_ud_header_t;
/**
 * IB send/receive fragment derived type.
 *
 * One struct serves all frag flavors (eager/max/frag, send/recv); the
 * per-flavor constructors set 'size' and the buffer layout.
 */
struct mca_btl_ud_frag_t {
mca_btl_base_descriptor_t base;     /* generic BTL descriptor (must be first) */
mca_btl_base_segment_t segment;     /* the single payload segment */
struct mca_btl_base_endpoint_t *endpoint;  /* destination peer */
size_t size;                        /* payload capacity of this frag */
/* A frag is posted either as a receive or a send, never both at once,
   so the two verbs work-request descriptors can share storage. */
union{
struct ibv_recv_wr rd_desc;
struct ibv_send_wr sr_desc;
} wr_desc;
struct ibv_sge sg_entry;            /* single scatter/gather element */
/* When this is a send frag, hdr points right after this, as expected.
But when this is a receive frag, we have an extra 40 bytes provided
by IB (the GRH), so this points 40 bytes past the end of the frag. */
mca_btl_ud_header_t *hdr;
mca_mpool_openib_registration_t* ud_reg;  /* memory registration for the buffer */
opal_timer_t tm;                    /* timestamp, presumably for profiling - TODO confirm */
};
typedef struct mca_btl_ud_frag_t mca_btl_ud_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_frag_t);
/* Flavor aliases: all are the same struct, distinguished only by the OPAL
   class (and thus constructor) they are instantiated with. */
typedef struct mca_btl_ud_frag_t mca_btl_ud_send_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_send_frag_eager_t);
typedef struct mca_btl_ud_frag_t mca_btl_ud_send_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_send_frag_max_t);
typedef struct mca_btl_ud_frag_t mca_btl_ud_send_frag_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_send_frag_frag_t);
typedef struct mca_btl_ud_frag_t mca_btl_ud_recv_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_recv_frag_eager_t);
typedef struct mca_btl_ud_frag_t mca_btl_ud_recv_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_recv_frag_max_t);
/*
 * Allocate / return IB send descriptors from the module free lists.
 *
 * ALLOC macros block (OMPI_FREE_LIST_WAIT) until a frag is available and
 * set 'rc'; RETURN macros push the frag back onto its list.  All are
 * wrapped in do { } while(0) so they behave as a single statement
 * (safe inside un-braced if/else).
 */
#define MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc)                         \
do {                                                                       \
    ompi_free_list_item_t *item;                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_ud_module_t*)btl)->send_free_eager,     \
                        item, rc);                                         \
    frag = (mca_btl_ud_frag_t*) item;                                      \
} while(0)

#define MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag)                            \
do {                                                                       \
    OMPI_FREE_LIST_RETURN(&((mca_btl_ud_module_t*)btl)->send_free_eager,   \
                          (ompi_free_list_item_t*)(frag));                 \
} while(0)

#define MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc)                           \
do {                                                                       \
    ompi_free_list_item_t *item;                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_ud_module_t*)btl)->send_free_max,       \
                        item, rc);                                         \
    frag = (mca_btl_ud_frag_t*) item;                                      \
} while(0)

#define MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag)                              \
do {                                                                       \
    OMPI_FREE_LIST_RETURN(&((mca_btl_ud_module_t*)btl)->send_free_max,     \
                          (ompi_free_list_item_t*)(frag));                 \
} while(0)

#define MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc)                          \
do {                                                                       \
    ompi_free_list_item_t *item;                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_ud_module_t*)btl)->send_free_frag,      \
                        item, rc);                                         \
    frag = (mca_btl_ud_frag_t*) item;                                      \
} while(0)

#define MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag)                             \
do {                                                                       \
    OMPI_FREE_LIST_RETURN(&((mca_btl_ud_module_t*)btl)->send_free_frag,    \
                          (ompi_free_list_item_t*)(frag));                 \
} while(0)
struct mca_btl_ud_module_t;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,178 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/class/opal_hash_table.h"
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
#include "btl_ud.h"
#include "btl_ud_proc.h"
/* Forward declarations for the OPAL class constructor/destructor pair. */
static void mca_btl_ud_proc_construct(mca_btl_ud_proc_t* proc);
static void mca_btl_ud_proc_destruct(mca_btl_ud_proc_t* proc);
/* mca_btl_ud_proc_t derives from opal_list_item_t so instances can live
   on the component's global ib_procs list. */
OBJ_CLASS_INSTANCE(mca_btl_ud_proc_t,
opal_list_item_t, mca_btl_ud_proc_construct,
mca_btl_ud_proc_destruct);
/*
 * Initialize a new proc instance: zero all fields, create the per-proc
 * lock, and register the instance on the component's global proc list.
 * (Pointers are initialized with NULL rather than 0 for clarity.)
 */
void mca_btl_ud_proc_construct(mca_btl_ud_proc_t* proc)
{
    proc->proc_ompi = NULL;
    proc->proc_addr_count = 0;
    proc->proc_endpoints = NULL;
    proc->proc_endpoint_count = 0;
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* add to list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_ud_component.ib_lock);
    opal_list_append(&mca_btl_ud_component.ib_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_ud_component.ib_lock);
}
/*
 * Cleanup ib proc instance: remove it from the component's global list
 * and release its resources.
 */
void mca_btl_ud_proc_destruct(mca_btl_ud_proc_t* proc)
{
    /* remove from list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_ud_component.ib_lock);
    opal_list_remove_item(&mca_btl_ud_component.ib_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_ud_component.ib_lock);

    /* release resources - free(NULL) is a no-op, so no guard is needed */
    free(proc->proc_endpoints);

    /* tear down the lock created in the constructor (previously leaked) */
    OBJ_DESTRUCT(&proc->proc_lock);
}
/*
 * Look for an existing IB process instance based on the associated
 * ompi_proc_t instance.  Returns NULL if no match is found.  The
 * component lock is held for the duration of the scan.
 */
static mca_btl_ud_proc_t* mca_btl_ud_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_ud_proc_t* match = NULL;
    opal_list_item_t* item;
    opal_list_t* procs = &mca_btl_ud_component.ib_procs;

    OPAL_THREAD_LOCK(&mca_btl_ud_component.ib_lock);
    for(item = opal_list_get_first(procs);
            item != opal_list_get_end(procs);
            item = opal_list_get_next(item)) {
        mca_btl_ud_proc_t* ud_proc = (mca_btl_ud_proc_t*)item;
        if(ud_proc->proc_ompi == ompi_proc) {
            match = ud_proc;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&mca_btl_ud_component.ib_lock);
    return match;
}
/*
 * Create a IB process structure. There is a one-to-one correspondence
 * between a ompi_proc_t and a mca_btl_ud_proc_t instance. We cache
 * additional data (specifically the list of mca_btl_ud_endpoint_t instances,
 * and published addresses) associated w/ a given destination on this
 * datastructure.
 *
 * Returns the (possibly pre-existing) proc instance, or NULL on any
 * failure (allocation, modex lookup, malformed address blob).
 */
mca_btl_ud_proc_t* mca_btl_ud_proc_create(ompi_proc_t* ompi_proc)
{
    mca_btl_ud_proc_t* module_proc = NULL;
    size_t size;
    int rc;

    /* Check if we have already created a IB proc
     * structure for this ompi process */
    module_proc = mca_btl_ud_proc_lookup_ompi(ompi_proc);
    if(module_proc != NULL) {
        /* Gotcha! */
        return module_proc;
    }

    /* Oops! First time, gotta create a new IB proc out of the ompi_proc ... */
    module_proc = OBJ_NEW(mca_btl_ud_proc_t);
    if(NULL == module_proc) {
        /* previously dereferenced without checking - OBJ_NEW can fail */
        return NULL;
    }

    /* Initialize number of peers */
    module_proc->proc_endpoint_count = 0;
    module_proc->proc_ompi = ompi_proc;

    /* build a unique identifier (of arbitrary size) to represent the proc */
    module_proc->proc_guid = ompi_proc->proc_name;

    /* query for the peer address info published via the modex.
       NOTE(review): proc_addrs is allocated by modex_recv and does not
       appear to be freed by the destructor - possible leak; verify. */
    rc = mca_pml_base_modex_recv(&mca_btl_ud_component.super.btl_version,
            ompi_proc, (void*)&module_proc->proc_addrs,
            &size);
    if(OMPI_SUCCESS != rc) {
        opal_output(0, "[%s:%d] mca_pml_base_modex_recv failed for peer [%d,%d,%d]",
            __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
        OBJ_RELEASE(module_proc);
        return NULL;
    }

    /* the blob must be a whole number of address records */
    if((size % sizeof(mca_btl_ud_addr_t)) != 0) {
        opal_output(0, "[%s:%d] invalid module address for peer [%d,%d,%d]",
            __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
        OBJ_RELEASE(module_proc);
        return NULL;
    }

    module_proc->proc_addr_count = size / sizeof(mca_btl_ud_addr_t);
    if (0 == module_proc->proc_addr_count) {
        module_proc->proc_endpoints = NULL;
    } else {
        module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc(module_proc->proc_addr_count *
                    sizeof(mca_btl_base_endpoint_t*));
    }
    /* NOTE(review): a peer publishing zero addresses also takes this
       failure path (endpoints == NULL) - presumably intentional, since
       such a peer is unreachable via this BTL; confirm. */
    if(NULL == module_proc->proc_endpoints) {
        OBJ_RELEASE(module_proc);
        return NULL;
    }
    return module_proc;
}
/*
 * Note that this routine must be called with the lock on the process
 * already held. Insert a btl instance into the proc array and assign
 * it an address.
 *
 * Assumes proc_endpoint_count < proc_addr_count (the endpoints array was
 * sized to proc_addr_count in mca_btl_ud_proc_create) - no bounds check
 * is performed here; callers must not over-insert. TODO confirm callers
 * respect this.
 */
int mca_btl_ud_proc_insert(mca_btl_ud_proc_t* module_proc,
mca_btl_base_endpoint_t* module_endpoint)
{
/* insert into endpoint array; each endpoint is paired with the next
unassigned published address */
module_endpoint->rem_addr =
module_proc->proc_addrs[module_proc->proc_endpoint_count];
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] =
module_endpoint;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,72 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_UD_PROC_H
#define MCA_BTL_UD_PROC_H
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#include "ompi/proc/proc.h"
#include "btl_ud.h"
#include "btl_ud_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Represents the state of a remote process and the set of addresses
 * that it exports. Also cache an instance of mca_btl_base_endpoint_t for
 * each
 * BTL instance that attempts to open a connection to the process.
 */
struct mca_btl_ud_proc_t {
opal_list_item_t super;
/**< allow proc to be placed on a list */
ompi_proc_t *proc_ompi;
/**< pointer to corresponding ompi_proc_t */
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
struct mca_btl_ud_addr_t* proc_addrs;
/**< array of addresses published by the peer (from the modex) */
size_t proc_addr_count;
/**< number of addresses published by endpoint */
struct mca_btl_base_endpoint_t **proc_endpoints;
/**< array of endpoints that have been created to access this proc */
size_t proc_endpoint_count;
/**< number of endpoints */
opal_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_ud_proc_t mca_btl_ud_proc_t;
OBJ_CLASS_DECLARATION(mca_btl_ud_proc_t);
/* Look up or create the proc instance for an ompi_proc_t. */
mca_btl_ud_proc_t* mca_btl_ud_proc_create(ompi_proc_t* ompi_proc);
/* Insert an endpoint into the proc's array; caller must hold proc_lock. */
int mca_btl_ud_proc_insert(mca_btl_ud_proc_t*, mca_btl_base_endpoint_t*);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,43 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Sandia National Laboratories. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_btl_ud_CONFIG([action-if-can-compile],
#                   [action-if-cant-compile])
# ------------------------------------------------
# Configure-time check for the UD BTL: the component can build iff the
# OpenIB (libibverbs) headers/libraries are usable.
AC_DEFUN([MCA_btl_ud_CONFIG],[
OMPI_CHECK_OPENIB([btl_ud],
[btl_ud_happy="yes"],
[btl_ud_happy="no"])
# On success, propagate the OpenIB link flags to the wrapper compilers
AS_IF([test "$btl_ud_happy" = "yes"],
[btl_ud_WRAPPER_EXTRA_LDFLAGS="$btl_ud_LDFLAGS"
btl_ud_WRAPPER_EXTRA_LIBS="$btl_ud_LIBS"
$1],
[$2])
# substitute in the things needed to build ud
AC_SUBST([btl_ud_CFLAGS])
AC_SUBST([btl_ud_CPPFLAGS])
AC_SUBST([btl_ud_LDFLAGS])
AC_SUBST([btl_ud_LIBS])
])dnl

Просмотреть файл

@ -1,26 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Sandia National Laboratories. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
# Build-system parameters: the file whose presence identifies the
# component, its config header, and the files generated by configure.
PARAM_INIT_FILE=btl_ud.c
PARAM_CONFIG_HEADER_FILE="ud_config.h"
PARAM_CONFIG_FILES="Makefile"