Remove the mvapi BTL. Woo hoo!
This commit was SVN r16483.
Этот коммит содержится в:
родитель
0bf61a1b84
Коммит
b7eeae0a74
3
NEWS
3
NEWS
@ -68,6 +68,9 @@ Trunk (not on release branches yet)
|
|||||||
- Added checkpoint/restart process fault tolerance support. Initially
|
- Added checkpoint/restart process fault tolerance support. Initially
|
||||||
support a LAM/MPI-like protocol.
|
support a LAM/MPI-like protocol.
|
||||||
--> Expected: 1.3
|
--> Expected: 1.3
|
||||||
|
- Removed "mvapi" BTL; all InfiniBand support now uses the OpenFabrics
|
||||||
|
driver stacks.
|
||||||
|
--> Expected: 1.3
|
||||||
|
|
||||||
- Fixed issue with pthread detection when compilers are not all
|
- Fixed issue with pthread detection when compilers are not all
|
||||||
from the same vendor. Thanks to Ake Sandgren for the bug
|
from the same vendor. Thanks to Ake Sandgren for the bug
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
|
|
||||||
# Use the top-level Makefile.options

# Compiler and preprocessor flags come from the btl_mvapi_* variables.
CFLAGS = $(btl_mvapi_CFLAGS)
AM_CPPFLAGS = $(btl_mvapi_CPPFLAGS)

dist_pkgdata_DATA=help-mpi-btl-mvapi.txt

sources = \
    btl_mvapi.c \
    btl_mvapi.h \
    btl_mvapi_component.c \
    btl_mvapi_endpoint.c \
    btl_mvapi_endpoint.h \
    btl_mvapi_frag.c \
    btl_mvapi_frag.h \
    btl_mvapi_proc.c \
    btl_mvapi_proc.h \
    btl_mvapi_eager_rdma.h

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if OMPI_BUILD_btl_mvapi_DSO
lib =
lib_sources =
component = mca_btl_mvapi.la
component_sources = $(sources)
else
lib = libmca_btl_mvapi.la
lib_sources = $(sources)
component =
component_sources =
endif

# DSO build: the component is installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_mvapi_la_SOURCES = $(component_sources)
mca_btl_mvapi_la_LDFLAGS = -module -avoid-version $(btl_mvapi_LDFLAGS)
mca_btl_mvapi_la_LIBADD = \
    $(btl_mvapi_LIBS) \
    $(top_ompi_builddir)/ompi/libmpi.la \
    $(top_ompi_builddir)/orte/libopen-rte.la \
    $(top_ompi_builddir)/opal/libopen-pal.la

# Static build: a non-installed convenience library.
# Note: there must be no stray '$' after -avoid-version; "$ " is a
# reference to an (empty) variable and would glue the two flags together.
noinst_LTLIBRARIES = $(lib)
libmca_btl_mvapi_la_SOURCES = $(lib_sources)
libmca_btl_mvapi_la_LDFLAGS = -module -avoid-version $(btl_mvapi_LDFLAGS)
libmca_btl_mvapi_la_LIBADD = $(btl_mvapi_LIBS)
|
|
@ -1,856 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include "opal/util/output.h"
|
|
||||||
#include "opal/util/if.h"
|
|
||||||
#include "ompi/mca/pml/pml.h"
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
|
||||||
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
#include "btl_mvapi_frag.h"
|
|
||||||
#include "btl_mvapi_proc.h"
|
|
||||||
#include "btl_mvapi_endpoint.h"
|
|
||||||
#include "ompi/datatype/convertor.h"
|
|
||||||
#include "ompi/datatype/datatype.h"
|
|
||||||
#include "ompi/mca/mpool/base/base.h"
|
|
||||||
#include "ompi/mca/mpool/mpool.h"
|
|
||||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
|
||||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
|
||||||
#include <vapi_types.h>
|
|
||||||
#include <math.h> /* for log2 */
|
|
||||||
|
|
||||||
mca_btl_mvapi_module_t mca_btl_mvapi_module = {
|
|
||||||
{
|
|
||||||
&mca_btl_mvapi_component.super,
|
|
||||||
0, /* max size of first fragment */
|
|
||||||
0, /* min send fragment size */
|
|
||||||
0, /* max send fragment size */
|
|
||||||
0, /* btl_rdma_pipeline_send_length */
|
|
||||||
0, /* btl_rdma_pipeline_frag_size */
|
|
||||||
0, /* btl_min_rdma_pipeline_size */
|
|
||||||
0, /* exclusivity */
|
|
||||||
0, /* latency */
|
|
||||||
0, /* bandwidth */
|
|
||||||
0, /* TODO this should be PUT btl flags */
|
|
||||||
mca_btl_mvapi_add_procs,
|
|
||||||
mca_btl_mvapi_del_procs,
|
|
||||||
mca_btl_mvapi_register,
|
|
||||||
mca_btl_mvapi_finalize,
|
|
||||||
/* we need alloc free, pack */
|
|
||||||
mca_btl_mvapi_alloc,
|
|
||||||
mca_btl_mvapi_free,
|
|
||||||
mca_btl_mvapi_prepare_src,
|
|
||||||
mca_btl_mvapi_prepare_dst,
|
|
||||||
mca_btl_mvapi_send,
|
|
||||||
mca_btl_mvapi_put,
|
|
||||||
mca_btl_mvapi_get,
|
|
||||||
mca_btl_mvapi_dump,
|
|
||||||
NULL, /* mpool */
|
|
||||||
NULL, /* error call back registration */
|
|
||||||
mca_btl_mvapi_ft_event
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* add a proc to this btl module
|
|
||||||
* creates an endpoint that is setup on the
|
|
||||||
* first send to the endpoint
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_add_procs(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t **ompi_procs,
|
|
||||||
struct mca_btl_base_endpoint_t** peers,
|
|
||||||
ompi_bitmap_t* reachable)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*)btl;
|
|
||||||
int i, rc;
|
|
||||||
|
|
||||||
for(i = 0; i < (int) nprocs; i++) {
|
|
||||||
|
|
||||||
struct ompi_proc_t* ompi_proc = ompi_procs[i];
|
|
||||||
mca_btl_mvapi_proc_t* ib_proc;
|
|
||||||
mca_btl_base_endpoint_t* ib_peer;
|
|
||||||
|
|
||||||
/* mvapi doesn't support heterogeneous yet... */
|
|
||||||
if (ompi_proc_local()->proc_arch != ompi_proc->proc_arch) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(NULL == (ib_proc = mca_btl_mvapi_proc_create(ompi_proc))) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check to make sure that the peer has at least as many interface
|
|
||||||
* addresses exported as we are trying to use. If not, then
|
|
||||||
* don't bind this PTL instance to the proc.
|
|
||||||
*/
|
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
|
|
||||||
|
|
||||||
/* The btl_proc datastructure is shared by all IB PTL
|
|
||||||
* instances that are trying to reach this destination.
|
|
||||||
* Cache the peer instance on the btl_proc.
|
|
||||||
*/
|
|
||||||
ib_peer = OBJ_NEW(mca_btl_mvapi_endpoint_t);
|
|
||||||
if(NULL == ib_peer) {
|
|
||||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
||||||
}
|
|
||||||
|
|
||||||
ib_peer->endpoint_btl = mvapi_btl;
|
|
||||||
ib_peer->subnet = mvapi_btl->port_info.subnet;
|
|
||||||
rc = mca_btl_mvapi_proc_insert(ib_proc, ib_peer);
|
|
||||||
if(rc != OMPI_SUCCESS) {
|
|
||||||
OBJ_RELEASE(ib_peer);
|
|
||||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ompi_bitmap_set_bit(reachable, i);
|
|
||||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
|
||||||
peers[i] = ib_peer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* currently we only scale the srq the first time
|
|
||||||
add_procs is called, subsequent calls are ignored,
|
|
||||||
we should be able to change this to modify the SRQ but
|
|
||||||
I am unsure as to what this entails
|
|
||||||
*/
|
|
||||||
|
|
||||||
if( 0 == mvapi_btl->num_peers ) {
|
|
||||||
mvapi_btl->num_peers += nprocs;
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
|
||||||
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + log2(nprocs) * mca_btl_mvapi_component.srq_rd_per_peer;
|
|
||||||
if(mvapi_btl->rd_num > mca_btl_mvapi_component.srq_rd_max)
|
|
||||||
mvapi_btl->rd_num = mca_btl_mvapi_component.srq_rd_max;
|
|
||||||
mvapi_btl->rd_low = mvapi_btl->rd_num - 1;
|
|
||||||
free(mvapi_btl->rr_desc_post);
|
|
||||||
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_num * sizeof(VAPI_rr_desc_t)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* delete the proc as reachable from this btl module
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_del_procs(struct mca_btl_base_module_t* btl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t **procs,
|
|
||||||
struct mca_btl_base_endpoint_t ** peers)
|
|
||||||
{
|
|
||||||
/* Stub */
|
|
||||||
BTL_VERBOSE(("Stub\n"));
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
*Register callback function to support send/recv semantics
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_register(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_tag_t tag,
|
|
||||||
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
|
||||||
void* cbdata)
|
|
||||||
{
|
|
||||||
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
|
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock);
|
|
||||||
mvapi_btl->ib_reg[tag].cbfunc = cbfunc;
|
|
||||||
mvapi_btl->ib_reg[tag].cbdata = cbdata;
|
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate a segment.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param size (IN) Request segment size.
|
|
||||||
*
|
|
||||||
* When allocating a segment we pull a pre-alllocated segment
|
|
||||||
* from one of two free lists, an eager list and a max list
|
|
||||||
*/
|
|
||||||
mca_btl_base_descriptor_t* mca_btl_mvapi_alloc(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
uint8_t order,
|
|
||||||
size_t size)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_frag_t* frag;
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl;
|
|
||||||
int rc;
|
|
||||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
|
|
||||||
if(size <= mca_btl_mvapi_component.eager_limit){
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
|
||||||
if(NULL == frag) return NULL;
|
|
||||||
frag->segment.seg_len = size;
|
|
||||||
} else if (size <= mca_btl_mvapi_component.max_send_size) {
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
|
||||||
if(NULL == frag) return NULL;
|
|
||||||
frag->segment.seg_len = size;
|
|
||||||
} else {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
frag->segment.seg_len = size <= mvapi_btl->super.btl_eager_limit ? size : mvapi_btl->super.btl_eager_limit;
|
|
||||||
frag->base.des_flags = 0;
|
|
||||||
frag->base.order = MCA_BTL_NO_ORDER;
|
|
||||||
|
|
||||||
return (mca_btl_base_descriptor_t*)frag;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return a segment
|
|
||||||
*
|
|
||||||
* Return the segment to the appropriate
|
|
||||||
* preallocated segment list
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_free(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_descriptor_t* des)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)des;
|
|
||||||
if (MCA_BTL_MVAPI_FRAG_FRAG == frag->type && frag->registration != NULL) {
|
|
||||||
btl->btl_mpool->mpool_deregister(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
|
|
||||||
frag->registration = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* register user buffer or pack
|
|
||||||
* data into pre-registered buffer and return a
|
|
||||||
* descriptor that can be
|
|
||||||
* used for send/put.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param endpoint (IN) BTL peer addressing
|
|
||||||
*
|
|
||||||
* prepare source's behavior depends on the following:
|
|
||||||
* Has a valid memory registration been passed to prepare_src?
|
|
||||||
* if so we attempt to use the pre-registred user-buffer, if the memory registration
|
|
||||||
* is to small (only a portion of the user buffer) then we must reregister the user buffer
|
|
||||||
* Has the user requested the memory to be left pinned?
|
|
||||||
* if so we insert the memory registration into a memory tree for later lookup, we
|
|
||||||
* may also remove a previous registration if a MRU (most recently used) list of
|
|
||||||
* registions is full, this prevents resources from being exhausted.
|
|
||||||
* Is the requested size larger than the btl's max send size?
|
|
||||||
* if so and we aren't asked to leave the registration pinned than we register the memory if
|
|
||||||
* the users buffer is contiguous
|
|
||||||
* Otherwise we choose from two free lists of pre-registered memory in which to pack the data into.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* endpoint,
|
|
||||||
mca_mpool_base_registration_t* registration,
|
|
||||||
struct ompi_convertor_t* convertor,
|
|
||||||
uint8_t order,
|
|
||||||
size_t reserve,
|
|
||||||
size_t* size
|
|
||||||
)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl;
|
|
||||||
mca_btl_mvapi_frag_t* frag = NULL;
|
|
||||||
mca_btl_mvapi_reg_t *mvapi_reg;
|
|
||||||
struct iovec iov;
|
|
||||||
uint32_t iov_count = 1;
|
|
||||||
size_t max_data = *size;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
|
|
||||||
mvapi_btl = (mca_btl_mvapi_module_t*)btl;
|
|
||||||
|
|
||||||
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
|
|
||||||
if(registration != NULL || max_data > btl->btl_max_send_size) {
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
|
||||||
if(NULL == frag) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
iov.iov_len = max_data;
|
|
||||||
iov.iov_base = NULL;
|
|
||||||
|
|
||||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
|
||||||
|
|
||||||
*size = max_data;
|
|
||||||
|
|
||||||
if(NULL == registration) {
|
|
||||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
|
||||||
iov.iov_base, max_data, 0, ®istration);
|
|
||||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
|
||||||
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
frag->registration = (mca_btl_mvapi_reg_t*)registration;
|
|
||||||
}
|
|
||||||
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
|
|
||||||
|
|
||||||
frag->base.des_flags = 0;
|
|
||||||
frag->base.des_src = &frag->segment;
|
|
||||||
frag->base.des_src_cnt = 1;
|
|
||||||
frag->base.des_dst = NULL;
|
|
||||||
frag->base.des_dst_cnt = 0;
|
|
||||||
frag->base.des_flags = 0;
|
|
||||||
frag->base.order = MCA_BTL_NO_ORDER;
|
|
||||||
|
|
||||||
frag->sg_entry.len = max_data;
|
|
||||||
frag->sg_entry.lkey = mvapi_reg->l_key;
|
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t)iov.iov_base;
|
|
||||||
|
|
||||||
frag->segment.seg_len = max_data;
|
|
||||||
frag->segment.seg_addr.pval = iov.iov_base;
|
|
||||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
|
||||||
|
|
||||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
|
|
||||||
"frag->segment.seg_key.key32[0] = %lu",
|
|
||||||
frag->sg_entry.lkey, frag->sg_entry.addr,
|
|
||||||
frag->segment.seg_key.key32[0]));
|
|
||||||
return &frag->base;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(max_data + reserve <= btl->btl_eager_limit) {
|
|
||||||
/* the data is small enough to fit in the eager frag and
|
|
||||||
* memory is not prepinned */
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(NULL == frag) {
|
|
||||||
/* the data doesn't fit into eager frag or eger frag is
|
|
||||||
* not available */
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
|
||||||
if(NULL == frag) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if(max_data + reserve > btl->btl_max_send_size) {
|
|
||||||
max_data = btl->btl_max_send_size - reserve;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
iov.iov_len = max_data;
|
|
||||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
|
||||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
|
||||||
if( rc < 0 ) {
|
|
||||||
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
*size = max_data;
|
|
||||||
frag->segment.seg_len = max_data + reserve;
|
|
||||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
|
||||||
frag->base.des_src = &frag->segment;
|
|
||||||
frag->base.des_src_cnt = 1;
|
|
||||||
frag->base.des_dst = NULL;
|
|
||||||
frag->base.des_dst_cnt = 0;
|
|
||||||
frag->base.des_flags = 0;
|
|
||||||
frag->base.order = MCA_BTL_NO_ORDER;
|
|
||||||
|
|
||||||
return &frag->base;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepare the dst buffer
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param peer (IN) BTL peer addressing
|
|
||||||
* prepare dest's behavior depends on the following:
|
|
||||||
* Has a valid memory registration been passed to prepare_src?
|
|
||||||
* if so we attempt to use the pre-registred user-buffer, if the memory registration
|
|
||||||
* is to small (only a portion of the user buffer) then we must reregister the user buffer
|
|
||||||
* Has the user requested the memory to be left pinned?
|
|
||||||
* if so we insert the memory registration into a memory tree for later lookup, we
|
|
||||||
* may also remove a previous registration if a MRU (most recently used) list of
|
|
||||||
* registions is full, this prevents resources from being exhausted.
|
|
||||||
*/
|
|
||||||
mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* endpoint,
|
|
||||||
mca_mpool_base_registration_t* registration,
|
|
||||||
struct ompi_convertor_t* convertor,
|
|
||||||
uint8_t order,
|
|
||||||
size_t reserve,
|
|
||||||
size_t* size)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl;
|
|
||||||
mca_btl_mvapi_frag_t* frag;
|
|
||||||
mca_btl_mvapi_reg_t *mvapi_reg;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
|
|
||||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
|
||||||
|
|
||||||
if(NULL == frag){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
frag->segment.seg_len = *size;
|
|
||||||
ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
|
|
||||||
frag->base.des_flags = 0;
|
|
||||||
frag->base.order = MCA_BTL_NO_ORDER;
|
|
||||||
|
|
||||||
if(NULL == registration) {
|
|
||||||
/* we didn't get a memory registration passed in, so we have to register the region
|
|
||||||
* ourselves
|
|
||||||
*/
|
|
||||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
|
||||||
frag->segment.seg_addr.pval, *size, 0, ®istration);
|
|
||||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
|
||||||
BTL_ERROR(("mpool_register(%p,%lu) failed: base %p offset %lu",
|
|
||||||
frag->segment.seg_addr.pval, *size, convertor->pBaseBuf, convertor->bConverted));
|
|
||||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
frag->registration = (mca_btl_mvapi_reg_t*)registration;
|
|
||||||
}
|
|
||||||
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
|
|
||||||
|
|
||||||
frag->sg_entry.len = *size;
|
|
||||||
frag->sg_entry.lkey = mvapi_reg->l_key;
|
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->segment.seg_addr.pval;
|
|
||||||
|
|
||||||
frag->segment.seg_key.key32[0] =mvapi_reg->r_key;
|
|
||||||
|
|
||||||
frag->base.des_dst = &frag->segment;
|
|
||||||
frag->base.des_dst_cnt = 1;
|
|
||||||
frag->base.des_src = NULL;
|
|
||||||
frag->base.des_src_cnt = 0;
|
|
||||||
|
|
||||||
return &frag->base;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
int mca_btl_mvapi_finalize(struct mca_btl_base_module_t* btl)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl;
|
|
||||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initiate a send.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int mca_btl_mvapi_send(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* endpoint,
|
|
||||||
struct mca_btl_base_descriptor_t* descriptor,
|
|
||||||
mca_btl_base_tag_t tag)
|
|
||||||
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)descriptor;
|
|
||||||
frag->endpoint = endpoint;
|
|
||||||
frag->hdr->tag = tag;
|
|
||||||
frag->desc.sr_desc.opcode = VAPI_SEND;
|
|
||||||
return mca_btl_mvapi_endpoint_send(endpoint, frag);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* RDMA local buffer to remote buffer address.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_endpoint_t* endpoint,
|
|
||||||
mca_btl_base_descriptor_t* descriptor)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
|
||||||
|
|
||||||
/* setup for queued requests */
|
|
||||||
frag->endpoint = endpoint;
|
|
||||||
frag->desc.sr_desc.opcode = VAPI_RDMA_WRITE;
|
|
||||||
|
|
||||||
/* check for a send wqe */
|
|
||||||
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
|
|
||||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
|
|
||||||
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
|
||||||
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
|
|
||||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
|
|
||||||
/* post descriptor */
|
|
||||||
} else {
|
|
||||||
|
|
||||||
frag->desc.sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
|
|
||||||
frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t) frag->base.des_dst->seg_addr.lval;
|
|
||||||
frag->desc.sr_desc.r_key = frag->base.des_dst->seg_key.key32[0];
|
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
|
||||||
frag->sg_entry.len = frag->base.des_src->seg_len;
|
|
||||||
if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint->lcl_qp_hndl_lp, &frag->desc.sr_desc)) {
|
|
||||||
rc = OMPI_ERROR;
|
|
||||||
} else {
|
|
||||||
rc = OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1);
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 1);
|
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* RDMA read remote buffer to local buffer address.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_endpoint_t* endpoint,
|
|
||||||
mca_btl_base_descriptor_t* descriptor)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
|
||||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
|
||||||
|
|
||||||
frag->endpoint = endpoint;
|
|
||||||
frag->desc.sr_desc.opcode = VAPI_RDMA_READ;
|
|
||||||
|
|
||||||
/* check for a send wqe */
|
|
||||||
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
|
|
||||||
|
|
||||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
|
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock);
|
|
||||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
|
|
||||||
/* check for a get token */
|
|
||||||
} else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {
|
|
||||||
|
|
||||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
|
|
||||||
OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
|
|
||||||
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
|
||||||
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t*)frag);
|
|
||||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
frag->desc.sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
|
|
||||||
frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t) frag->base.des_src->seg_addr.lval;
|
|
||||||
frag->desc.sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
|
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
|
||||||
frag->sg_entry.len = frag->base.des_dst->seg_len;
|
|
||||||
|
|
||||||
if(VAPI_OK != VAPI_post_sr(mvapi_btl->nic, endpoint->lcl_qp_hndl_lp, &frag->desc.sr_desc)) {
|
|
||||||
rc = OMPI_ERROR;
|
|
||||||
} else {
|
|
||||||
rc = OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1);
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 1);
|
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Asynchronous event handler to detect unforseen
|
|
||||||
* events. Usually, such events are catastrophic.
|
|
||||||
* Should have a robust mechanism to handle these
|
|
||||||
* events and abort the OMPI application if necessary.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
|
|
||||||
VAPI_event_record_t * event_p,
|
|
||||||
void *priv_data)
|
|
||||||
{
|
|
||||||
switch (event_p->type) {
|
|
||||||
case VAPI_QP_PATH_MIGRATED:
|
|
||||||
case VAPI_EEC_PATH_MIGRATED:
|
|
||||||
case VAPI_QP_COMM_ESTABLISHED:
|
|
||||||
case VAPI_EEC_COMM_ESTABLISHED:
|
|
||||||
case VAPI_SEND_QUEUE_DRAINED:
|
|
||||||
case VAPI_PORT_ACTIVE:
|
|
||||||
{
|
|
||||||
BTL_VERBOSE(("Got an asynchronous event: %s\n", VAPI_event_record_sym(event_p->type)));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case VAPI_CQ_ERROR:
|
|
||||||
case VAPI_LOCAL_WQ_INV_REQUEST_ERROR:
|
|
||||||
case VAPI_LOCAL_WQ_ACCESS_VIOL_ERROR:
|
|
||||||
case VAPI_LOCAL_WQ_CATASTROPHIC_ERROR:
|
|
||||||
case VAPI_PATH_MIG_REQ_ERROR:
|
|
||||||
case VAPI_LOCAL_EEC_CATASTROPHIC_ERROR:
|
|
||||||
case VAPI_LOCAL_CATASTROPHIC_ERROR:
|
|
||||||
case VAPI_PORT_ERROR:
|
|
||||||
{
|
|
||||||
BTL_ERROR(("Got an asynchronous event: %s (%s)",
|
|
||||||
VAPI_event_record_sym(event_p->type),
|
|
||||||
VAPI_event_syndrome_sym(event_p->syndrome)));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
case VAPI_SRQ_LIMIT_REACHED:
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
BTL_ERROR(("SRQ limit is reached, posting more buffers %s\n", VAPI_event_record_sym(event_p->type)));
|
|
||||||
for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
|
||||||
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1);
|
|
||||||
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* BWB - is this right? */
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
case VAPI_RECEIVE_QUEUE_DRAINED: {
|
|
||||||
fprintf(stderr, "VAPI_RECEIVE_QUEUE_DRAINEDD\n");
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
default:
|
|
||||||
BTL_ERROR(("Warning!! Got an undefined "
|
|
||||||
"asynchronous event %s", VAPI_event_record_sym(event_p->type)));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize the btl module by allocating a protection domain
|
|
||||||
* and creating both the high and low priority completion queues
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Allocate Protection Domain */
|
|
||||||
VAPI_ret_t ret;
|
|
||||||
uint32_t cqe_cnt = 0;
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
VAPI_srq_attr_t srq_attr, srq_attr_out, srq_attr_mod;
|
|
||||||
VAPI_srq_attr_mask_t srq_attr_mask;
|
|
||||||
uint32_t max_outs_wr;
|
|
||||||
#endif
|
|
||||||
ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag);
|
|
||||||
|
|
||||||
if(ret != VAPI_OK) {
|
|
||||||
BTL_ERROR(("error in VAPI_alloc_pd: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
|
||||||
mvapi_btl->srd_posted_hp = 0;
|
|
||||||
mvapi_btl->srd_posted_lp = 0;
|
|
||||||
srq_attr.pd_hndl = mvapi_btl->ptag;
|
|
||||||
srq_attr.max_outs_wr = mca_btl_mvapi_component.srq_rd_max;
|
|
||||||
srq_attr.max_sentries = mca_btl_mvapi_component.ib_sg_list_size;
|
|
||||||
|
|
||||||
srq_attr_mod.srq_limit = mvapi_btl->rd_num * 0.9;
|
|
||||||
ret = VAPI_create_srq(mvapi_btl->nic,
|
|
||||||
&srq_attr,
|
|
||||||
&mvapi_btl->srq_hndl_hp,
|
|
||||||
&srq_attr_out);
|
|
||||||
if(ret != VAPI_OK) {
|
|
||||||
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
srq_attr_mask = 0;
|
|
||||||
srq_attr_mask |= VAPI_SRQ_ATTR_LIMIT;
|
|
||||||
|
|
||||||
ret = VAPI_modify_srq
|
|
||||||
(
|
|
||||||
mvapi_btl->nic,
|
|
||||||
mvapi_btl->srq_hndl_hp,
|
|
||||||
&srq_attr_mod,
|
|
||||||
srq_attr_mask,
|
|
||||||
&max_outs_wr
|
|
||||||
);
|
|
||||||
|
|
||||||
if(ret != VAPI_OK) {
|
|
||||||
/* BTL_ERROR(("error in VAPI_modify_srq: %s", VAPI_strerror(ret))); */
|
|
||||||
/* return OMPI_ERROR; */
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = VAPI_create_srq(mvapi_btl->nic,
|
|
||||||
&srq_attr,
|
|
||||||
&mvapi_btl->srq_hndl_lp,
|
|
||||||
&srq_attr_out);
|
|
||||||
if(ret != VAPI_OK) {
|
|
||||||
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
srq_attr_mask = 0;
|
|
||||||
srq_attr_mask |= VAPI_SRQ_ATTR_LIMIT;
|
|
||||||
|
|
||||||
ret = VAPI_modify_srq
|
|
||||||
(
|
|
||||||
mvapi_btl->nic,
|
|
||||||
mvapi_btl->srq_hndl_lp,
|
|
||||||
&srq_attr_mod,
|
|
||||||
srq_attr_mask,
|
|
||||||
&max_outs_wr
|
|
||||||
);
|
|
||||||
|
|
||||||
if(ret != VAPI_OK) {
|
|
||||||
/* BTL_ERROR(("error in VAPI_modify_srq: %s", VAPI_strerror(ret))); */
|
|
||||||
/* return OMPI_ERROR; */
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} else {
|
|
||||||
mvapi_btl->srq_hndl_hp = VAPI_INVAL_SRQ_HNDL;
|
|
||||||
mvapi_btl->srq_hndl_lp = VAPI_INVAL_SRQ_HNDL;
|
|
||||||
}
|
|
||||||
#endif /* VAPI_FEATURE_SRQ */
|
|
||||||
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
|
||||||
&mvapi_btl->cq_hndl_lp, &cqe_cnt);
|
|
||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
|
||||||
BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
|
||||||
&mvapi_btl->cq_hndl_hp, &cqe_cnt);
|
|
||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
|
||||||
BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if(cqe_cnt <= 0) {
|
|
||||||
BTL_ERROR(("error creating completion queue "));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = EVAPI_set_async_event_handler(mvapi_btl->nic,
|
|
||||||
async_event_handler, 0, &mvapi_btl->async_handler);
|
|
||||||
|
|
||||||
if(VAPI_OK != ret) {
|
|
||||||
BTL_ERROR(("error in EVAPI_set_async_event_handler: %s", VAPI_strerror(ret)));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Dump state of btl/queues
|
|
||||||
*/
|
|
||||||
/*#include "orte/mca/ns/ns_types.h"*/
|
|
||||||
void mca_btl_mvapi_dump(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* endpoint,
|
|
||||||
int verbose)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*)btl;
|
|
||||||
|
|
||||||
if( NULL == endpoint ) {
|
|
||||||
opal_output( 0, "No endpoint for this peer\n" );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
opal_output( 0, "endpoint with processor %s\n",
|
|
||||||
ORTE_NAME_PRINT( &(endpoint->endpoint_proc->proc_ompi->proc_name) ) );
|
|
||||||
opal_output( 0, "endpoint state: %s\n",
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_CONNECTING ? "connecting" :
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_CONNECT_ACK ? "waiting ack" :
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_WAITING_ACK ? "waiting final ack" :
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_CONNECTED ? "connected" :
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_CLOSED ? "closed" :
|
|
||||||
(endpoint->endpoint_state == MCA_BTL_IB_FAILED ? "failed" : "unknown")))))));
|
|
||||||
|
|
||||||
opal_output( 0, "pending send frags: %u\n", (unsigned int)opal_list_get_size(&endpoint->pending_send_frags) );
|
|
||||||
opal_output( 0, "pending frags hp : %u\n", (unsigned int)opal_list_get_size(&endpoint->pending_frags_hp) );
|
|
||||||
opal_output( 0, "pending frags lp : %u\n", (unsigned int)opal_list_get_size(&endpoint->pending_frags_lp) );
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
if( mca_btl_mvapi_component.use_srq ) {
|
|
||||||
opal_output( 0, "mvapi_btl->srd_posted_hp %d\n", mvapi_btl->srd_posted_hp );
|
|
||||||
opal_output( 0, "mvapi_btl->srd_posted_lp %d\n", mvapi_btl->srd_posted_lp );
|
|
||||||
opal_output( 0, "mvapi_btl->sd_tokens_hp %d\n", mvapi_btl->sd_tokens_hp );
|
|
||||||
opal_output( 0, "mvapi_btl->sd_tokens_lp %d\n", mvapi_btl->sd_tokens_lp );
|
|
||||||
} else {
|
|
||||||
#endif /* VAPI_FEATURE_SRQ */
|
|
||||||
opal_output( 0, "sd_tokens_hp %d\n", endpoint->sd_tokens_hp );
|
|
||||||
opal_output( 0, "sd_tokens_lp %d\n", endpoint->sd_tokens_lp );
|
|
||||||
opal_output( 0, "get_tokens %d\n", endpoint->get_tokens );
|
|
||||||
opal_output( 0, "rd_posted_hp %d\n", endpoint->rd_posted_hp );
|
|
||||||
opal_output( 0, "rd_posted_lp %d\n", endpoint->rd_posted_lp );
|
|
||||||
opal_output( 0, "rd_credits_hp %d\n", endpoint->rd_credits_hp );
|
|
||||||
opal_output( 0, "rd_credits_lp %d\n", endpoint->rd_credits_lp );
|
|
||||||
opal_output( 0, "sd_credits_hp %d\n", endpoint->sd_credits_hp );
|
|
||||||
opal_output( 0, "sd_credits_lp %d\n", endpoint->sd_credits_lp );
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
}
|
|
||||||
#endif /* VAPI_FEATURE_SRQ */
|
|
||||||
opal_output( 0, "sd_wqe_hp %d\n", endpoint->sd_wqe_hp );
|
|
||||||
opal_output( 0, "sd_wqe_lp %d\n", endpoint->sd_wqe_lp );
|
|
||||||
}
|
|
||||||
|
|
||||||
int mca_btl_mvapi_ft_event(int state) {
|
|
||||||
if(OPAL_CRS_CHECKPOINT == state) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
else if(OPAL_CRS_CONTINUE == state) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
else if(OPAL_CRS_RESTART == state) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
else if(OPAL_CRS_TERM == state ) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
@ -1,532 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
*/
|
|
||||||
#ifndef MCA_PTL_IB_H
|
|
||||||
#define MCA_PTL_IB_H
|
|
||||||
|
|
||||||
/* Standard system includes */
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
/* Open MPI includes */
|
|
||||||
#include "ompi/class/ompi_free_list.h"
|
|
||||||
#include "ompi/class/ompi_bitmap.h"
|
|
||||||
#include "orte/class/orte_pointer_array.h"
|
|
||||||
#include "opal/event/event.h"
|
|
||||||
#include "ompi/mca/pml/pml.h"
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
|
||||||
#include "opal/util/output.h"
|
|
||||||
#include "ompi/mca/mpool/mpool.h"
|
|
||||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
|
||||||
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
|
||||||
#include "ompi/mca/btl/base/base.h"
|
|
||||||
#include "btl_mvapi_endpoint.h"
|
|
||||||
|
|
||||||
#include <vapi.h>
|
|
||||||
#include <mtl_common.h>
|
|
||||||
#include <vapi_common.h>
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MCA_BTL_IB_LEAVE_PINNED 1
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Infiniband (IB) BTL component.
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_component_t {
|
|
||||||
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
|
|
||||||
|
|
||||||
uint32_t ib_max_btls;
|
|
||||||
/**< maximum number of hcas available to the IB component */
|
|
||||||
|
|
||||||
uint32_t ib_num_btls;
|
|
||||||
/**< number of hcas available to the IB component */
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_module_t *mvapi_btls;
|
|
||||||
/**< array of available PTLs */
|
|
||||||
|
|
||||||
int ib_free_list_num;
|
|
||||||
/**< initial size of free lists */
|
|
||||||
|
|
||||||
int ib_free_list_max;
|
|
||||||
/**< maximum size of free lists */
|
|
||||||
|
|
||||||
int ib_free_list_inc;
|
|
||||||
/**< number of elements to alloc when growing free lists */
|
|
||||||
|
|
||||||
opal_list_t ib_procs;
|
|
||||||
/**< list of ib proc structures */
|
|
||||||
|
|
||||||
opal_event_t ib_send_event;
|
|
||||||
/**< event structure for sends */
|
|
||||||
|
|
||||||
opal_event_t ib_recv_event;
|
|
||||||
/**< event structure for recvs */
|
|
||||||
|
|
||||||
opal_mutex_t ib_lock;
|
|
||||||
/**< lock for accessing module state */
|
|
||||||
|
|
||||||
char* ib_mpool_name;
|
|
||||||
/**< name of ib memory pool */
|
|
||||||
|
|
||||||
int32_t rd_num; /**< the number of receive descriptors to post to each queue pair */
|
|
||||||
int32_t rd_low; /**< low water mark to reach before posting additional receive descriptors */
|
|
||||||
int32_t rd_win; /**< ack credits when window size exceeded */
|
|
||||||
int32_t rd_rsv; /**< descriptors held in reserve for control messages */
|
|
||||||
|
|
||||||
/* number of srq send tokens available */
|
|
||||||
int32_t srq_sd_max;
|
|
||||||
int32_t srq_rd_max;
|
|
||||||
int32_t srq_rd_per_peer;
|
|
||||||
/**< the number of recv desc posted per log(peer) in SRQ mode */
|
|
||||||
|
|
||||||
size_t eager_limit;
|
|
||||||
size_t max_send_size;
|
|
||||||
|
|
||||||
uint32_t reg_mru_len;
|
|
||||||
uint32_t use_srq;
|
|
||||||
|
|
||||||
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
|
|
||||||
uint32_t ib_wq_size; /**< Max outstanding WR on the WQ */
|
|
||||||
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
|
|
||||||
uint32_t ib_pkey_ix;
|
|
||||||
uint32_t ib_psn;
|
|
||||||
uint32_t ib_qp_ous_rd_atom;
|
|
||||||
uint32_t ib_mtu;
|
|
||||||
uint32_t ib_min_rnr_timer;
|
|
||||||
uint32_t ib_timeout;
|
|
||||||
uint32_t ib_retry_count;
|
|
||||||
uint32_t ib_rnr_retry;
|
|
||||||
uint32_t ib_max_rdma_dst_ops;
|
|
||||||
uint32_t ib_service_level;
|
|
||||||
uint32_t ib_static_rate;
|
|
||||||
uint32_t ib_src_path_bits;
|
|
||||||
uint32_t use_eager_rdma;
|
|
||||||
uint32_t eager_rdma_threshold;
|
|
||||||
uint32_t eager_rdma_num;
|
|
||||||
uint32_t max_eager_rdma;
|
|
||||||
}; typedef struct mca_btl_mvapi_component_t mca_btl_mvapi_component_t;
|
|
||||||
|
|
||||||
OMPI_MODULE_DECLSPEC extern mca_btl_mvapi_component_t mca_btl_mvapi_component;
|
|
||||||
|
|
||||||
typedef mca_btl_base_recv_reg_t mca_btl_mvapi_recv_reg_t;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IB PTL Interface
|
|
||||||
*/
|
|
||||||
struct mca_btl_mvapi_module_t {
|
|
||||||
mca_btl_base_module_t super; /**< base PTL interface */
|
|
||||||
bool btl_inited;
|
|
||||||
mca_btl_mvapi_recv_reg_t ib_reg[256];
|
|
||||||
mca_btl_mvapi_port_info_t port_info; /* contains only the subnet right now */
|
|
||||||
VAPI_hca_id_t hca_id; /**< ID of HCA */
|
|
||||||
IB_port_t port_id; /**< ID of the PORT */
|
|
||||||
VAPI_hca_port_t port; /**< IB port of this PTL */
|
|
||||||
VAPI_hca_hndl_t nic; /**< NIC handle */
|
|
||||||
VAPI_pd_hndl_t ptag; /**< Protection Domain tag */
|
|
||||||
|
|
||||||
VAPI_cq_hndl_t cq_hndl_hp; /**< High Priority Completion Queue handle */
|
|
||||||
VAPI_cq_hndl_t cq_hndl_lp; /**< Low Priority Completion Queue handle */
|
|
||||||
|
|
||||||
EVAPI_async_handler_hndl_t async_handler;
|
|
||||||
/**< Async event handler used to detect weird/unknown events */
|
|
||||||
|
|
||||||
ompi_free_list_t send_free_eager; /**< free list of eager buffer descriptors */
|
|
||||||
ompi_free_list_t send_free_max; /**< free list of max buffer descriptors */
|
|
||||||
ompi_free_list_t send_free_frag; /**< free list of frags only... used for pinning memory */
|
|
||||||
|
|
||||||
ompi_free_list_t recv_free_eager; /**< High priority free list of buffer descriptors */
|
|
||||||
ompi_free_list_t recv_free_max; /**< Low priority free list of buffer descriptors */
|
|
||||||
|
|
||||||
opal_mutex_t ib_lock; /**< module level lock */
|
|
||||||
|
|
||||||
VAPI_rr_desc_t* rr_desc_post; /**< an array to allow posting of rr in one swoop */
|
|
||||||
#ifdef VAPI_FEATURE_SRQ
|
|
||||||
VAPI_srq_hndl_t srq_hndl_hp; /**< A high priority shared receive queue
|
|
||||||
runtime optional, can also use a receive queue
|
|
||||||
per queue pair.. */
|
|
||||||
VAPI_srq_hndl_t srq_hndl_lp; /**< A low priority shared receive queue */
|
|
||||||
#endif
|
|
||||||
size_t ib_inline_max; /**< max size of inline send*/
|
|
||||||
int32_t num_peers;
|
|
||||||
|
|
||||||
int32_t srd_posted_hp; /**< number of high priority shared receive descriptors posted to the nic*/
|
|
||||||
int32_t srd_posted_lp; /**< number of low priority shared receive descriptors posted to the nic*/
|
|
||||||
|
|
||||||
int32_t rd_num; /**< number of receive descriptors to post to srq */
|
|
||||||
int32_t rd_low; /**< low water mark before reposting descriptors to srq */
|
|
||||||
|
|
||||||
int32_t sd_tokens_hp; /**< number of send tokens available on high priority srq */
|
|
||||||
int32_t sd_tokens_lp; /**< number of send tokens available on low priority srq */
|
|
||||||
|
|
||||||
opal_list_t pending_frags_hp; /**< list of pending high priority frags */
|
|
||||||
opal_list_t pending_frags_lp; /**< list of pending low priority frags */
|
|
||||||
|
|
||||||
opal_mutex_t eager_rdma_lock;
|
|
||||||
size_t eager_rdma_frag_size; /**< length of eager frag */
|
|
||||||
orte_pointer_array_t *eager_rdma_buffers; /**< RDMA buffers to poll */
|
|
||||||
uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
|
|
||||||
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_reg_t {
|
|
||||||
mca_mpool_base_registration_t base;
|
|
||||||
VAPI_mr_hndl_t hndl; /* Memory region handle */
|
|
||||||
VAPI_lkey_t l_key; /* Local key to registered memory */
|
|
||||||
VAPI_rkey_t r_key; /* Remote key to registered memory */
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_reg_t mca_btl_mvapi_reg_t;
|
|
||||||
|
|
||||||
/*
 * Top up the high-priority shared receive queue.
 *
 * While holding mvapi_btl->ib_lock: if srd_posted_hp is at or below
 * rd_low + additional, and still below rd_num, post
 * (rd_num - srd_posted_hp) descriptors taken from recv_free_eager to
 * srq_hndl_hp through MCA_BTL_MVAPI_POST_SRR_SUB.
 *
 * mvapi_btl   pointer to the module (evaluated several times, so it must
 *             be free of side effects)
 * additional  extra head-room added to the low-water mark
 *
 * Expands to a single do { } while (0) statement: invoke it with a
 * trailing semicolon.  This lets it sit safely in an unbraced if/else.
 */
#define MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl,                                 \
                                    additional)                                \
    do {                                                                       \
        OPAL_THREAD_LOCK(&(mvapi_btl)->ib_lock);                               \
        if((mvapi_btl)->srd_posted_hp <=                                       \
               (mvapi_btl)->rd_low + (additional) &&                           \
           (mvapi_btl)->srd_posted_hp < (mvapi_btl)->rd_num) {                 \
            MCA_BTL_MVAPI_POST_SRR_SUB((mvapi_btl)->rd_num -                   \
                                           (mvapi_btl)->srd_posted_hp,         \
                                       (mvapi_btl),                            \
                                       &(mvapi_btl)->recv_free_eager,          \
                                       &(mvapi_btl)->srd_posted_hp,            \
                                       (mvapi_btl)->nic,                       \
                                       (mvapi_btl)->srq_hndl_hp);              \
        }                                                                      \
        OPAL_THREAD_UNLOCK(&(mvapi_btl)->ib_lock);                             \
    } while(0)
|
|
||||||
|
|
||||||
/*
 * Top up the low-priority shared receive queue.
 *
 * While holding mvapi_btl->ib_lock: if srd_posted_lp is at or below
 * rd_low + additional, and still below rd_num, post
 * (rd_num - srd_posted_lp) descriptors taken from recv_free_max to
 * srq_hndl_lp through MCA_BTL_MVAPI_POST_SRR_SUB.
 *
 * mvapi_btl   pointer to the module (evaluated several times, so it must
 *             be free of side effects)
 * additional  extra head-room added to the low-water mark
 *
 * Expands to a single do { } while (0) statement: invoke it with a
 * trailing semicolon.  This lets it sit safely in an unbraced if/else.
 */
#define MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl,                                  \
                                   additional)                                 \
    do {                                                                       \
        OPAL_THREAD_LOCK(&(mvapi_btl)->ib_lock);                               \
        if((mvapi_btl)->srd_posted_lp <=                                       \
               (mvapi_btl)->rd_low + (additional) &&                           \
           (mvapi_btl)->srd_posted_lp < (mvapi_btl)->rd_num) {                 \
            MCA_BTL_MVAPI_POST_SRR_SUB((mvapi_btl)->rd_num -                   \
                                           (mvapi_btl)->srd_posted_lp,         \
                                       (mvapi_btl),                            \
                                       &(mvapi_btl)->recv_free_max,            \
                                       &(mvapi_btl)->srd_posted_lp,            \
                                       (mvapi_btl)->nic,                       \
                                       (mvapi_btl)->srq_hndl_lp);              \
        }                                                                      \
        OPAL_THREAD_UNLOCK(&(mvapi_btl)->ib_lock);                             \
    } while(0)
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Post `cnt` receive descriptors to a shared receive queue.
 *
 * cnt         number of descriptors to post; evaluated exactly once
 * mvapi_btl   pointer to the module; its rr_desc_post array is used as the
 *             staging area for the descriptors
 * frag_list   free list the receive fragments are taken from
 * srd_posted  pointer to the int32_t posted-descriptor counter to bump
 * nic         HCA handle passed to VAPI_post_srq
 * srq_hndl    shared receive queue handle passed to VAPI_post_srq
 *
 * For each descriptor a fragment is taken with OMPI_FREE_LIST_WAIT, its
 * scatter/gather length is set to the fragment size plus the distance from
 * the header to the payload, and its receive descriptor is copied into the
 * staging array.  The batch is then handed to VAPI_post_srq.  On success
 * the counter is increased by the requested count; on failure an error is
 * logged and the counter is left alone.
 *
 * NOTE(review): the status written by OMPI_FREE_LIST_WAIT into `rc` is not
 * checked, and a partial post (0 < posted < cnt) is still accounted as
 * `cnt` - both carried over unchanged from the original; confirm intent.
 *
 * Expands to a single do { } while (0) statement: invoke it with a
 * trailing semicolon.
 */
#define MCA_BTL_MVAPI_POST_SRR_SUB(cnt,                                        \
                                   mvapi_btl,                                  \
                                   frag_list,                                  \
                                   srd_posted,                                 \
                                   nic,                                        \
                                   srq_hndl)                                   \
    do {                                                                       \
        int32_t i;                                                             \
        /* snapshot: callers pass an expression over the live counter */       \
        int32_t post_cnt = (cnt);                                              \
        VAPI_ret_t ret;                                                        \
        uint32_t rwqe_posted = 0;                                              \
        int rc;                                                                \
        ompi_free_list_item_t* item = NULL;                                    \
        mca_btl_mvapi_frag_t* frag = NULL;                                     \
        VAPI_rr_desc_t* desc_post = (mvapi_btl)->rr_desc_post;                 \
        for(i = 0; i < post_cnt; i++) {                                        \
            OMPI_FREE_LIST_WAIT((frag_list), item, rc);                        \
            frag = (mca_btl_mvapi_frag_t*) item;                               \
            frag->sg_entry.len = frag->size +                                  \
                ((unsigned char*) frag->segment.seg_addr.pval -                \
                 (unsigned char*) frag->hdr);                                  \
            desc_post[i] = frag->desc.rr_desc;                                 \
        }                                                                      \
        ret = VAPI_post_srq( (nic),                                            \
                             (srq_hndl),                                       \
                             post_cnt,                                         \
                             desc_post,                                        \
                             &rwqe_posted);                                    \
        if(VAPI_OK != ret) {                                                   \
            BTL_ERROR(("error posting receive descriptors to shared receive queue: %s", \
                       VAPI_strerror(ret)));                                   \
        } else if(rwqe_posted < 1) {                                           \
            BTL_ERROR(("error posting receive descriptors to shared receive queue, number of entries posted is %u", (unsigned int) rwqe_posted)); \
        } else {                                                               \
            OPAL_THREAD_ADD32((srd_posted), post_cnt);                         \
        }                                                                      \
    } while(0)
|
|
||||||
struct mca_btl_mvapi_frag_t;
|
|
||||||
extern mca_btl_mvapi_module_t mca_btl_mvapi_module;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register IB component parameters with the MCA framework
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_component_open(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Any final cleanup before being unloaded.
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_component_close(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IB component initialization.
|
|
||||||
*
|
|
||||||
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
|
|
||||||
* @param allow_multi_user_threads (OUT) Flag indicating whether BTL supports user threads (TRUE)
|
|
||||||
* @param have_hidden_threads (OUT) Flag indicating whether BTL uses threads (TRUE)
|
|
||||||
*
|
|
||||||
* (1) read interface list from kernel and compare against component parameters
|
|
||||||
* then create a BTL instance for selected interfaces
|
|
||||||
* (2) setup IB listen socket for incoming connection attempts
|
|
||||||
* (3) publish BTL addressing info
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
extern mca_btl_base_module_t** mca_btl_mvapi_component_init(
|
|
||||||
int *num_btl_modules,
|
|
||||||
bool allow_multi_user_threads,
|
|
||||||
bool have_hidden_threads
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IB component progress.
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_component_progress( void );
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register a callback function that is called on receipt
|
|
||||||
* of a fragment.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @return Status indicating if cleanup was successful
|
|
||||||
*
|
|
||||||
* When the process list changes, the PML notifies the BTL of the
|
|
||||||
* change, to provide the opportunity to cleanup or release any
|
|
||||||
* resources associated with the peer.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int mca_btl_mvapi_register(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_tag_t tag,
|
|
||||||
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
|
||||||
void* cbdata
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Cleanup any resources held by the BTL.
|
|
||||||
*
|
|
||||||
* @param btl BTL instance.
|
|
||||||
* @return OMPI_SUCCESS or error status on failure.
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern int mca_btl_mvapi_finalize(
|
|
||||||
struct mca_btl_base_module_t* btl
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PML->BTL notification of change in the process list.
|
|
||||||
*
|
|
||||||
* @param btl (IN)
|
|
||||||
* @param nprocs (IN) Number of processes
|
|
||||||
* @param procs (IN) Set of processes
|
|
||||||
* @param peers (OUT) Set of (optional) peer addressing info.
|
|
||||||
* @param reachable (IN/OUT) Set of processes that are reachable via this BTL.
|
|
||||||
* @return OMPI_SUCCESS or error status on failure.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern int mca_btl_mvapi_add_procs(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t **procs,
|
|
||||||
struct mca_btl_base_endpoint_t** peers,
|
|
||||||
ompi_bitmap_t* reachable
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PML->BTL notification of change in the process list.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL instance
|
|
||||||
* @param nproc (IN) Number of processes.
|
|
||||||
* @param procs (IN) Set of processes.
|
|
||||||
* @param peers (IN) Set of peer data structures.
|
|
||||||
* @return Status indicating if cleanup was successful
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_del_procs(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t **procs,
|
|
||||||
struct mca_btl_base_endpoint_t** peers
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PML->BTL Initiate a send of the specified size.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL instance
|
|
||||||
* @param btl_base_peer (IN) BTL peer addressing
|
|
||||||
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
|
|
||||||
* @param size (IN) Number of bytes PML is requesting BTL to deliver
|
|
||||||
* @param flags (IN) Flags that should be passed to the peer via the message header.
|
|
||||||
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_send(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* btl_peer,
|
|
||||||
struct mca_btl_base_descriptor_t* descriptor,
|
|
||||||
mca_btl_base_tag_t tag
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PML->BTL Initiate a put of the specified size.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL instance
|
|
||||||
* @param btl_base_peer (IN) BTL peer addressing
|
|
||||||
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
|
|
||||||
* @param size (IN) Number of bytes PML is requesting BTL to deliver
|
|
||||||
* @param flags (IN) Flags that should be passed to the peer via the message header.
|
|
||||||
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_put(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* btl_peer,
|
|
||||||
struct mca_btl_base_descriptor_t* decriptor
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PML->BTL Initiate a get of the specified size.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL instance
|
|
||||||
* @param btl_base_peer (IN) BTL peer addressing
|
|
||||||
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
|
|
||||||
* @param size (IN) Number of bytes PML is requesting BTL to deliver
|
|
||||||
* @param flags (IN) Flags that should be passed to the peer via the message header.
|
|
||||||
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_get(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* btl_peer,
|
|
||||||
struct mca_btl_base_descriptor_t* decriptor
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate a descriptor.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param size (IN) Requested descriptor size.
|
|
||||||
*/
|
|
||||||
extern mca_btl_base_descriptor_t* mca_btl_mvapi_alloc(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
uint8_t order,
|
|
||||||
size_t size);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return a segment allocated by this BTL.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param descriptor (IN) Allocated descriptor.
|
|
||||||
*/
|
|
||||||
extern int mca_btl_mvapi_free(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
mca_btl_base_descriptor_t* des);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Pack data and return a descriptor that can be
|
|
||||||
* used for send/put.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param peer (IN) BTL peer addressing
|
|
||||||
*/
|
|
||||||
mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* peer,
|
|
||||||
mca_mpool_base_registration_t* registration,
|
|
||||||
struct ompi_convertor_t* convertor,
|
|
||||||
uint8_t order,
|
|
||||||
size_t reserve,
|
|
||||||
size_t* size
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate a descriptor initialized for RDMA write.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL module
|
|
||||||
* @param peer (IN) BTL peer addressing
|
|
||||||
*/
|
|
||||||
extern mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* peer,
|
|
||||||
mca_mpool_base_registration_t* registration,
|
|
||||||
struct ompi_convertor_t* convertor,
|
|
||||||
uint8_t order,
|
|
||||||
size_t reserve,
|
|
||||||
size_t* size);
|
|
||||||
/**
|
|
||||||
* Return a send fragment to the modules free list.
|
|
||||||
*
|
|
||||||
* @param btl (IN) BTL instance
|
|
||||||
* @param frag (IN) IB send fragment
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
extern void mca_btl_mvapi_send_frag_return(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_mvapi_frag_t*
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Dump state of btl/queues
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern void mca_btl_mvapi_dump(
|
|
||||||
struct mca_btl_base_module_t* btl,
|
|
||||||
struct mca_btl_base_endpoint_t* endpoint,
|
|
||||||
int verbose
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t* mvapi_btl);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fault Tolerance Event Notification Function
|
|
||||||
* @param state Checkpoint State
|
|
||||||
* @return OMPI_SUCCESS or failure status
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_ft_event(int state);
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,88 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2006 Voltaire All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_BTL_MVAPI_EAGER_RDMA_BUF_H
|
|
||||||
#define MCA_BTL_MVAPI_EAGER_RDMA_BUF_H
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_reg_t;
|
|
||||||
struct mca_btl_mvapi_frag_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_eager_rdma_local_t {
|
|
||||||
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
|
|
||||||
struct mca_btl_mvapi_frag_t *frags;
|
|
||||||
struct mca_btl_mvapi_reg_t *reg;
|
|
||||||
uint16_t head; /**< RDMA buffer to poll */
|
|
||||||
uint16_t tail; /**< Needed for credit management */
|
|
||||||
int32_t credits; /**< number of RDMA credits */
|
|
||||||
#if OMPI_ENABLE_DEBUG
|
|
||||||
uint32_t seq;
|
|
||||||
#endif
|
|
||||||
opal_mutex_t lock; /**< guard access to RDMA buffer */
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_eager_rdma_local_t mca_btl_mvapi_eager_rdma_local_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_eager_rdma_remote_t {
|
|
||||||
ompi_ptr_t base; /**< address of remote buffer */
|
|
||||||
uint64_t rkey; /**< RKey for accessing remote buffer */
|
|
||||||
uint16_t head; /**< RDMA buffer to post to */
|
|
||||||
int32_t tokens; /**< number of rdma tokens */
|
|
||||||
#if OMPI_ENABLE_DEBUG
|
|
||||||
uint32_t seq;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_eager_rdma_remote_t mca_btl_mvapi_eager_rdma_remote_t;
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG(F) ((F)->type == MCA_BTL_MVAPI_FRAG_EAGER_RDMA)
|
|
||||||
|
|
||||||
#define EAGER_RDMA_BUFFER_REMOTE (0)
|
|
||||||
#define EAGER_RDMA_BUFFER_LOCAL (0xff)
|
|
||||||
|
|
||||||
#ifdef WORDS_BIGENDIAN
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_GET_SIZE(F) ((F)->u.size >> 8)
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_SET_SIZE(F, S) \
|
|
||||||
((F)->u.size = (S) << 8)
|
|
||||||
#else
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_GET_SIZE(F) ((F)->u.size & 0x00ffffff)
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_SET_SIZE(F, S) \
|
|
||||||
((F)->u.size = (S) & 0x00ffffff)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_LOCAL(F) \
|
|
||||||
(((volatile uint8_t*)(F)->ftr->u.buf)[3] != EAGER_RDMA_BUFFER_REMOTE)
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_FRAG_REMOTE(F) \
|
|
||||||
(!MCA_BTL_MVAPI_RDMA_FRAG_LOCAL(F))
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_MAKE_REMOTE(F) do { \
|
|
||||||
((volatile uint8_t*)(F)->u.buf)[3] = EAGER_RDMA_BUFFER_REMOTE; \
|
|
||||||
}while (0)
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_MAKE_LOCAL(F) do { \
|
|
||||||
((volatile uint8_t*)(F)->u.buf)[3] = EAGER_RDMA_BUFFER_LOCAL; \
|
|
||||||
}while (0)
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_GET_LOCAL_RDMA_FRAG(E, I) \
|
|
||||||
(&(E)->eager_rdma_local.frags[(I)])
|
|
||||||
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_NEXT_INDEX(I) do { \
|
|
||||||
(I) = ((I) + 1) % \
|
|
||||||
mca_btl_mvapi_component.eager_rdma_num; \
|
|
||||||
} while (0)
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,257 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Cisco, Inc. All Rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_BTL_IB_ENDPOINT_H
|
|
||||||
#define MCA_BTL_IB_ENDPOINT_H
|
|
||||||
|
|
||||||
#include "opal/class/opal_list.h"
|
|
||||||
#include "opal/event/event.h"
|
|
||||||
#include "ompi/mca/pml/pml.h"
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
|
||||||
#include "btl_mvapi_frag.h"
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
#include "btl_mvapi_eager_rdma.h"
|
|
||||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
|
||||||
|
|
||||||
#include <vapi.h>
|
|
||||||
#include <mtl_common.h>
|
|
||||||
#include <vapi_common.h>
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_frag_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_port_info_t {
|
|
||||||
uint32_t subnet;
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_port_info_t mca_btl_mvapi_port_info_t;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* State of IB endpoint connection.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
/* Defines the state in which this BTL instance
|
|
||||||
* has started the process of connection */
|
|
||||||
MCA_BTL_IB_CONNECTING,
|
|
||||||
|
|
||||||
/* Waiting for ack from endpoint */
|
|
||||||
MCA_BTL_IB_CONNECT_ACK,
|
|
||||||
|
|
||||||
/*Waiting for final connection ACK from endpoint */
|
|
||||||
MCA_BTL_IB_WAITING_ACK,
|
|
||||||
|
|
||||||
/* Connected ... both sender & receiver have
|
|
||||||
* buffers associated with this connection */
|
|
||||||
MCA_BTL_IB_CONNECTED,
|
|
||||||
|
|
||||||
/* Connection is closed, there are no resources
|
|
||||||
* associated with this */
|
|
||||||
MCA_BTL_IB_CLOSED,
|
|
||||||
|
|
||||||
/* Maximum number of retries have been used.
|
|
||||||
* Report failure on send to upper layer */
|
|
||||||
MCA_BTL_IB_FAILED
|
|
||||||
} mca_btl_mvapi_endpoint_state_t;
|
|
||||||
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_rem_info_t {
|
|
||||||
|
|
||||||
VAPI_qp_num_t rem_qp_num_hp;
|
|
||||||
/* High priority remote side QP number */
|
|
||||||
|
|
||||||
VAPI_qp_num_t rem_qp_num_lp;
|
|
||||||
/* Low prioirty remote size QP number */
|
|
||||||
|
|
||||||
IB_lid_t rem_lid;
|
|
||||||
/* Local identifier of the remote process */
|
|
||||||
|
|
||||||
uint32_t rem_subnet;
|
|
||||||
/* subnet of remote process */
|
|
||||||
|
|
||||||
} ;
|
|
||||||
typedef struct mca_btl_mvapi_rem_info_t mca_btl_mvapi_rem_info_t;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An abstraction that represents a connection to a endpoint process.
|
|
||||||
* An instance of mca_btl_base_endpoint_t is associated w/ each process
|
|
||||||
* and BTL pair at startup. However, connections to the endpoint
|
|
||||||
* are established dynamically on an as-needed basis:
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct mca_btl_base_endpoint_t {
|
|
||||||
opal_list_item_t super;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_module_t* endpoint_btl;
|
|
||||||
/**< BTL instance that created this connection */
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_proc_t* endpoint_proc;
|
|
||||||
/**< proc structure corresponding to endpoint */
|
|
||||||
|
|
||||||
mca_btl_mvapi_endpoint_state_t endpoint_state;
|
|
||||||
/**< current state of the connection */
|
|
||||||
|
|
||||||
size_t endpoint_retries;
|
|
||||||
/**< number of connection retries attempted */
|
|
||||||
|
|
||||||
double endpoint_tstamp;
|
|
||||||
/**< timestamp of when the first connection was attempted */
|
|
||||||
|
|
||||||
opal_mutex_t endpoint_lock;
|
|
||||||
/**< lock for concurrent access to endpoint state */
|
|
||||||
|
|
||||||
opal_list_t pending_send_frags;
|
|
||||||
/**< list of pending send frags for this endpoint */
|
|
||||||
|
|
||||||
opal_list_t pending_frags_hp; /**< list of pending high priority frags */
|
|
||||||
opal_list_t pending_frags_lp; /**< list of pending low priority frags */
|
|
||||||
|
|
||||||
mca_btl_mvapi_rem_info_t rem_info;
|
|
||||||
|
|
||||||
VAPI_qp_hndl_t lcl_qp_hndl_hp; /* High priority local QP handle */
|
|
||||||
VAPI_qp_hndl_t lcl_qp_hndl_lp; /* Low priority local QP handle */
|
|
||||||
|
|
||||||
VAPI_qp_prop_t lcl_qp_prop_hp; /* High priority local QP properties */
|
|
||||||
VAPI_qp_prop_t lcl_qp_prop_lp; /* Low priority local QP properties */
|
|
||||||
|
|
||||||
int32_t sd_tokens_hp; /**< number of high priority send tokens */
|
|
||||||
int32_t sd_tokens_lp; /**< number of low priority send tokens */
|
|
||||||
int32_t get_tokens; /**< number of available get tokens */
|
|
||||||
|
|
||||||
int32_t rd_posted_hp; /**< number of high priority descriptors posted to the nic*/
|
|
||||||
int32_t rd_posted_lp; /**< number of low priority descriptors posted to the nic*/
|
|
||||||
int32_t rd_credits_hp; /**< number of high priority credits to return to peer */
|
|
||||||
int32_t rd_credits_lp; /**< number of low priority credits to return to peer */
|
|
||||||
int32_t sd_credits_hp; /**< number of send wqe entries being used to return credits */
|
|
||||||
int32_t sd_credits_lp; /**< number of send wqe entries being used to return credits */
|
|
||||||
int32_t sd_wqe_hp; /**< number of available high priority send wqe entries */
|
|
||||||
int32_t sd_wqe_lp; /**< number of available low priority send wqe entries */
|
|
||||||
|
|
||||||
uint32_t subnet;
|
|
||||||
|
|
||||||
uint32_t eager_recv_count; /**< number of eager received */
|
|
||||||
mca_btl_mvapi_eager_rdma_remote_t eager_rdma_remote;
|
|
||||||
/**< info about remote RDMA buffer */
|
|
||||||
mca_btl_mvapi_eager_rdma_local_t eager_rdma_local;
|
|
||||||
/**< info about local RDMA buffer */
|
|
||||||
int32_t eager_rdma_index; /**< index into RDMA buffers pointer array */
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
|
||||||
typedef mca_btl_base_endpoint_t mca_btl_mvapi_endpoint_t;
|
|
||||||
int mca_btl_mvapi_endpoint_send(mca_btl_base_endpoint_t* endpoint, struct mca_btl_mvapi_frag_t* frag);
|
|
||||||
int mca_btl_mvapi_endpoint_connect(mca_btl_base_endpoint_t*);
|
|
||||||
void mca_btl_mvapi_endpoint_send_credits_hp(mca_btl_base_endpoint_t*);
|
|
||||||
void mca_btl_mvapi_endpoint_send_credits_lp(mca_btl_base_endpoint_t*);
|
|
||||||
void mca_btl_mvapi_post_recv(void);
|
|
||||||
void mca_btl_mvapi_endpoint_connect_eager_rdma(mca_btl_mvapi_endpoint_t*);
|
|
||||||
|
|
||||||
/*
 * Top up the high priority receive queue of an endpoint.
 *
 * While holding the module's ib_lock: if the number of posted high
 * priority descriptors has dropped to rd_low + additional or below (and is
 * still below rd_num), post rd_num - rd_posted_hp new receive descriptors
 * taken from the module's recv_free_eager list on the high priority QP.
 *
 *   endpoint    endpoint whose queue is replenished (expanded several
 *               times; pass an expression without side effects)
 *   additional  extra slack added to the rd_low threshold
 *
 * The macro arguments are parenthesized so that any expression can be
 * passed.  The outer-brace shape of the expansion is kept unchanged so
 * that existing call sites, with or without a trailing semicolon, still
 * compile.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, \
                                            additional) \
{ \
    do { \
        mca_btl_mvapi_module_t * mvapi_btl = (endpoint)->endpoint_btl; \
        OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
        if((endpoint)->rd_posted_hp <= mca_btl_mvapi_component.rd_low+(additional) && \
           (endpoint)->rd_posted_hp < mvapi_btl->rd_num){ \
            MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
                                               (endpoint)->rd_posted_hp, \
                                               (endpoint), \
                                               &mvapi_btl->recv_free_eager, \
                                               (endpoint)->rd_posted_hp, \
                                               (endpoint)->rd_credits_hp, \
                                               mvapi_btl->nic, \
                                               (endpoint)->lcl_qp_hndl_hp); \
        } \
        OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
    } while(0); \
}
|
|
||||||
|
|
||||||
/*
 * Top up the low priority receive queue of an endpoint.
 *
 * While holding the module's ib_lock: if the number of posted low
 * priority descriptors has dropped to rd_low + additional or below (and is
 * still below rd_num), post rd_num - rd_posted_lp new receive descriptors
 * taken from the module's recv_free_max list on the low priority QP.
 *
 *   endpoint    endpoint whose queue is replenished (expanded several
 *               times; pass an expression without side effects)
 *   additional  extra slack added to the rd_low threshold
 *
 * The macro arguments are parenthesized so that any expression can be
 * passed.  The outer-brace shape of the expansion is kept unchanged so
 * that existing call sites, with or without a trailing semicolon, still
 * compile.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, \
                                           additional) \
{ \
    do { \
        mca_btl_mvapi_module_t * mvapi_btl = (endpoint)->endpoint_btl; \
        OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
        if((endpoint)->rd_posted_lp <= mca_btl_mvapi_component.rd_low+(additional) && \
           (endpoint)->rd_posted_lp < mvapi_btl->rd_num){ \
            MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
                                               (endpoint)->rd_posted_lp, \
                                               (endpoint), \
                                               &mvapi_btl->recv_free_max, \
                                               (endpoint)->rd_posted_lp, \
                                               (endpoint)->rd_credits_lp, \
                                               mvapi_btl->nic, \
                                               (endpoint)->lcl_qp_hndl_lp); \
        } \
        OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
    } while(0); \
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Post `cnt` receive descriptors for an endpoint.
 *
 * For each descriptor a fragment is obtained from `frag_list` with
 * OMPI_FREE_LIST_WAIT, bound to the endpoint, given a scatter/gather length
 * of frag->size plus the distance between the header and the start of the
 * segment, and its receive descriptor is copied into the module's
 * rr_desc_post array.  The whole array is then posted with
 * EVAPI_post_rr_list().  On success `rd_posted` and `rd_credits` are both
 * increased by `cnt`; on failure an error is logged.
 *
 *   cnt         number of descriptors to post
 *   my_endpoint endpoint the fragments are bound to
 *   frag_list   free list the receive fragments are taken from
 *   rd_posted   lvalue: counter of posted descriptors
 *   rd_credits  lvalue: counter of credits to return
 *   nic, qp     handles passed to EVAPI_post_rr_list()
 *
 * `cnt` and `my_endpoint` are each evaluated exactly once, before any
 * other macro-local name is declared, so an argument such as
 * `frag->endpoint` is no longer captured by the macro's own `frag` variable
 * inside the loop.
 *
 * NOTE(review): when EVAPI_post_rr_list() fails, the fragments taken from
 * the free list are not handed back here -- confirm whether that is
 * intended.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(cnt, \
                                           my_endpoint, \
                                           frag_list, \
                                           rd_posted, \
                                           rd_credits, \
                                           nic, \
                                           qp ) \
{ \
    do { \
        int32_t num_post = (cnt); \
        mca_btl_base_endpoint_t *post_rr_endpoint_ = (my_endpoint); \
        int32_t i; \
        int rc; \
        mca_btl_mvapi_module_t *mvapi_btl = post_rr_endpoint_->endpoint_btl; \
        VAPI_rr_desc_t* desc_post = mvapi_btl->rr_desc_post; \
        for(i = 0; i < num_post; i++) { \
            ompi_free_list_item_t* item; \
            mca_btl_mvapi_frag_t* frag = NULL; \
            OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
            frag = (mca_btl_mvapi_frag_t*) item; \
            frag->endpoint = post_rr_endpoint_; \
            frag->sg_entry.len = frag->size + \
                ((unsigned char*) frag->segment.seg_addr.pval- \
                 (unsigned char*) frag->hdr); \
            desc_post[i] = frag->desc.rr_desc; \
        }\
        rc = EVAPI_post_rr_list( nic, \
                                 qp, \
                                 num_post, \
                                 desc_post); \
        if(VAPI_OK != rc) { \
            BTL_ERROR(("error posting receive descriptors: %s",\
                       VAPI_strerror(rc))); \
        } else { \
            OPAL_THREAD_ADD32(&(rd_posted), num_post); \
            OPAL_THREAD_ADD32(&(rd_credits), num_post); \
        }\
    } while(0); \
}
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
@ -1,160 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "btl_mvapi_frag.h"
|
|
||||||
|
|
||||||
/*
 * Initialization shared by every fragment flavour.
 *
 * Points the fragment header at the memory attached to the free list item,
 * places the segment directly behind the header, and fills in the
 * scatter/gather entry (address and local key) from the item's memory
 * registration.  frag->size must already be set by the caller.
 */
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
{
    mca_btl_mvapi_reg_t* reg;

    frag->base.des_flags = 0;

    /* the btl header lives at the start of the item's buffer ... */
    frag->hdr = (mca_btl_mvapi_header_t*)frag->base.super.ptr;

    /* ... and the segment starts right after it */
    frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
    frag->segment.seg_len = frag->size;

    reg = (mca_btl_mvapi_reg_t*)frag->base.super.registration;
    frag->sg_entry.lkey = reg->l_key;
    frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
    frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
}
|
|
||||||
|
|
||||||
/*
 * Initialization shared by all send fragments: run the common
 * constructor, expose the segment as the single source segment, and
 * prepare a signaled VAPI_SEND request over the fragment's one
 * scatter/gather entry.  The request id is the fragment's own address.
 */
static void mca_btl_mvapi_send_frag_common_constructor(mca_btl_mvapi_frag_t* frag)
{
    mca_btl_mvapi_frag_common_constructor(frag);

    /* send request descriptor */
    frag->desc.sr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
    frag->desc.sr_desc.opcode = VAPI_SEND;
    frag->desc.sr_desc.comp_type = VAPI_SIGNALED;
    frag->desc.sr_desc.remote_qkey = 0;
    frag->desc.sr_desc.sg_lst_p = &frag->sg_entry;
    frag->desc.sr_desc.sg_lst_len = 1;

    /* one source segment, no destination segments */
    frag->base.des_src_cnt = 1;
    frag->base.des_src = &frag->segment;
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst = NULL;
}
|
|
||||||
|
|
||||||
/*
 * Initialization shared by all receive fragments: run the common
 * constructor, expose the segment as the single destination segment, and
 * prepare a signaled VAPI_RECEIVE request over the fragment's one
 * scatter/gather entry.  The request id is the fragment's own address.
 */
static void mca_btl_mvapi_recv_frag_common_constructor(mca_btl_mvapi_frag_t* frag)
{
    mca_btl_mvapi_frag_common_constructor(frag);

    /* receive request descriptor */
    frag->desc.rr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
    frag->desc.rr_desc.opcode = VAPI_RECEIVE;
    frag->desc.rr_desc.comp_type = VAPI_SIGNALED;
    frag->desc.rr_desc.sg_lst_p = &frag->sg_entry;
    frag->desc.rr_desc.sg_lst_len = 1;

    /* one destination segment, no source segments */
    frag->base.des_dst_cnt = 1;
    frag->base.des_dst = &frag->segment;
    frag->base.des_src_cnt = 0;
    frag->base.des_src = NULL;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Constructor for eager send fragments: payload size is the component's
 * eager_limit.
 */
static void mca_btl_mvapi_send_frag_eager_constructor(mca_btl_mvapi_frag_t* frag)
{
    frag->type = MCA_BTL_MVAPI_FRAG_EAGER;
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_mvapi_component.eager_limit;
    mca_btl_mvapi_send_frag_common_constructor(frag);
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Constructor for maximum-size send fragments: payload size is the
 * component's max_send_size.
 */
static void mca_btl_mvapi_send_frag_max_constructor(mca_btl_mvapi_frag_t* frag)
{
    frag->type = MCA_BTL_MVAPI_FRAG_MAX;
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_mvapi_component.max_send_size;
    mca_btl_mvapi_send_frag_common_constructor(frag);
}
|
|
||||||
|
|
||||||
/*
 * Constructor for maximum-size receive fragments: payload size is the
 * component's max_send_size.
 */
static void mca_btl_mvapi_recv_frag_max_constructor(mca_btl_mvapi_frag_t* frag)
{
    frag->type = MCA_BTL_MVAPI_FRAG_MAX;
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_mvapi_component.max_send_size;
    mca_btl_mvapi_recv_frag_common_constructor(frag);
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Constructor for eager receive fragments: payload size is the
 * component's eager_limit.  The footer is placed immediately behind the
 * payload area and passed to MCA_BTL_MVAPI_RDMA_MAKE_REMOTE.
 */
static void mca_btl_mvapi_recv_frag_eager_constructor(mca_btl_mvapi_frag_t* frag)
{
    char* payload;

    frag->type = MCA_BTL_MVAPI_FRAG_EAGER;
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_mvapi_component.eager_limit;
    mca_btl_mvapi_recv_frag_common_constructor(frag);

    /* the footer sits frag->size bytes past the start of the segment */
    payload = (char*)frag->segment.seg_addr.pval;
    frag->ftr = (mca_btl_mvapi_footer_t*)(payload + frag->size);
    MCA_BTL_MVAPI_RDMA_MAKE_REMOTE(frag->ftr);
}
|
|
||||||
|
|
||||||
/*
 * Constructor for "frag" send fragments: they carry no payload of their
 * own (size 0) and start out without a registration.
 */
static void mca_btl_mvapi_send_frag_frag_constructor(mca_btl_mvapi_frag_t* frag)
{
    frag->registration = NULL;
    frag->type = MCA_BTL_MVAPI_FRAG_FRAG;
    /* the size must be in place before the common constructor reads it */
    frag->size = 0;
    mca_btl_mvapi_send_frag_common_constructor(frag);
}
|
|
||||||
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_frag_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
NULL,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_send_frag_eager_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
mca_btl_mvapi_send_frag_eager_constructor,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_send_frag_max_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
mca_btl_mvapi_send_frag_max_constructor,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_send_frag_frag_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
mca_btl_mvapi_send_frag_frag_constructor,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_recv_frag_eager_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
mca_btl_mvapi_recv_frag_eager_constructor,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(
|
|
||||||
mca_btl_mvapi_recv_frag_max_t,
|
|
||||||
mca_btl_base_descriptor_t,
|
|
||||||
mca_btl_mvapi_recv_frag_max_constructor,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
|
|
@ -1,204 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Cisco, Inc. All Rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_BTL_IB_FRAG_H
|
|
||||||
#define MCA_BTL_IB_FRAG_H
|
|
||||||
|
|
||||||
|
|
||||||
/* Fragment alignment constant (8); presumably a byte alignment for fragment
 * buffers -- its users are outside this header, TODO confirm. */
#define MCA_BTL_IB_FRAG_ALIGN (8)
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
|
|
||||||
#include <vapi.h>
|
|
||||||
#include <mtl_common.h>
|
|
||||||
#include <vapi_common.h>
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_reg_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_header_t {
|
|
||||||
mca_btl_base_tag_t tag;
|
|
||||||
int16_t credits;
|
|
||||||
int16_t rdma_credits;
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_header_t mca_btl_mvapi_header_t;
|
|
||||||
|
|
||||||
/**
 * Fragment footer.  It holds a 32-bit size that can also be accessed as
 * four individual bytes; debug builds additionally carry a sequence
 * number in front of it.
 */
typedef struct mca_btl_mvapi_footer_t {
#if OMPI_ENABLE_DEBUG
    uint32_t seq;
#endif
    union {
        uint32_t size;
        uint8_t buf[4];
    } u;
} mca_btl_mvapi_footer_t;
|
|
||||||
|
|
||||||
/** Kinds of control message. */
typedef enum {
    MCA_BTL_MVAPI_CONTROL_NOOP = 0,
    MCA_BTL_MVAPI_CONTROL_RDMA
} mca_btl_mvapi_control_t;

/** Header of a control message; it only identifies the message kind. */
typedef struct mca_btl_mvapi_control_header_t {
    mca_btl_mvapi_control_t type;
} mca_btl_mvapi_control_header_t;
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_eager_rdma_header_t {
|
|
||||||
mca_btl_mvapi_control_header_t control;
|
|
||||||
ompi_ptr_t rdma_start;
|
|
||||||
uint64_t rkey;
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_eager_rdma_header_t mca_btl_mvapi_eager_rdma_header_t;
|
|
||||||
|
|
||||||
|
|
||||||
/** Fragment flavours; stored in the `type` field of a fragment. */
typedef enum mca_btl_mvapi_frag_type_t {
    MCA_BTL_MVAPI_FRAG_EAGER = 0,
    MCA_BTL_MVAPI_FRAG_MAX,
    MCA_BTL_MVAPI_FRAG_FRAG,
    MCA_BTL_MVAPI_FRAG_EAGER_RDMA
} mca_btl_mvapi_frag_type_t;
|
|
||||||
|
|
||||||
union mca_btl_mvapi_frag_desc_t {
|
|
||||||
VAPI_rr_desc_t rr_desc;
|
|
||||||
VAPI_sr_desc_t sr_desc;
|
|
||||||
};
|
|
||||||
typedef union mca_btl_mvapi_frag_desc_t mca_btl_mvapi_frag_desc_t;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IB send fragment derived type.
|
|
||||||
*/
|
|
||||||
struct mca_btl_mvapi_frag_t {
|
|
||||||
mca_btl_base_descriptor_t base;
|
|
||||||
mca_btl_base_segment_t segment;
|
|
||||||
struct mca_btl_base_endpoint_t *endpoint;
|
|
||||||
size_t size;
|
|
||||||
int rc;
|
|
||||||
mca_btl_mvapi_frag_type_t type;
|
|
||||||
|
|
||||||
mca_btl_mvapi_frag_desc_t desc;
|
|
||||||
VAPI_sg_lst_entry_t sg_entry;
|
|
||||||
mca_btl_mvapi_header_t *hdr;
|
|
||||||
mca_btl_mvapi_footer_t *ftr;
|
|
||||||
struct mca_btl_mvapi_reg_t *registration;
|
|
||||||
ompi_free_list_t* my_list;
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_frag_t;
|
|
||||||
|
|
||||||
/*
 * Every fragment flavour shares the layout of mca_btl_mvapi_frag_t; the
 * aliases below exist so that each flavour gets its own class.
 */
OBJ_CLASS_DECLARATION(mca_btl_mvapi_frag_t);

/* send fragments */
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_send_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_send_frag_eager_t);

typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_send_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_send_frag_max_t);

typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_send_frag_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_send_frag_frag_t);

/* receive fragments */
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_recv_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_recv_frag_eager_t);

typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_recv_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_recv_frag_max_t);
|
|
||||||
|
|
||||||
/*
 * Allocate an IB send descriptor from the module's eager send free list.
 *
 *   btl   pointer to the BTL module (cast to mca_btl_mvapi_module_t*)
 *   frag  lvalue receiving the fragment taken from send_free_eager
 *   rc    lvalue handed to OMPI_FREE_LIST_WAIT as its status argument
 *
 * The fragment remembers the list it came from in my_list so that
 * MCA_BTL_IB_FRAG_RETURN can give it back.  `btl` and `frag` are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc)                                         \
{                                                                                          \
                                                                                           \
    ompi_free_list_item_t *item;                                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_mvapi_module_t*)(btl))->send_free_eager, item, rc);     \
    (frag) = (mca_btl_mvapi_frag_t*) item;                                                 \
    (frag)->my_list = &((mca_btl_mvapi_module_t*)(btl))->send_free_eager;                  \
}
|
|
||||||
|
|
||||||
/*
 * Allocate an IB send descriptor from the module's maximum-size send
 * free list.
 *
 *   btl   pointer to the BTL module (cast to mca_btl_mvapi_module_t*)
 *   frag  lvalue receiving the fragment taken from send_free_max
 *   rc    lvalue handed to OMPI_FREE_LIST_WAIT as its status argument
 *
 * The fragment remembers the list it came from in my_list so that
 * MCA_BTL_IB_FRAG_RETURN can give it back.  `btl` and `frag` are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc)                                           \
{                                                                                          \
                                                                                           \
    ompi_free_list_item_t *item;                                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_mvapi_module_t*)(btl))->send_free_max, item, rc);       \
    (frag) = (mca_btl_mvapi_frag_t*) item;                                                 \
    (frag)->my_list = &((mca_btl_mvapi_module_t*)(btl))->send_free_max;                    \
}
|
|
||||||
|
|
||||||
/*
 * Allocate an IB send descriptor from the module's "frag" send free list.
 *
 *   btl   pointer to the BTL module (cast to mca_btl_mvapi_module_t*)
 *   frag  lvalue receiving the fragment taken from send_free_frag
 *   rc    lvalue handed to OMPI_FREE_LIST_WAIT as its status argument
 *
 * The fragment remembers the list it came from in my_list so that
 * MCA_BTL_IB_FRAG_RETURN can give it back.  `btl` and `frag` are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc)                                          \
{                                                                                          \
                                                                                           \
    ompi_free_list_item_t *item;                                                           \
    OMPI_FREE_LIST_WAIT(&((mca_btl_mvapi_module_t*)(btl))->send_free_frag, item, rc);      \
    (frag) = (mca_btl_mvapi_frag_t*) item;                                                 \
    (frag)->my_list = &((mca_btl_mvapi_module_t*)(btl))->send_free_frag;                   \
}
|
|
||||||
|
|
||||||
/*
 * Return a fragment to the free list recorded in its my_list field.
 *
 *   btl   unused; kept so existing call sites remain valid
 *   frag  pointer to the fragment being returned
 *
 * `frag` is parenthesized at every use, so an argument such as `&f` or
 * `p + 1` is dereferenced as a whole.
 */
#define MCA_BTL_IB_FRAG_RETURN(btl, frag)                         \
{                                                                 \
    OMPI_FREE_LIST_RETURN((frag)->my_list,                        \
                          (ompi_free_list_item_t*)(frag));        \
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Re-issue a pending fragment according to the opcode stored in its send
 * request descriptor:
 *
 *   VAPI_SEND        -> mca_btl_mvapi_endpoint_send()
 *   VAPI_RDMA_WRITE  -> mca_btl_mvapi_put()
 *   VAPI_RDMA_READ   -> mca_btl_mvapi_get()
 *
 * A failure of any of these, or an unknown opcode, is only logged.
 *
 * NOTE: besides its argument the macro uses a variable named `mvapi_btl`
 * that must be in scope at the call site; it is passed as the module to
 * the put/get calls.
 *
 * `frag` is expanded several times (pass an expression without side
 * effects) and is parenthesized at every use.
 */
#define MCA_BTL_IB_FRAG_PROGRESS(frag) \
do { \
    switch((frag)->desc.sr_desc.opcode) { \
    case VAPI_SEND: \
        if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send((frag)->endpoint, (frag))) { \
            BTL_ERROR(("error in posting pending send\n")); \
        } \
        break; \
    case VAPI_RDMA_WRITE: \
        if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl, \
                                             (frag)->endpoint, \
                                             (mca_btl_base_descriptor_t*) (frag))) { \
            BTL_ERROR(("error in posting pending rdma write\n")); \
        } \
        break; \
    case VAPI_RDMA_READ: \
        if(OMPI_SUCCESS != mca_btl_mvapi_get((mca_btl_base_module_t *) mvapi_btl, \
                                             (frag)->endpoint, \
                                             (mca_btl_base_descriptor_t*) (frag))) { \
            BTL_ERROR(("error in posting pending rdma read\n")); \
        } \
        break; \
    default: \
        BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", (frag)->desc.sr_desc.opcode)); \
        break; \
    } \
} while (0)
|
|
||||||
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_module_t;
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
@ -1,192 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "opal/class/opal_hash_table.h"
|
|
||||||
#include "ompi/runtime/ompi_module_exchange.h"
|
|
||||||
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
#include "btl_mvapi_proc.h"
|
|
||||||
|
|
||||||
/* Constructor / destructor of the proc class, defined further down. */
static void mca_btl_mvapi_proc_construct(mca_btl_mvapi_proc_t* proc);
static void mca_btl_mvapi_proc_destruct(mca_btl_mvapi_proc_t* proc);

/* mca_btl_mvapi_proc_t is a list item, so instances can sit on a list. */
OBJ_CLASS_INSTANCE(mca_btl_mvapi_proc_t,
                   opal_list_item_t,
                   mca_btl_mvapi_proc_construct,
                   mca_btl_mvapi_proc_destruct);
|
|
||||||
|
|
||||||
/*
 * Initialize an ib proc instance.
 *
 * Every pointer member, including proc_ports, is reset to NULL and every
 * counter to zero, so that no field is left indeterminate between
 * construction and the point where the creator fills it in.  The proc
 * lock is constructed and the instance is appended to the component-wide
 * list of procs under the component lock.
 */
void mca_btl_mvapi_proc_construct(mca_btl_mvapi_proc_t* proc)
{
    proc->proc_ompi = NULL;
    proc->proc_ports = NULL;
    proc->proc_port_count = 0;
    proc->proc_endpoints = NULL;
    proc->proc_endpoint_count = 0;
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* add to list of all proc instance */
    OPAL_THREAD_LOCK(&mca_btl_mvapi_component.ib_lock);
    opal_list_append(&mca_btl_mvapi_component.ib_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_mvapi_component.ib_lock);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Cleanup ib proc instance
|
|
||||||
*/
|
|
||||||
|
|
||||||
void mca_btl_mvapi_proc_destruct(mca_btl_mvapi_proc_t* proc)
|
|
||||||
{
|
|
||||||
/* remove from list of all proc instances */
|
|
||||||
OPAL_THREAD_LOCK(&mca_btl_mvapi_component.ib_lock);
|
|
||||||
opal_list_remove_item(&mca_btl_mvapi_component.ib_procs, &proc->super);
|
|
||||||
OPAL_THREAD_UNLOCK(&mca_btl_mvapi_component.ib_lock);
|
|
||||||
|
|
||||||
/* release resources */
|
|
||||||
if(NULL != proc->proc_endpoints) {
|
|
||||||
free(proc->proc_endpoints);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Look for an existing IB process instances based on the associated
|
|
||||||
* ompi_proc_t instance.
|
|
||||||
*/
|
|
||||||
static mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_proc_t* ib_proc;
|
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&mca_btl_mvapi_component.ib_lock);
|
|
||||||
|
|
||||||
for(ib_proc = (mca_btl_mvapi_proc_t*)
|
|
||||||
opal_list_get_first(&mca_btl_mvapi_component.ib_procs);
|
|
||||||
ib_proc != (mca_btl_mvapi_proc_t*)
|
|
||||||
opal_list_get_end(&mca_btl_mvapi_component.ib_procs);
|
|
||||||
ib_proc = (mca_btl_mvapi_proc_t*)opal_list_get_next(ib_proc)) {
|
|
||||||
|
|
||||||
if(ib_proc->proc_ompi == ompi_proc) {
|
|
||||||
OPAL_THREAD_UNLOCK(&mca_btl_mvapi_component.ib_lock);
|
|
||||||
return ib_proc;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&mca_btl_mvapi_component.ib_lock);
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create a IB process structure. There is a one-to-one correspondence
|
|
||||||
* between a ompi_proc_t and a mca_btl_mvapi_proc_t instance. We cache
|
|
||||||
* additional data (specifically the list of mca_btl_mvapi_endpoint_t instances,
|
|
||||||
* and published addresses) associated w/ a given destination on this
|
|
||||||
* datastructure.
|
|
||||||
*/
|
|
||||||
|
|
||||||
mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc)
|
|
||||||
{
|
|
||||||
mca_btl_mvapi_proc_t* mvapi_proc = NULL;
|
|
||||||
size_t size;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
|
|
||||||
/* Check if we have already created a IB proc
|
|
||||||
* structure for this ompi process */
|
|
||||||
mvapi_proc = mca_btl_mvapi_proc_lookup_ompi(ompi_proc);
|
|
||||||
|
|
||||||
if(mvapi_proc != NULL) {
|
|
||||||
|
|
||||||
/* Gotcha! */
|
|
||||||
return mvapi_proc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Oops! First time, gotta create a new IB proc
|
|
||||||
* out of the ompi_proc ... */
|
|
||||||
|
|
||||||
mvapi_proc = OBJ_NEW(mca_btl_mvapi_proc_t);
|
|
||||||
|
|
||||||
/* Initialize number of peer */
|
|
||||||
mvapi_proc->proc_endpoint_count = 0;
|
|
||||||
|
|
||||||
mvapi_proc->proc_ompi = ompi_proc;
|
|
||||||
|
|
||||||
/* build a unique identifier (of arbitrary
|
|
||||||
* size) to represent the proc */
|
|
||||||
mvapi_proc->proc_guid = ompi_proc->proc_name;
|
|
||||||
|
|
||||||
/* query for the peer address info */
|
|
||||||
rc = ompi_modex_recv(
|
|
||||||
&mca_btl_mvapi_component.super.btl_version,
|
|
||||||
ompi_proc,
|
|
||||||
(void*)&mvapi_proc->proc_ports,
|
|
||||||
&size
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
|
||||||
opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s",
|
|
||||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
|
||||||
OBJ_RELEASE(mvapi_proc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if((size % sizeof(mca_btl_mvapi_port_info_t)) != 0) {
|
|
||||||
opal_output(0, "[%s:%d] invalid mvapi address for peer %s",
|
|
||||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
|
||||||
OBJ_RELEASE(mvapi_proc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
mvapi_proc->proc_port_count = size/sizeof(mca_btl_mvapi_port_info_t);
|
|
||||||
|
|
||||||
|
|
||||||
if (0 == mvapi_proc->proc_port_count) {
|
|
||||||
mvapi_proc->proc_endpoints = NULL;
|
|
||||||
} else {
|
|
||||||
mvapi_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
|
||||||
malloc(mvapi_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
|
|
||||||
}
|
|
||||||
|
|
||||||
if(NULL == mvapi_proc->proc_endpoints) {
|
|
||||||
OBJ_RELEASE(mvapi_proc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return mvapi_proc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note that this routine must be called with the lock on the process
|
|
||||||
* already held. Insert a btl instance into the proc array and assign
|
|
||||||
* it an address.
|
|
||||||
*/
|
|
||||||
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t* mvapi_proc,
|
|
||||||
mca_btl_base_endpoint_t* mvapi_endpoint)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* insert into endpoint array */
|
|
||||||
if(mvapi_proc->proc_port_count <= mvapi_proc->proc_endpoint_count)
|
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
||||||
|
|
||||||
mvapi_endpoint->endpoint_proc = mvapi_proc;
|
|
||||||
mvapi_proc->proc_endpoints[mvapi_proc->proc_endpoint_count++] = mvapi_endpoint;
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
@ -1,72 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_BTL_IB_PROC_H
|
|
||||||
#define MCA_BTL_IB_PROC_H
|
|
||||||
|
|
||||||
#include "orte/mca/ns/ns.h"
|
|
||||||
#include "opal/class/opal_object.h"
|
|
||||||
#include "ompi/proc/proc.h"
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
#include "btl_mvapi_endpoint.h"
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents the state of a remote process and the set of addresses
|
|
||||||
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
|
|
||||||
* each
|
|
||||||
* BTL instance that attempts to open a connection to the process.
|
|
||||||
*/
|
|
||||||
struct mca_btl_mvapi_proc_t {
|
|
||||||
opal_list_item_t super;
|
|
||||||
/**< allow proc to be placed on a list */
|
|
||||||
|
|
||||||
ompi_proc_t *proc_ompi;
|
|
||||||
/**< pointer to corresponding ompi_proc_t */
|
|
||||||
|
|
||||||
orte_process_name_t proc_guid;
|
|
||||||
/**< globally unique identifier for the process */
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_port_info_t* proc_ports;
|
|
||||||
size_t proc_port_count;
|
|
||||||
/**< number of ports published by endpoint */
|
|
||||||
|
|
||||||
struct mca_btl_base_endpoint_t **proc_endpoints;
|
|
||||||
/**< array of endpoints that have been created to access this proc */
|
|
||||||
|
|
||||||
size_t proc_endpoint_count;
|
|
||||||
/**< number of endpoints */
|
|
||||||
|
|
||||||
opal_mutex_t proc_lock;
|
|
||||||
/**< lock to protect against concurrent access to proc state */
|
|
||||||
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_proc_t mca_btl_mvapi_proc_t;
|
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_mvapi_proc_t);
|
|
||||||
|
|
||||||
mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc);
|
|
||||||
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t*, mca_btl_base_endpoint_t*);
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_BTL_MVAPI_RDMA_BUF_H
|
|
||||||
#define MCA_BTL_MVAPI_RDMA_BUF_H
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "btl_mvapi.h"
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
#if 0
|
|
||||||
struct mca_btl_mvapi_rdma_buf_t {
|
|
||||||
void* base;
|
|
||||||
size_t entry_size;
|
|
||||||
uint32_t entry_cnt;
|
|
||||||
void* current;
|
|
||||||
opal_mutex_t lock;
|
|
||||||
mca_mpool_base_registration_t* reg;
|
|
||||||
uint32_t tokens;
|
|
||||||
void* rem_addr;
|
|
||||||
size_t rem_size;
|
|
||||||
uint32_t rem_cnt;
|
|
||||||
void* rem_current;
|
|
||||||
VAPI_rkey_t r_key;
|
|
||||||
|
|
||||||
};
|
|
||||||
typedef struct mca_btl_mvapi_rdma_buf_t mca_btl_mvapi_rdma_buf_t;
|
|
||||||
#endif
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
@ -1,51 +0,0 @@
|
|||||||
# -*- shell-script -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
# MCA_btl_mvapi_CONFIG([action-if-can-compile],
|
|
||||||
# [action-if-cant-compile])
|
|
||||||
# ------------------------------------------------
|
|
||||||
AC_DEFUN([MCA_btl_mvapi_CONFIG],[
|
|
||||||
OMPI_CHECK_MVAPI([btl_mvapi],
|
|
||||||
[btl_mvapi_happy="yes"],
|
|
||||||
[btl_mvapi_happy="no"])
|
|
||||||
|
|
||||||
AS_IF([test "$btl_mvapi_happy" = "yes"],
|
|
||||||
[btl_mvapi_WRAPPER_EXTRA_LDFLAGS="$btl_mvapi_LDFLAGS"
|
|
||||||
btl_mvapi_WRAPPER_EXTRA_LIBS="$btl_mvapi_LIBS"
|
|
||||||
$1],
|
|
||||||
[$2])
|
|
||||||
|
|
||||||
# Many of the vapi.h files floating around don't obey the ISO C99
|
|
||||||
# standard, so cause oodles of warnings with -pedantic and
|
|
||||||
# -Wundef. Remove them from CFLAGS, which is then used to
|
|
||||||
# forcefully override CFLAGS in the makefile for MVAPI
|
|
||||||
# components
|
|
||||||
btl_mvapi_CFLAGS="`echo $CFLAGS | sed 's/-pedantic//g'`"
|
|
||||||
btl_mvapi_CFLAGS="`echo $btl_mvapi_CFLAGS | sed 's/-Wundef//g'`"
|
|
||||||
AS_IF([test "$btl_mvapi_CFLAGS" != "$CFLAGS" -a "$btl_mvapi_happy" = "yes"],
|
|
||||||
[AC_MSG_WARN([Removed -pedantic and -Wundef from CFLAGS for
|
|
||||||
mvapi component because some vapi.h files are not really ANSI C])])
|
|
||||||
|
|
||||||
# substitute in the things needed to build mvapi
|
|
||||||
AC_SUBST([btl_mvapi_CFLAGS])
|
|
||||||
AC_SUBST([btl_mvapi_CPPFLAGS])
|
|
||||||
AC_SUBST([btl_mvapi_LDFLAGS])
|
|
||||||
AC_SUBST([btl_mvapi_LIBS])
|
|
||||||
])dnl
|
|
@ -1,24 +0,0 @@
|
|||||||
# -*- shell-script -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
|
||||||
# reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
|
|
||||||
# Specific to this module
|
|
||||||
|
|
||||||
PARAM_CONFIG_FILES="Makefile"
|
|
@ -1,41 +0,0 @@
|
|||||||
# -*- text -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
# This is the US/English general help file for Open MPI.
|
|
||||||
#
|
|
||||||
[btl_mvapi:retry-exceeded]
|
|
||||||
The retry count is a down counter initialized on creation of the QP. Retry
|
|
||||||
count is defined in the InfiniBand Spec 1.2 (12.7.38):
|
|
||||||
The total number of times that the sender wishes the receiver to retry
|
|
||||||
timeout, packet sequence, etc. errors before posting a completion error.
|
|
||||||
|
|
||||||
Note that two mca parameters are involved here:
|
|
||||||
btl_mvapi_ib_retry_count - The number of times the sender will attempt to
|
|
||||||
retry (defaulted to 7, the maximum value).
|
|
||||||
|
|
||||||
btl_mvapi_ib_timeout - The local ack timeout parameter (defaulted to 10). The
|
|
||||||
actual timeout value used is calculated as:
|
|
||||||
(4.096 micro-seconds * 2^btl_mvapi_ib_timeout).
|
|
||||||
See InfiniBand Spec 1.2 (12.7.34) for more details.
|
|
||||||
|
|
||||||
What to do next:
|
|
||||||
One item to note is the hosts on which this error has occurred; it has been
|
|
||||||
observed that rebooting or removing a particular host from the job can resolve
|
|
||||||
this issue. Should you be able to identify a specific cause or additional
|
|
||||||
troubleshooting information, please report this to devel@open-mpi.org.
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче
Block a user