
removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.
This commit is contained in:
Galen Shipman 2005-06-30 21:28:35 +00:00
parent 5f981fb76c
commit 0c7f67f67e
56 changed files with 3815 additions and 4148 deletions


@@ -29,7 +29,7 @@
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
-#include "mca/mpool/vapi/mpool_vapi.h"
+#include "mca/mpool/mvapi/mpool_mvapi.h"
mca_btl_mvapi_module_t mca_btl_mvapi_module = {
{
@@ -220,7 +220,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
{
mca_btl_mvapi_module_t* mvapi_btl;
mca_btl_mvapi_frag_t* frag;
-mca_mpool_vapi_registration_t * vapi_reg;
+mca_mpool_mvapi_registration_t * vapi_reg;
struct iovec iov;
int32_t iov_count = 1;
size_t max_data = *size;
@@ -229,7 +229,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
-vapi_reg = (mca_mpool_vapi_registration_t*) registration;
+vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
/** if the data fits in the eager limit and we aren't told to pin then we
simply pack, if the data fits in the eager limit and the data is non-contiguous
@@ -347,8 +347,8 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
if(mca_btl_mvapi_component.leave_pinned) {
if(mca_btl_mvapi_component.reg_mru_len <= mvapi_btl->reg_mru_list.ompi_list_length ) {
-mca_mpool_vapi_registration_t* old_reg =
-(mca_mpool_vapi_registration_t*)
+mca_mpool_mvapi_registration_t* old_reg =
+(mca_mpool_mvapi_registration_t*)
ompi_list_remove_last(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) {
@@ -488,12 +488,12 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
{
mca_btl_mvapi_module_t* mvapi_btl;
mca_btl_mvapi_frag_t* frag;
-mca_mpool_vapi_registration_t * vapi_reg;
+mca_mpool_mvapi_registration_t * vapi_reg;
int rc;
size_t reg_len;
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
-vapi_reg = (mca_mpool_vapi_registration_t*) registration;
+vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
@@ -567,8 +567,8 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
if( mca_btl_mvapi_component.reg_mru_len <= mvapi_btl->reg_mru_list.ompi_list_length ) {
-mca_mpool_vapi_registration_t* old_reg =
-(mca_mpool_vapi_registration_t*)
+mca_mpool_mvapi_registration_t* old_reg =
+(mca_mpool_mvapi_registration_t*)
ompi_list_remove_last(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) {


@@ -130,7 +130,7 @@ struct mca_btl_mvapi_module_t {
ompi_free_list_t recv_free_eager; /**< High priority free list of buffer descriptors */
ompi_free_list_t recv_free_max; /**< Low priority free list of buffer descriptors */
-ompi_list_t reg_mru_list; /**< a most recently used list of mca_mpool_vapi_registration_t
+ompi_list_t reg_mru_list; /**< a most recently used list of mca_mpool_mvapi_registration_t
entries, this allows us to keep a working set of memory pinned */
ompi_list_t repost; /**< list of buffers to repost */
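The reg_mru_list comment above describes the leave_pinned scheme this commit uses throughout: registrations stay pinned after use and are recycled in most-recently-used order. As a hedged illustration, a minimal sketch of the eviction step, using the list and mpool calls that appear in prepare_src/prepare_dst elsewhere in this diff (the wrapper function itself is hypothetical):

/* Hypothetical helper sketching the eviction pattern used in this commit:
 * when the MRU list reaches reg_mru_len entries, unpin the least recently
 * used registration to make room for a new one. */
static int evict_lru_registration(mca_btl_mvapi_module_t* btl)
{
    mca_mpool_mvapi_registration_t* old_reg = (mca_mpool_mvapi_registration_t*)
        ompi_list_remove_last(&btl->reg_mru_list);
    if(NULL == old_reg) {
        return OMPI_ERROR;  /* list unexpectedly empty */
    }
    /* remove the lookup-tree entry first, then release the registration */
    if(OMPI_SUCCESS != mca_mpool_base_remove((void*) old_reg->base_reg.base)) {
        return OMPI_ERROR;
    }
    OBJ_RELEASE(old_reg);   /* memory is unpinned once the refcount drops */
    return OMPI_SUCCESS;
}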


@@ -36,7 +36,7 @@
#include <vapi.h>
#include <vapi_common.h>
#include "datatype/convertor.h"
-#include "mca/mpool/vapi/mpool_vapi.h"
+#include "mca/mpool/mvapi/mpool_mvapi.h"
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
{


@@ -1,12 +1,12 @@
#include "btl_mvapi_frag.h"
#include "mca/common/vapi/vapi_mem_reg.h"
-#include "mca/mpool/vapi/mpool_vapi.h"
+#include "mca/mpool/mvapi/mpool_mvapi.h"
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
{
-mca_mpool_vapi_registration_t* mem_hndl = (mca_mpool_vapi_registration_t*) frag->base.super.user_data;
+mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize the btl header to point to start at end of frag */
#if 0
mod = (unsigned long) frag->hdr % MCA_BTL_IB_FRAG_ALIGN;


@@ -25,7 +25,7 @@
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
-#include "mca/mpool/vapi/mpool_vapi.h"
+#include "mca/mpool/mvapi/mpool_mvapi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@@ -62,7 +62,7 @@ struct mca_btl_mvapi_frag_t {
VAPI_mr_hndl_t mem_hndl;
VAPI_ret_t ret;
mca_btl_mvapi_header_t *hdr;
-mca_mpool_vapi_registration_t * vapi_reg;
+mca_mpool_mvapi_registration_t * vapi_reg;
};
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_mvapi_frag_t);


@@ -18,5 +18,5 @@
# Specific to this module
PARAM_INIT_FILE=btl_mvapi.c
-PARAM_CONFIG_HEADER_FILE="ib_config.h"
+PARAM_CONFIG_HEADER_FILE="mvapi_config.h"
PARAM_CONFIG_FILES="Makefile"


@@ -3,8 +3,6 @@
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
-# Copyright (c) 2004 The Ohio State University.
-# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
@@ -20,20 +18,29 @@
include $(top_ompi_srcdir)/config/Makefile.options
-sources =
-include src/Makefile.extra
+sources = \
+btl_openib.c \
+btl_openib.h \
+btl_openib_component.c \
+btl_openib_endpoint.c \
+btl_openib_endpoint.h \
+btl_openib_frag.c \
+btl_openib_frag.h \
+btl_openib_proc.c \
+btl_openib_proc.h \
+btl_openib_error.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
-if OMPI_BUILD_ptl_ib_DSO
+if OMPI_BUILD_btl_openib_DSO
lib =
-lib_sources =
-component = mca_ptl_ib.la
+lib_sources =
+component = mca_btl_openib.la
component_sources = $(sources)
else
-lib = libmca_ptl_ib.la
+lib = libmca_btl_openib.la
lib_sources = $(sources)
component =
component_sources =
@@ -41,9 +48,9 @@ endif
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component)
-mca_ptl_ib_la_SOURCES = $(component_sources)
-mca_ptl_ib_la_LDFLAGS = -module -avoid-version
+mca_btl_openib_la_SOURCES = $(component_sources)
+mca_btl_openib_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(lib)
-libmca_ptl_ib_la_SOURCES = $(lib_sources)
-libmca_ptl_ib_la_LDFLAGS = -module -avoid-version
+libmca_btl_openib_la_SOURCES = $(lib_sources)
+libmca_btl_openib_la_LDFLAGS = -module -avoid-version

src/mca/btl/openib/btl_openib.c (new file, 832 lines)

@@ -0,0 +1,832 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "util/if.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_proc.h"
#include "btl_openib_endpoint.h"
#include "datatype/convertor.h"
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
#include "mca/mpool/mvapi/mpool_mvapi.h"
mca_btl_openib_module_t mca_btl_openib_module = {
{
&mca_btl_openib_component.super,
0, /* max size of first fragment */
0, /* min send fragment size */
0, /* max send fragment size */
0, /* min rdma fragment size */
0, /* max rdma fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
0, /* TODO this should be PUT btl flags */
mca_btl_openib_add_procs,
mca_btl_openib_del_procs,
mca_btl_openib_register,
mca_btl_openib_finalize,
/* we need alloc free, pack */
mca_btl_openib_alloc,
mca_btl_openib_free,
mca_btl_openib_prepare_src,
mca_btl_openib_prepare_dst,
mca_btl_openib_send,
mca_btl_openib_put,
NULL /* get */
}
};
int mca_btl_openib_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **ompi_procs,
struct mca_btl_base_endpoint_t** peers,
ompi_bitmap_t* reachable)
{
mca_btl_openib_module_t* mvapi_btl = (mca_btl_openib_module_t*)btl;
int i, rc;
for(i = 0; i < (int) nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
mca_btl_openib_proc_t* ib_proc;
mca_btl_base_endpoint_t* ib_peer;
if(NULL == (ib_proc = mca_btl_openib_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
* Check to make sure that the peer has at least as many interface
* addresses exported as we are trying to use. If not, then
don't bind this BTL instance to the proc.
*/
OMPI_THREAD_LOCK(&ib_proc->proc_lock);
/* The btl_proc data structure is shared by all IB BTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
ib_peer = OBJ_NEW(mca_btl_openib_endpoint_t);
if(NULL == ib_peer) {
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
return OMPI_ERR_OUT_OF_RESOURCE;
}
ib_peer->endpoint_btl = mvapi_btl;
rc = mca_btl_openib_proc_insert(ib_proc, ib_peer);
if(rc != OMPI_SUCCESS) {
OBJ_RELEASE(ib_peer);
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
continue;
}
ompi_bitmap_set_bit(reachable, i);
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
peers[i] = ib_peer;
}
return OMPI_SUCCESS;
}
int mca_btl_openib_del_procs(struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t ** peers)
{
/* Stub */
DEBUG_OUT("Stub\n");
return OMPI_SUCCESS;
}
int mca_btl_openib_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata)
{
/* TODO add register stuff here... */
mca_btl_openib_module_t* mvapi_btl = (mca_btl_openib_module_t*) btl;
OMPI_THREAD_LOCK(&mvapi_btl->ib_lock);
mvapi_btl->ib_reg[tag].cbfunc = cbfunc;
mvapi_btl->ib_reg[tag].cbdata = cbdata;
OMPI_THREAD_UNLOCK(&mvapi_btl->ib_lock);
return OMPI_SUCCESS;
}
/**
* Allocate a segment.
*
* @param btl (IN) BTL module
* @param size (IN) Request segment size.
*/
mca_btl_base_descriptor_t* mca_btl_openib_alloc(
struct mca_btl_base_module_t* btl,
size_t size)
{
mca_btl_openib_frag_t* frag;
mca_btl_openib_module_t* mvapi_btl;
int rc;
mvapi_btl = (mca_btl_openib_module_t*) btl;
if(size <= mca_btl_openib_component.eager_limit){
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
frag->segment.seg_len =
size <= mca_btl_openib_component.eager_limit ?
size: mca_btl_openib_component.eager_limit ;
} else {
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
frag->segment.seg_len =
size <= mca_btl_openib_component.max_send_size ?
size: mca_btl_openib_component.max_send_size ;
}
frag->base.des_flags = 0;
return (mca_btl_base_descriptor_t*)frag;
}
/**
*
*
*/
int mca_btl_openib_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des)
{
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;
if(frag->size == 0) {
MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag);
OBJ_RELEASE(frag->vapi_reg);
}
else if(frag->size == mca_btl_openib_component.max_send_size){
MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag);
} else if(frag->size == mca_btl_openib_component.eager_limit){
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
}
return OMPI_SUCCESS;
}
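For orientation, a sketch of how a caller might pair these two entry points; the caller code is assumed and not part of this commit:

/* Assumed caller sketch: allocate a descriptor, pack a payload into its
 * segment, then hand it back to the BTL's free lists. */
mca_btl_base_descriptor_t* des = mca_btl_openib_alloc(btl, 256);
if(NULL != des) {
    /* ... copy up to 256 bytes into the fragment's segment ... */
    mca_btl_openib_free(btl, des);
}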
/**
* Pack data and return a descriptor that can be
* used for send/put.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
)
{
mca_btl_openib_module_t* mvapi_btl;
mca_btl_openib_frag_t* frag;
mca_mpool_mvapi_registration_t * vapi_reg;
struct iovec iov;
int32_t iov_count = 1;
size_t max_data = *size;
int32_t free_after;
int rc;
mvapi_btl = (mca_btl_openib_module_t*) btl;
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
/** if the data fits in the eager limit and we aren't told to pin then we
simply pack; if the data fits in the eager limit and the data is non-contiguous
then we pack **/
if(NULL != vapi_reg && 0 == ompi_convertor_need_buffers(convertor)){
bool is_leave_pinned = vapi_reg->is_leave_pinned;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
/* first we will try to find this address in the memory tree (from MPI_Alloc_mem) */
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
size_t reg_len;
reg_len = (unsigned char*)vapi_reg->base_reg.bound - (unsigned char*)iov.iov_base + 1;
if(frag->segment.seg_len > reg_len) {
size_t new_len = vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1
+ frag->segment.seg_len - reg_len;
void * base_addr = vapi_reg->base_reg.base;
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
if(OMPI_SUCCESS != rc) {
ompi_output(0, "%s:%d:%s error removing memory region from memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
if(is_leave_pinned) {
if(NULL == ompi_list_remove_item(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg)){
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
}
OBJ_RELEASE(vapi_reg);
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
base_addr,
new_len,
(mca_mpool_base_registration_t**) &vapi_reg);
rc = mca_mpool_base_insert(vapi_reg->base_reg.base,
vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1,
mvapi_btl->ib_pool,
(void*) (&mvapi_btl->super),
(mca_mpool_base_registration_t*) vapi_reg);
if(rc != OMPI_SUCCESS) {
ompi_output(0,"%s:%d:%s error inserting memory region into memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
OBJ_RETAIN(vapi_reg);
if(is_leave_pinned) {
vapi_reg->is_leave_pinned = is_leave_pinned;
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
}
}
else if(is_leave_pinned) {
if(NULL == ompi_list_remove_item(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg)) {
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
}
frag->mem_hndl = vapi_reg->hndl;
frag->sg_entry.len = max_data;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->vapi_reg = vapi_reg;
OBJ_RETAIN(vapi_reg);
return &frag->base;
} else if((mca_btl_openib_component.leave_pinned || max_data > btl->btl_max_send_size) &&
ompi_convertor_need_buffers(convertor) == 0 &&
reserve == 0)
{
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->base.des_flags = 0;
if(mca_btl_openib_component.leave_pinned) {
if(mca_btl_openib_component.reg_mru_len <= mvapi_btl->reg_mru_list.ompi_list_length ) {
mca_mpool_mvapi_registration_t* old_reg =
(mca_mpool_mvapi_registration_t*)
ompi_list_remove_last(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) {
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS != rc) {
ompi_output(0,"%s:%d:%s error removing memory region from memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
OBJ_RELEASE(old_reg);
}
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
iov.iov_base,
max_data,
(mca_mpool_base_registration_t**) &vapi_reg);
rc = mca_mpool_base_insert(vapi_reg->base_reg.base,
vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1,
mvapi_btl->ib_pool,
(void*) (&mvapi_btl->super),
(mca_mpool_base_registration_t*) vapi_reg);
if(rc != OMPI_SUCCESS)
return NULL;
OBJ_RETAIN(vapi_reg);
vapi_reg->is_leave_pinned = true;
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
} else {
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
iov.iov_base,
max_data,
(mca_mpool_base_registration_t**) &vapi_reg);
vapi_reg->is_leave_pinned = false;
}
frag->mem_hndl = vapi_reg->hndl;
frag->sg_entry.len = max_data;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->vapi_reg = vapi_reg;
OBJ_RETAIN(vapi_reg);
return &frag->base;
} else if (max_data+reserve <= btl->btl_eager_limit) {
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
/** if the data fits in the max limit and we aren't told to pin then we
simply pack; if the data is non-contiguous then we pack **/
else if(max_data + reserve <= mvapi_btl->super.btl_max_send_size) {
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
if(max_data + reserve > frag->size){
max_data = frag->size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags=0;
return &frag->base;
}
return NULL;
}
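Reduced to a sketch, prepare_src above picks one of four strategies; the names leave_pinned, btl_max_send_size and btl_eager_limit below are shorthand for the fields tested in the code:

/* Paraphrased decision tree for mca_btl_openib_prepare_src(): */
if(NULL != registration && 0 == ompi_convertor_need_buffers(convertor)) {
    /* 1: already registered and contiguous: use the user buffer in place,
     *    re-registering a larger region if the fragment overruns it */
} else if((leave_pinned || max_data > btl_max_send_size)
          && 0 == ompi_convertor_need_buffers(convertor) && 0 == reserve) {
    /* 2: contiguous but unregistered: register on the fly and, under
     *    leave_pinned, cache the registration on the MRU list */
} else if(max_data + reserve <= btl_eager_limit) {
    /* 3: small payload: copy into a pre-registered eager buffer */
} else {
    /* 4: otherwise: copy into a pre-registered max_send_size buffer */
}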
/**
* Pack data
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size)
{
mca_btl_openib_module_t* mvapi_btl;
mca_btl_openib_frag_t* frag;
mca_mpool_mvapi_registration_t * vapi_reg;
int rc;
size_t reg_len;
mvapi_btl = (mca_btl_openib_module_t*) btl;
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
frag->segment.seg_len = *size;
frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
frag->base.des_flags = 0;
if(NULL!= vapi_reg){
reg_len = (unsigned char*)vapi_reg->base_reg.bound - (unsigned char*)frag->segment.seg_addr.pval + 1;
bool is_leave_pinned = vapi_reg->is_leave_pinned;
if(frag->segment.seg_len > reg_len ) {
size_t new_len = vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1
+ frag->segment.seg_len - reg_len;
void * base_addr = vapi_reg->base_reg.base;
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
if(OMPI_SUCCESS != rc) {
ompi_output(0,"%s:%d:%s error removing memory region from memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
if(is_leave_pinned) {
if(NULL == ompi_list_remove_item(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg)) {
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
}
OBJ_RELEASE(vapi_reg);
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
base_addr,
new_len,
(mca_mpool_base_registration_t**) &vapi_reg);
rc = mca_mpool_base_insert(vapi_reg->base_reg.base,
vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1,
mvapi_btl->ib_pool,
(void*) (&mvapi_btl->super),
(mca_mpool_base_registration_t*) vapi_reg);
if(OMPI_SUCCESS != rc) {
ompi_output(0,"%s:%d:%s error inserting memory region into memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
OBJ_RETAIN(vapi_reg);
if(is_leave_pinned) {
vapi_reg->is_leave_pinned = is_leave_pinned;
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
}
}
else if(is_leave_pinned){
if(NULL == ompi_list_remove_item(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg)) {
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
}
} else {
if(mca_btl_openib_component.leave_pinned) {
if( mca_btl_openib_component.reg_mru_len <= mvapi_btl->reg_mru_list.ompi_list_length ) {
mca_mpool_mvapi_registration_t* old_reg =
(mca_mpool_mvapi_registration_t*)
ompi_list_remove_last(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) {
ompi_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
return NULL;
}
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS !=rc ) {
ompi_output(0,"%s:%d:%s error removing memory region from memory pool tree", __FILE__, __LINE__, __func__);
return NULL;
}
OBJ_RELEASE(old_reg);
}
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
frag->segment.seg_addr.pval,
*size,
(mca_mpool_base_registration_t**) &vapi_reg);
vapi_reg->is_leave_pinned = true;
rc = mca_mpool_base_insert(vapi_reg->base_reg.base,
vapi_reg->base_reg.bound - vapi_reg->base_reg.base + 1,
mvapi_btl->ib_pool,
(void*) (&mvapi_btl->super),
(mca_mpool_base_registration_t*) vapi_reg);
if(OMPI_SUCCESS != rc){
ompi_output(0,"%s:%d:%s error inserting memory region into memory pool", __FILE__, __LINE__, __func__);
return NULL;
}
OBJ_RETAIN(vapi_reg);
ompi_list_append(&mvapi_btl->reg_mru_list, (ompi_list_item_t*) vapi_reg);
} else {
mvapi_btl->ib_pool->mpool_register(mvapi_btl->ib_pool,
frag->segment.seg_addr.pval,
*size,
(mca_mpool_base_registration_t**) &vapi_reg);
vapi_reg->is_leave_pinned=false;
}
}
frag->mem_hndl = vapi_reg->hndl;
frag->sg_entry.len = *size;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->segment.seg_addr.pval;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->vapi_reg = vapi_reg;
OBJ_RETAIN(vapi_reg);
return &frag->base;
}
int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
{
mca_btl_openib_module_t* mvapi_btl;
mvapi_btl = (mca_btl_openib_module_t*) btl;
if(mvapi_btl->send_free_eager.fl_num_allocated !=
mvapi_btl->send_free_eager.super.ompi_list_length){
ompi_output(0, "btl ib send_free_eager frags: %d allocated %d returned \n",
mvapi_btl->send_free_eager.fl_num_allocated,
mvapi_btl->send_free_eager.super.ompi_list_length);
}
if(mvapi_btl->send_free_max.fl_num_allocated !=
mvapi_btl->send_free_max.super.ompi_list_length){
ompi_output(0, "btl ib send_free_max frags: %d allocated %d returned \n",
mvapi_btl->send_free_max.fl_num_allocated,
mvapi_btl->send_free_max.super.ompi_list_length);
}
if(mvapi_btl->send_free_frag.fl_num_allocated !=
mvapi_btl->send_free_frag.super.ompi_list_length){
ompi_output(0, "btl ib send_free_frag frags: %d allocated %d returned \n",
mvapi_btl->send_free_frag.fl_num_allocated,
mvapi_btl->send_free_frag.super.ompi_list_length);
}
if(mvapi_btl->recv_free_eager.fl_num_allocated !=
mvapi_btl->recv_free_eager.super.ompi_list_length){
ompi_output(0, "btl ib recv_free_eager frags: %d allocated %d returned \n",
mvapi_btl->recv_free_eager.fl_num_allocated,
mvapi_btl->recv_free_eager.super.ompi_list_length);
}
if(mvapi_btl->recv_free_max.fl_num_allocated !=
mvapi_btl->recv_free_max.super.ompi_list_length){
ompi_output(0, "btl ib recv_free_max frags: %d allocated %d returned \n",
mvapi_btl->recv_free_max.fl_num_allocated,
mvapi_btl->recv_free_max.super.ompi_list_length);
}
return OMPI_SUCCESS;
}
/*
* Initiate a send. If this is the first fragment, use the fragment
* descriptor allocated with the send requests, otherwise obtain
one from the free list. Initialize the fragment and forward
* on to the peer.
*/
int mca_btl_openib_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag)
{
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)descriptor;
frag->endpoint = endpoint;
frag->hdr->tag = tag;
frag->type = MCA_BTL_IB_FRAG_SEND;
frag->rc = mca_btl_openib_endpoint_send(endpoint, frag);
return frag->rc;
}
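Because the receive side dispatches on the tag (see the ib_reg[] callback lookup in component_progress), sender and receiver must agree on it. A sketch of the pairing; the tag value and callback are hypothetical:

/* Assumed pairing: the receiver registers a callback for a tag and the
 * sender stamps the same tag on every descriptor it sends. */
mca_btl_openib_register(btl, 42, my_recv_cb, NULL);  /* receive side */
rc = mca_btl_openib_send(btl, endpoint, des, 42);    /* send side */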
/*
* RDMA local buffer to remote buffer address.
*/
int mca_btl_openib_put( mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor)
{
mca_btl_openib_module_t* mvapi_btl = (mca_btl_openib_module_t*) btl;
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor;
frag->endpoint = endpoint;
frag->sr_desc.opcode = VAPI_RDMA_WRITE;
frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
frag->sr_desc.r_key = frag->base.des_dst->seg_key.key32[0];
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
frag->sg_entry.len = frag->base.des_src->seg_len;
frag->ret = VAPI_post_sr(mvapi_btl->nic,
endpoint->lcl_qp_hndl_low,
&frag->sr_desc);
if(VAPI_OK != frag->ret){
return OMPI_ERROR;
}
mca_btl_openib_endpoint_post_rr(endpoint, 1);
return OMPI_SUCCESS;
}
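The put path assumes a descriptor assembled from both sides: des_src comes from the local prepare_src (address plus lkey) and des_dst from the remote peer's prepare_dst (address plus the key exported in seg_key.key32[0]). A usage sketch under that assumption:

/* Assumed caller sketch: RDMA-write des_src into the peer's des_dst. */
int rc = mca_btl_openib_put(btl, endpoint, des);
if(OMPI_SUCCESS != rc) {
    /* post failed: the caller must retry or fail the request */
}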
/*
* Asynchronous event handler to detect unforeseen
* events. Usually, such events are catastrophic.
* Should have a robust mechanism to handle these
* events and abort the OMPI application if necessary.
*
*/
static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
VAPI_event_record_t * event_p,
void *priv_data)
{
switch (event_p->type) {
case VAPI_QP_PATH_MIGRATED:
case VAPI_EEC_PATH_MIGRATED:
case VAPI_QP_COMM_ESTABLISHED:
case VAPI_EEC_COMM_ESTABLISHED:
case VAPI_SEND_QUEUE_DRAINED:
case VAPI_PORT_ACTIVE:
{
DEBUG_OUT("Got an asynchronous event: %s\n",
VAPI_event_record_sym(event_p->type));
break;
}
case VAPI_CQ_ERROR:
case VAPI_LOCAL_WQ_INV_REQUEST_ERROR:
case VAPI_LOCAL_WQ_ACCESS_VIOL_ERROR:
case VAPI_LOCAL_WQ_CATASTROPHIC_ERROR:
case VAPI_PATH_MIG_REQ_ERROR:
case VAPI_LOCAL_EEC_CATASTROPHIC_ERROR:
case VAPI_LOCAL_CATASTROPHIC_ERROR:
case VAPI_PORT_ERROR:
{
ompi_output(0, "Got an asynchronous event: %s (%s)",
VAPI_event_record_sym(event_p->type),
VAPI_event_syndrome_sym(event_p->
syndrome));
break;
}
default:
ompi_output(0, "Warning!! Got an undefined "
"asynchronous event\n");
}
}
int mca_btl_openib_module_init(mca_btl_openib_module_t *mvapi_btl)
{
/* Allocate Protection Domain */
VAPI_ret_t ret;
uint32_t cqe_cnt = 0;
ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag);
if(ret != VAPI_OK) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_alloc_pd");
return OMPI_ERROR;
}
ret = VAPI_create_cq(mvapi_btl->nic, mvapi_btl->ib_cq_size,
&mvapi_btl->cq_hndl_low, &cqe_cnt);
if( VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_create_cq");
return OMPI_ERROR;
}
ret = VAPI_create_cq(mvapi_btl->nic, mvapi_btl->ib_cq_size,
&mvapi_btl->cq_hndl_high, &cqe_cnt);
if( VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_create_cq");
return OMPI_ERROR;
}
if(cqe_cnt <= 0) {
ompi_output(0, "%s: error creating completion queue ", __func__);
return OMPI_ERROR;
}
ret = EVAPI_set_async_event_handler(mvapi_btl->nic,
async_event_handler, 0, &mvapi_btl->async_handler);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "EVAPI_set_async_event_handler");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}

src/mca/btl/openib/btl_openib.h (new file, 388 lines)

@@ -0,0 +1,388 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BTL_OPENIB_H
#define MCA_BTL_OPENIB_H
/* Standard system includes */
#include <sys/types.h>
#include <string.h>
/* Open MPI includes */
#include "class/ompi_free_list.h"
#include "class/ompi_bitmap.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "util/output.h"
#include "mca/mpool/mpool.h"
#include "btl_openib_error.h"
#include "mca/btl/btl.h"
#include "mca/btl/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MCA_BTL_IB_LEAVE_PINNED 1
/**
* Infiniband (IB) BTL component.
*/
struct mca_btl_openib_component_t {
mca_btl_base_component_1_0_0_t super; /**< base BTL component */
uint32_t ib_num_btls;
/**< number of hcas available to the IB component */
struct mca_btl_openib_module_t *mvapi_btls;
/**< array of available BTL modules */
int ib_free_list_num;
/**< initial size of free lists */
int ib_free_list_max;
/**< maximum size of free lists */
int ib_free_list_inc;
/**< number of elements to alloc when growing free lists */
ompi_list_t ib_procs;
/**< list of ib proc structures */
ompi_event_t ib_send_event;
/**< event structure for sends */
ompi_event_t ib_recv_event;
/**< event structure for recvs */
ompi_mutex_t ib_lock;
/**< lock for accessing module state */
int ib_mem_registry_hints_log_size;
/**< log2 size of hints hash array used by memory registry */
char* ib_mpool_name;
/**< name of ib memory pool */
uint32_t ib_rr_buf_max;
/**< the maximum number of posted rr */
uint32_t ib_rr_buf_min;
/**< the minimum number of posted rr */
size_t eager_limit;
size_t max_send_size;
uint32_t leave_pinned;
uint32_t reg_mru_len;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
extern mca_btl_openib_component_t mca_btl_openib_component;
typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
/**
* IB BTL interface
*/
struct mca_btl_openib_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
bool btl_inited;
mca_btl_openib_recv_reg_t ib_reg[256];
VAPI_hca_id_t hca_id; /**< ID of HCA */
int port; /**< ID of the PORT */
struct ibv_device* ib_dev; /* the IB device */
VAPI_hca_hndl_t nic; /**< NIC handle */
VAPI_pd_hndl_t ptag; /**< Protection Domain tag */
VAPI_cq_hndl_t cq_hndl_high; /**< High Priority Completion Queue handle */
VAPI_cq_hndl_t cq_hndl_low; /**< Low Priority Completion Queue handle */
EVAPI_async_handler_hndl_t async_handler;
/**< Async event handler used to detect weird/unknown events */
ompi_free_list_t send_free_eager; /**< free list of eager buffer descriptors */
ompi_free_list_t send_free_max; /**< free list of max buffer descriptors */
ompi_free_list_t send_free_frag; /**< free list of frags only... used for pinning memory */
ompi_free_list_t recv_free_eager; /**< High priority free list of buffer descriptors */
ompi_free_list_t recv_free_max; /**< Low priority free list of buffer descriptors */
ompi_list_t reg_mru_list; /**< a most recently used list of mca_mpool_mvapi_registration_t
entries, this allows us to keep a working set of memory pinned */
ompi_list_t repost; /**< list of buffers to repost */
ompi_mutex_t ib_lock; /**< module level lock */
mca_mpool_base_module_t* ib_pool; /**< ib memory pool */
uint32_t rr_posted_high; /**< number of high priority rr posted to the nic*/
uint32_t rr_posted_low; /**< number of low priority rr posted to the nic*/
VAPI_rr_desc_t* rr_desc_post;
/**< an array to allow posting of rr in one swoop */
size_t ib_inline_max; /**< max size of inline send*/
size_t ib_pin_min; /**< min size to pin memory*/
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_wq_size; /**< Max outstanding WR on the WQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
uint32_t ib_pkey_ix;
uint32_t ib_psn;
uint32_t ib_qp_ous_rd_atom;
uint32_t ib_mtu;
uint32_t ib_min_rnr_timer;
uint32_t ib_timeout;
uint32_t ib_retry_count;
uint32_t ib_rnr_retry;
uint32_t ib_max_rdma_dst_ops;
uint32_t ib_service_level;
uint32_t ib_static_rate;
uint32_t ib_src_path_bits;
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
struct mca_btl_openib_frag_t;
extern mca_btl_openib_module_t mca_btl_openib_module;
/**
* Register IB component parameters with the MCA framework
*/
extern int mca_btl_openib_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_openib_component_close(void);
/**
* IB component initialization.
*
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
* @param allow_multi_user_threads (OUT) Flag indicating whether BTL supports user threads (TRUE)
* @param have_hidden_threads (OUT) Flag indicating whether BTL uses threads (TRUE)
*
* (1) read interface list from kernel and compare against component parameters
* then create a BTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) publish BTL addressing info
*
*/
extern mca_btl_base_module_t** mca_btl_openib_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* IB component progress.
*/
extern int mca_btl_openib_component_progress(
void
);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param btl (IN) BTL module
* @param tag (IN) Tag used to dispatch incoming fragments to this callback
* @param cbfunc (IN) Callback function invoked on receipt of a matching fragment
* @param cbdata (IN) Opaque data passed back to the callback
* @return OMPI_SUCCESS or error status on failure.
*/
int mca_btl_openib_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata
);
/**
* Cleanup any resources held by the BTL.
*
* @param btl BTL instance.
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_openib_finalize(
struct mca_btl_base_module_t* btl
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN)
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (OUT) Set of (optional) peer addressing info.
* @param reachable (IN/OUT) Bitmap of processes that are reachable via this BTL.
* @return OMPI_SUCCESS or error status on failure.
*
*/
extern int mca_btl_openib_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
ompi_bitmap_t* reachable
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN) BTL instance
* @param nproc (IN) Number of processes.
* @param procs (IN) Set of processes.
* @param peers (IN) Set of peer data structures.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_btl_openib_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
* PML->BTL Initiate a send of the specified size.
*
* @param btl (IN) BTL instance
* @param btl_base_peer (IN) BTL peer addressing
* @param descriptor (IN) Descriptor describing the data to be delivered
* @param tag (IN) Tag passed to the peer via the message header
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_openib_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
* PML->BTL Initiate a put of the specified size.
*
* @param btl (IN) BTL instance
* @param btl_base_peer (IN) BTL peer addressing
* @param descriptor (IN) Descriptor carrying the local source and remote destination segments
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_openib_put(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor
);
/**
* Allocate a descriptor.
*
* @param btl (IN) BTL module
* @param size (IN) Requested descriptor size.
*/
extern mca_btl_base_descriptor_t* mca_btl_openib_alloc(
struct mca_btl_base_module_t* btl,
size_t size);
/**
* Return a segment allocated by this BTL.
*
* @param btl (IN) BTL module
* @param descriptor (IN) Allocated descriptor.
*/
extern int mca_btl_openib_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des);
/**
* Pack data and return a descriptor that can be
* used for send/put.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
);
/**
* Allocate a descriptor initialized for RDMA write.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size);
/**
* Return a send fragment to the modules free list.
*
* @param btl (IN) BTL instance
* @param frag (IN) IB send fragment
*
*/
extern void mca_btl_openib_send_frag_return(
struct mca_btl_base_module_t* btl,
struct mca_btl_openib_frag_t*
);
int mca_btl_openib_module_init(mca_btl_openib_module_t* mvapi_btl);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

src/mca/btl/openib/btl_openib_component.c (new file, 608 lines)

@@ -0,0 +1,608 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/constants.h"
#include "event/event.h"
#include "util/if.h"
#include "util/argv.h"
#include "util/output.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "mca/base/mca_base_param.h"
#include "mca/base/mca_base_module_exchange.h"
#include "mca/errmgr/errmgr.h"
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/base/base.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_endpoint.h"
#include "mca/btl/base/base.h"
#include <vapi.h>
#include <vapi_common.h>
#include "datatype/convertor.h"
#include "mca/mpool/mvapi/mpool_mvapi.h"
mca_btl_openib_component_t mca_btl_openib_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a btl v1.0.0 component (which also implies a
specific MCA version) */
MCA_BTL_BASE_VERSION_1_0_0,
"ib", /* MCA component name */
1, /* MCA component major version */
0, /* MCA component minor version */
0, /* MCA component release version */
mca_btl_openib_component_open, /* component open */
mca_btl_openib_component_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
mca_btl_openib_component_init,
mca_btl_openib_component_progress,
}
};
/*
* utility routines for parameter registration
*/
static inline char* mca_btl_openib_param_register_string(
const char* param_name,
const char* default_value)
{
char *param_value;
int id = mca_base_param_register_string("btl","ib",param_name,NULL,default_value);
mca_base_param_lookup_string(id, &param_value);
return param_value;
}
static inline int mca_btl_openib_param_register_int(
const char* param_name,
int default_value)
{
int id = mca_base_param_register_int("btl","ib",param_name,NULL,default_value);
int param_value = default_value;
mca_base_param_lookup_int(id,&param_value);
return param_value;
}
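These wrappers scope each parameter under the component's name, so the defaults registered in component_open below can be overridden at run time. For example (mirroring the first registration below; the mpirun spelling is assumed):

/* Registers "free_list_num" with a default of 8; a user can override it,
 * e.g. mpirun -mca btl_ib_free_list_num 64 ... */
mca_btl_openib_component.ib_free_list_num =
    mca_btl_openib_param_register_int("free_list_num", 8);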
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
int mca_btl_openib_component_open(void)
{
int param, value;
/* initialize state */
mca_btl_openib_component.ib_num_btls=0;
mca_btl_openib_component.mvapi_btls=NULL;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_procs, ompi_list_t);
/* OBJ_CONSTRUCT (&mca_btl_openib_component.ib_recv_frags, ompi_free_list_t); */
/* register IB component parameters */
mca_btl_openib_component.ib_free_list_num =
mca_btl_openib_param_register_int ("free_list_num", 8);
mca_btl_openib_component.ib_free_list_max =
mca_btl_openib_param_register_int ("free_list_max", 1024);
mca_btl_openib_component.ib_free_list_inc =
mca_btl_openib_param_register_int ("free_list_inc", 32);
mca_btl_openib_component.ib_mem_registry_hints_log_size =
mca_btl_openib_param_register_int ("hints_log_size", 8);
mca_btl_openib_component.ib_mpool_name =
mca_btl_openib_param_register_string("mpool", "ib");
mca_btl_openib_component.ib_rr_buf_max =
mca_btl_openib_param_register_int("rr_buf_max", 16);
mca_btl_openib_component.ib_rr_buf_min =
mca_btl_openib_param_register_int("rr_buf_min", 8);
mca_btl_openib_component.reg_mru_len =
mca_btl_openib_param_register_int("reg_mru_len", 16);
mca_btl_openib_module.super.btl_exclusivity =
mca_btl_openib_param_register_int ("exclusivity", 0);
mca_btl_openib_module.super.btl_eager_limit =
mca_btl_openib_param_register_int ("eager_limit", (64*1024))
- sizeof(mca_btl_openib_header_t);
mca_btl_openib_module.super.btl_min_send_size =
mca_btl_openib_param_register_int ("min_send_size", (64*1024))
- sizeof(mca_btl_openib_header_t);
mca_btl_openib_module.super.btl_max_send_size =
mca_btl_openib_param_register_int ("max_send_size", (128*1024))
- sizeof(mca_btl_openib_header_t);
mca_btl_openib_module.ib_pin_min =
mca_btl_openib_param_register_int("ib_pin_min", 128*1024);
mca_btl_openib_module.ib_cq_size =
mca_btl_openib_param_register_int("ib_cq_size",
40000);
mca_btl_openib_module.ib_wq_size =
mca_btl_openib_param_register_int("ib_wq_size",
10000);
mca_btl_openib_module.ib_sg_list_size =
mca_btl_openib_param_register_int("ib_sg_list_size",
1);
mca_btl_openib_module.ib_pkey_ix =
mca_btl_openib_param_register_int("ib_pkey_ix",
0);
mca_btl_openib_module.ib_psn =
mca_btl_openib_param_register_int("ib_psn",
0);
mca_btl_openib_module.ib_qp_ous_rd_atom =
mca_btl_openib_param_register_int("ib_qp_ous_rd_atom",
1);
mca_btl_openib_module.ib_mtu =
mca_btl_openib_param_register_int("ib_mtu",
MTU1024);
mca_btl_openib_module.ib_min_rnr_timer =
mca_btl_openib_param_register_int("ib_min_rnr_timer",
5);
mca_btl_openib_module.ib_timeout =
mca_btl_openib_param_register_int("ib_timeout",
10);
mca_btl_openib_module.ib_retry_count =
mca_btl_openib_param_register_int("ib_retry_count",
7);
mca_btl_openib_module.ib_rnr_retry =
mca_btl_openib_param_register_int("ib_rnr_retry",
7);
mca_btl_openib_module.ib_max_rdma_dst_ops =
mca_btl_openib_param_register_int("ib_max_rdma_dst_ops",
16);
mca_btl_openib_module.ib_service_level =
mca_btl_openib_param_register_int("ib_service_level",
0);
mca_btl_openib_module.ib_static_rate =
mca_btl_openib_param_register_int("ib_static_rate",
0);
mca_btl_openib_module.ib_src_path_bits =
mca_btl_openib_param_register_int("ib_src_path_bits",
0);
mca_btl_openib_module.super.btl_min_rdma_size =
mca_btl_openib_param_register_int("min_rdma_size",
1024*1024);
mca_btl_openib_module.super.btl_max_rdma_size =
mca_btl_openib_param_register_int("max_rdma_size",
1024*1024);
mca_btl_openib_module.super.btl_flags =
mca_btl_openib_param_register_int("flags",
MCA_BTL_FLAGS_RDMA);
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_btl_openib_component.leave_pinned = value;
mca_btl_openib_component.max_send_size = mca_btl_openib_module.super.btl_max_send_size;
mca_btl_openib_component.eager_limit = mca_btl_openib_module.super.btl_eager_limit;
return OMPI_SUCCESS;
}
/*
* component cleanup - sanity checking of queue lengths
*/
int mca_btl_openib_component_close(void)
{
return OMPI_SUCCESS;
}
/*
* IB component initialization:
* (1) read interface list from kernel and compare against component parameters
* then create a BTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) register BTL parameters with the MCA
*/
mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
VAPI_ret_t vapi_ret;
struct ibv_device **ib_devs;
VAPI_hca_hndl_t hca_hndl;
VAPI_hca_vendor_t hca_vendor;
VAPI_hca_cap_t hca_cap;
VAPI_hca_port_t hca_port;
uint32_t num_devs;
mca_btl_base_module_t** btls;
uint32_t i,j, length;
struct mca_mpool_base_resources_t hca_pd;
ompi_list_t btl_list;
mca_btl_openib_module_t * mvapi_btl;
mca_btl_base_selected_module_t* ib_selected;
ompi_list_item_t* item;
/* initialization */
*num_btl_modules = 0;
num_devs = 0;
struct dlist *dev_list;
struct ibv_device* ib_dev;
/* Determine the number of hca's available on the host */
dev_list = ibv_get_devices();
dlist_start(dev_list);
dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
num_devs++;
if(0 == num_devs) {
ompi_output(0, "No hca's found on this host! \n");
return NULL;
}
/* Allocate space for the ib devices */
ib_devs = (struct ibv_device**) malloc(num_devs * sizeof(struct ibv_device*));
if(NULL == ib_devs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
dlist_start(dev_list);
i = 0;
dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
ib_devs[i++] = ib_dev;
/** We must loop through all the HCA IDs, get their handles, and
for each HCA query the number of ports and set up
a distinct btl module for each HCA port */
OBJ_CONSTRUCT(&btl_list, ompi_list_t);
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, ompi_mutex_t);
for(i = 0; i < num_devs; i++){
ib_dev = ib_devs[i];
ibv_open_device(ib_dev);
vapi_ret = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap);
if(VAPI_OK != vapi_ret) {
ompi_output(0, "%s:error getting hca properties\n", __func__);
return NULL;
}
/* Note ports are 1 based hence j = 1 */
for(j = 1; j <= hca_cap.phys_port_num; j++){
vapi_ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t) j, &hca_port);
if(VAPI_OK != vapi_ret) {
ompi_output(0, "%s:error getting hca port properties\n", __func__);
return NULL;
}
if( PORT_ACTIVE == hca_port.state ){
mvapi_btl = (mca_btl_openib_module_t*) malloc(sizeof(mca_btl_openib_module_t));
memcpy(mvapi_btl, &mca_btl_openib_module, sizeof(mca_btl_openib_module));
ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
ib_selected->btl_module = (mca_btl_base_module_t*) mvapi_btl;
memcpy(mvapi_btl->hca_id, hca_ids[i], sizeof(VAPI_hca_id_t));
mvapi_btl->nic = hca_hndl;
mvapi_btl->port_id = (IB_port_t) j;
mvapi_btl->port = hca_port;
ompi_list_append(&btl_list, (ompi_list_item_t*) ib_selected);
mca_btl_openib_component.ib_num_btls ++;
}
}
}
/* Allocate space for btl modules */
mca_btl_openib_component.mvapi_btls = (mca_btl_openib_module_t*) malloc(sizeof(mca_btl_openib_module_t) *
mca_btl_openib_component.ib_num_btls);
if(NULL == mca_btl_openib_component.mvapi_btls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
btls = (struct mca_btl_base_module_t**)
malloc(mca_btl_openib_component.ib_num_btls * sizeof(struct mca_btl_openib_module_t*));
if(NULL == btls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++){
item = ompi_list_remove_first(&btl_list);
ib_selected = (mca_btl_base_selected_module_t*)item;
mvapi_btl = (mca_btl_openib_module_t*) ib_selected->btl_module;
memcpy(&(mca_btl_openib_component.mvapi_btls[i]), mvapi_btl , sizeof(mca_btl_openib_module_t));
free(ib_selected);
free(mvapi_btl);
mvapi_btl = &mca_btl_openib_component.mvapi_btls[i];
/* Initialize the modules function pointers */
/* Initialize module state */
OBJ_CONSTRUCT(&mvapi_btl->ib_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&mvapi_btl->send_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->send_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->send_free_frag, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->recv_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->recv_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->repost, ompi_list_t);
OBJ_CONSTRUCT(&mvapi_btl->reg_mru_list, ompi_list_t);
if(mca_btl_openib_module_init(mvapi_btl) != OMPI_SUCCESS) {
free(ib_devs);
return NULL;
}
hca_pd.hca = mvapi_btl->nic;
hca_pd.pd_tag = mvapi_btl->ptag;
/* initialize the memory pool using the hca */
mvapi_btl->ib_pool =
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name,
&mvapi_btl->super,
&hca_pd);
if(NULL == mvapi_btl->ib_pool) {
ompi_output(0, "%s: error creating vapi memory pool! aborting ib btl initialization", __func__);
return NULL;
}
/* Initialize pool of send fragments */
length = sizeof(mca_btl_openib_frag_t) +
sizeof(mca_btl_openib_header_t) +
mvapi_btl->super.btl_eager_limit+
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&mvapi_btl->send_free_eager,
length,
OBJ_CLASS(mca_btl_openib_send_frag_eager_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
mvapi_btl->ib_pool);
ompi_free_list_init(&mvapi_btl->recv_free_eager,
length,
OBJ_CLASS(mca_btl_openib_recv_frag_eager_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
mvapi_btl->ib_pool);
length = sizeof(mca_btl_openib_frag_t) +
sizeof(mca_btl_openib_header_t) +
mvapi_btl->super.btl_max_send_size+
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&mvapi_btl->send_free_max,
length,
OBJ_CLASS(mca_btl_openib_send_frag_max_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
mvapi_btl->ib_pool);
/* Initialize pool of receive fragments */
ompi_free_list_init (&mvapi_btl->recv_free_max,
length,
OBJ_CLASS (mca_btl_openib_recv_frag_max_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mvapi_btl->ib_pool);
length = sizeof(mca_btl_openib_frag_t) +
sizeof(mca_btl_openib_header_t)+
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&mvapi_btl->send_free_frag,
length,
OBJ_CLASS(mca_btl_openib_send_frag_frag_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
mvapi_btl->ib_pool);
/* Initialize the rr_desc_post array for posting of rr*/
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mca_btl_openib_component.ib_rr_buf_max * sizeof(VAPI_rr_desc_t)));
/* This is now done by the memory pool passed to free_list_init.. Initialize the send descriptors */
/* if(mca_btl_openib_send_frag_register(mvapi_btl) != OMPI_SUCCESS) { */
/* free(hca_ids); */
/* return NULL; */
/* } */
btls[i] = &mvapi_btl->super;
}
/* Post OOB receive to support dynamic connection setup */
mca_btl_openib_post_recv();
*num_btl_modules = mca_btl_openib_component.ib_num_btls;
free(ib_devs);
return btls;
}
/*
* IB component progress.
*/
int mca_btl_openib_component_progress()
{
uint32_t i;
int count = 0;
mca_btl_openib_frag_t* frag;
/* Poll for completions */
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
VAPI_ret_t ret;
VAPI_wc_desc_t comp;
mca_btl_openib_module_t* mvapi_btl = &mca_btl_openib_component.mvapi_btls[i];
do{
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
if(VAPI_OK == ret) {
if(comp.status != VAPI_SUCCESS) {
ompi_output(0, "Got error : %s, Vendor code : %d Frag : %p",
VAPI_wc_status_sym(comp.status),
comp.vendor_err_syndrome, comp.id);
return OMPI_ERROR;
}
/* Handle n/w completions */
switch(comp.opcode) {
case VAPI_CQE_RQ_RDMA_WITH_IMM:
if(comp.imm_data_valid){
ompi_output(0, "Got an RQ_RDMA_WITH_IMM!\n");
}
break;
case VAPI_CQE_SQ_RDMA_WRITE:
case VAPI_CQE_SQ_SEND_DATA :
/* Process a completed send */
frag = (mca_btl_openib_frag_t*) comp.id;
frag->rc = OMPI_SUCCESS;
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
count++;
break;
case VAPI_CQE_RQ_SEND_DATA:
DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
frag = (mca_btl_openib_frag_t*) comp.id;
frag->rc=OMPI_SUCCESS;
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* advance the segment address past the header and subtract from the length..*/
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (ompi_list_item_t*) frag);
OMPI_THREAD_ADD32(&mvapi_btl->rr_posted_high, -1);
mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)comp.id)->endpoint, 0);
count++;
break;
default:
ompi_output(0, "Errorneous network completion");
break;
}
}
}
while(VAPI_OK == ret);
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
if(VAPI_OK == ret) {
if(comp.status != VAPI_SUCCESS) {
ompi_output(0, "Got error : %s, Vendor code : %d Frag : %p",
VAPI_wc_status_sym(comp.status),
comp.vendor_err_syndrome, comp.id);
return OMPI_ERROR;
}
/* Handle n/w completions */
switch(comp.opcode) {
case VAPI_CQE_SQ_RDMA_WRITE:
case VAPI_CQE_SQ_SEND_DATA :
/* Process a completed send */
frag = (mca_btl_openib_frag_t*) comp.id;
frag->rc = OMPI_SUCCESS;
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
count++;
break;
case VAPI_CQE_RQ_SEND_DATA:
DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
frag = (mca_btl_openib_frag_t*) comp.id;
frag->rc=OMPI_SUCCESS;
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* advance the segment address past the header and subtract from the length..*/
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (ompi_list_item_t*) frag);
OMPI_THREAD_ADD32(&mvapi_btl->rr_posted_low, -1);
mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)comp.id)->endpoint, 0);
count++;
break;
default:
ompi_output(0, "Errorneous network completion");
break;
}
}
}
return count;
}
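Note the asymmetry above: each progress call drains the high-priority CQ completely but polls the low-priority CQ only once, biasing progress toward latency-sensitive eager traffic. Condensed restatement of the pattern:

/* Condensed from mca_btl_openib_component_progress() above: */
do {
    ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
    /* ... dispatch the completion and increment count ... */
} while(VAPI_OK == ret);                      /* drain high-priority CQ */
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
/* ... handle at most one low-priority completion per call ... */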

src/mca/btl/openib/btl_openib_endpoint.c (new file, 800 lines)

@@ -0,0 +1,800 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "include/types.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/ns/base/base.h"
#include "mca/oob/base/base.h"
#include "mca/rml/rml.h"
#include "mca/errmgr/errmgr.h"
#include "dps/dps.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_proc.h"
#include "btl_openib_frag.h"
#include "class/ompi_free_list.h"
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
int mca_btl_openib_endpoint_create_qp(
mca_btl_openib_module_t* mvapi_btl,
VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t ptag,
VAPI_cq_hndl_t cq_hndl,
VAPI_qp_hndl_t* qp_hndl,
VAPI_qp_prop_t* qp_prop,
int transport_type);
int mca_btl_openib_endpoint_qp_init_query(
mca_btl_openib_module_t* mvapi_btl,
VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl,
VAPI_qp_num_t remote_qp_num,
IB_lid_t remote_lid,
IB_port_t port_id
);
static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* mvapi_btl, mca_btl_openib_endpoint_t * endpoint, mca_btl_openib_frag_t * frag)
{
VAPI_qp_hndl_t qp_hndl;
frag->sr_desc.remote_qkey = 0;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
if((frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY) && frag->size <= mvapi_btl->super.btl_eager_limit){
frag->sr_desc.remote_qp = endpoint->rem_qp_num_high;
qp_hndl = endpoint->lcl_qp_hndl_high;
} else {
frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
qp_hndl = endpoint->lcl_qp_hndl_low;
}
frag->sr_desc.opcode = VAPI_SEND;
frag->sg_entry.len = frag->segment.seg_len + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); /* sizeof(mca_btl_openib_header_t); */
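/* sends no larger than the QP's max inline size (ib_inline_max, taken
from the QP attributes in qp_init_query below) can be posted inline,
sparing a DMA read of the send buffer */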
if(frag->sg_entry.len <= mvapi_btl->ib_inline_max) {
frag->ret = EVAPI_post_inline_sr(mvapi_btl->nic,
qp_hndl,
&frag->sr_desc);
}else {
frag->ret = VAPI_post_sr(mvapi_btl->nic,
qp_hndl,
&frag->sr_desc);
}
if(VAPI_OK != frag->ret)
return OMPI_ERROR;
mca_btl_openib_endpoint_post_rr(endpoint, 1);
return OMPI_SUCCESS;
}
OBJ_CLASS_INSTANCE(mca_btl_openib_endpoint_t,
ompi_list_item_t, mca_btl_openib_endpoint_construct,
mca_btl_openib_endpoint_destruct);
/*
* Initialize state of the endpoint instance.
*
*/
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
endpoint->endpoint_btl = 0;
endpoint->endpoint_proc = 0;
endpoint->endpoint_tstamp = 0.0;
endpoint->endpoint_state = MCA_BTL_IB_CLOSED;
endpoint->endpoint_retries = 0;
OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&endpoint->pending_send_frags, ompi_list_t);
}
/*
* Destroy an endpoint
*
*/
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
}
/*
* Send connection information to remote endpoint using OOB
*
*/
static void mca_btl_openib_endpoint_send_cb(
int status,
orte_process_name_t* endpoint,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
}
static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* endpoint)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
if(NULL == buffer) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the info in the send buffer */
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_high.qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_low.qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.pack(buffer, &endpoint->endpoint_btl->port.lid, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send to endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_btl_openib_endpoint_send_cb, NULL);
DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Send connect ACK to remote endpoint
*
*/
static int mca_btl_openib_endpoint_send_connect_ack(mca_btl_base_endpoint_t* endpoint)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
uint32_t zero = 0;
/* pack the info in the send buffer */
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send to endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_btl_openib_endpoint_send_cb, NULL);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/*
* Set remote connection info
*
* XXX: Currently size is unused; this will change as soon
* as we add more info to be exchanged at connection
* setup.
*
*/
static int mca_btl_openib_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpoint, orte_buffer_t* buffer)
{
int rc;
size_t cnt = 1;
rc = orte_dps.unpack(buffer, &endpoint->rem_qp_num_high, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.unpack(buffer, &endpoint->rem_qp_num_low, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.unpack(buffer, &endpoint->rem_lid, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
endpoint->rem_qp_num_high,
endpoint->rem_qp_num_low,
endpoint->rem_lid);
return ORTE_SUCCESS;
}
/*
* Start to connect to the endpoint. We send our Queue Pair
* information over the TCP OOB communication mechanism.
* On completion of our send, a send completion handler
* is called.
*
*/
static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoint)
{
int rc;
/* Create the High Priority Queue Pair */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->endpoint_btl->ptag,
endpoint->endpoint_btl->cq_hndl_high,
&endpoint->lcl_qp_hndl_high,
&endpoint->lcl_qp_prop_high,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
/* Create the Low Priority Queue Pair */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->endpoint_btl->ptag,
endpoint->endpoint_btl->cq_hndl_low,
&endpoint->lcl_qp_hndl_low,
&endpoint->lcl_qp_prop_low,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid);
/* Send connection info over to remote endpoint */
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Reply to a 'start connect' message
*
*/
static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t *endpoint, orte_buffer_t* buffer)
{
int rc;
/* Create the High Priority Queue Pair */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->endpoint_btl->ptag,
endpoint->endpoint_btl->cq_hndl_high,
&endpoint->lcl_qp_hndl_high,
&endpoint->lcl_qp_prop_high,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
/* Create the Low Priority Queue Pair */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->endpoint_btl->ptag,
endpoint->endpoint_btl->cq_hndl_low,
&endpoint->lcl_qp_hndl_low,
&endpoint->lcl_qp_prop_low,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid);
/* Set the remote side info */
mca_btl_openib_endpoint_set_remote_info(endpoint, buffer);
/* Connect to endpoint */
rc = mca_btl_openib_endpoint_connect(endpoint);
if(rc != OMPI_SUCCESS) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
/* Send connection info over to remote endpoint */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
*
*/
static void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
{
endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
mca_btl_openib_progress_send_frags(endpoint);
}
/*
* Non blocking OOB recv callback.
* Read incoming QP and other info, and if this endpoint
* is trying to connect, reply with our QP info,
* otherwise try to modify QP's and establish
* reliable connection
*
*/
static void mca_btl_openib_endpoint_recv(
int status,
orte_process_name_t* endpoint,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
mca_btl_openib_proc_t *ib_proc;
mca_btl_openib_endpoint_t *ib_endpoint;
int endpoint_state;
int rc;
for(ib_proc = (mca_btl_openib_proc_t*)
ompi_list_get_first(&mca_btl_openib_component.ib_procs);
ib_proc != (mca_btl_openib_proc_t*)
ompi_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_guid.vpid == endpoint->vpid) {
/* Try to get the endpoint instance of this proc */
/* Limitation: Right now, we have only 1 endpoint
* for every process. Need several changes, some
* in PML/BTL interface to set this right */
ib_endpoint = ib_proc->proc_endpoints[0];
endpoint_state = ib_endpoint->endpoint_state;
/* Update status */
switch(endpoint_state) {
case MCA_BTL_IB_CLOSED :
/* We had this connection closed before.
* The endpoint is trying to connect. Move the
* status of this connection to CONNECTING,
* and then reply with our QP information */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Wait for the ack before marking connected */
ib_endpoint->endpoint_state = MCA_BTL_IB_CONNECT_ACK;
break;
case MCA_BTL_IB_CONNECTING :
mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer);
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Setup state as connected */
mca_btl_openib_endpoint_connected(ib_endpoint);
/* Send him an ack */
mca_btl_openib_endpoint_send_connect_ack(ib_endpoint);
break;
case MCA_BTL_IB_CONNECT_ACK:
mca_btl_openib_endpoint_connected(ib_endpoint);
break;
case MCA_BTL_IB_CONNECTED :
break;
default :
ompi_output(0, "Connected -> Connecting not possible.\n");
}
break;
}
}
/* Okay, now that we are done receiving,
* re-post the buffer */
mca_btl_openib_post_recv();
}
void mca_btl_openib_post_recv()
{
DEBUG_OUT("");
orte_rml.recv_buffer_nb(
ORTE_RML_NAME_ANY,
ORTE_RML_TAG_DYNAMIC-1,
0,
mca_btl_openib_endpoint_recv,
NULL);
}
/*
* Attempt to send a fragment using a given endpoint. If the endpoint is not
* connected, queue the fragment and start the connection as required.
*/
int mca_btl_openib_endpoint_send(
mca_btl_base_endpoint_t* endpoint,
mca_btl_openib_frag_t* frag
)
{
int rc;
mca_btl_openib_module_t *mvapi_btl;
OMPI_THREAD_LOCK(&endpoint->endpoint_send_lock);
switch(endpoint->endpoint_state) {
case MCA_BTL_IB_CONNECTING:
DEBUG_OUT("Queing because state is connecting");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_BTL_IB_CONNECT_ACK:
DEBUG_OUT("Queuing because waiting for ack");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_BTL_IB_CLOSED:
DEBUG_OUT("Connection to endpoint closed ... connecting ...");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = mca_btl_openib_endpoint_start_connect(endpoint);
break;
case MCA_BTL_IB_FAILED:
rc = OMPI_ERR_UNREACH;
break;
case MCA_BTL_IB_CONNECTED:
{
mvapi_btl = endpoint->endpoint_btl;
DEBUG_OUT("Send to : %d, len : %d, frag : %p",
endpoint->endpoint_proc->proc_guid.vpid,
frag->ib_buf.desc.sg_entry.len,
frag);
rc = mca_btl_openib_endpoint_post_send(mvapi_btl, endpoint, frag);
break;
}
default:
rc = OMPI_ERR_UNREACH;
}
OMPI_THREAD_UNLOCK(&endpoint->endpoint_send_lock);
return rc;
}
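For illustration only (not part of this commit), a minimal sketch of the hand-off a PML-level caller would perform; endpoint, frag, and the owning module btl are assumptions:
/* Sketch: the endpoint either posts the frag immediately or queues it
while the OOB handshake runs; unreachable peers fail up front. */
int rc = mca_btl_openib_endpoint_send(endpoint, frag);
if(OMPI_SUCCESS != rc) {
frag->rc = rc;
frag->base.des_cbfunc(&btl->super, endpoint, &frag->base, rc);
}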
void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t* endpoint)
{
ompi_list_item_t *frag_item;
mca_btl_openib_frag_t *frag;
mca_btl_openib_module_t* mvapi_btl;
/*Check if endpoint is connected */
if(endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) {
return;
}
/* While there are frags in the list,
* process them */
while(!ompi_list_is_empty(&(endpoint->pending_send_frags))) {
frag_item = ompi_list_remove_first(&(endpoint->pending_send_frags));
frag = (mca_btl_openib_frag_t *) frag_item;
mvapi_btl = endpoint->endpoint_btl;
/* We need to post this one */
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(mvapi_btl, endpoint, frag))
ompi_output(0, "error in mca_btl_openib_endpoint_send");
}
}
/*
* Complete connection to endpoint.
*/
int mca_btl_openib_endpoint_connect(
mca_btl_openib_endpoint_t *endpoint)
{
int rc;
/* Connection establishment RC */
rc = mca_btl_openib_endpoint_qp_init_query(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->lcl_qp_hndl_high,
endpoint->rem_qp_num_high,
endpoint->rem_lid,
endpoint->endpoint_btl->port_id);
if(rc != OMPI_SUCCESS) {
return rc;
}
rc = mca_btl_openib_endpoint_qp_init_query(endpoint->endpoint_btl,
endpoint->endpoint_btl->nic,
endpoint->lcl_qp_hndl_low,
endpoint->rem_qp_num_low,
endpoint->rem_lid,
endpoint->endpoint_btl->port_id);
if(rc != OMPI_SUCCESS) {
return rc;
}
mca_btl_openib_endpoint_post_rr(endpoint, 0);
return OMPI_SUCCESS;
}
int mca_btl_openib_endpoint_create_qp(
mca_btl_openib_module_t* mvapi_btl,
VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t ptag,
VAPI_cq_hndl_t cq_hndl,
VAPI_qp_hndl_t* qp_hndl,
VAPI_qp_prop_t* qp_prop,
int transport_type)
{
VAPI_ret_t ret;
VAPI_qp_init_attr_t qp_init_attr;
switch(transport_type) {
case VAPI_TS_RC: /* Set up RC qp parameters */
qp_init_attr.cap.max_oust_wr_rq = mvapi_btl->ib_wq_size;
qp_init_attr.cap.max_oust_wr_sq = mvapi_btl->ib_wq_size;
qp_init_attr.cap.max_sg_size_rq = mvapi_btl->ib_sg_list_size;
qp_init_attr.cap.max_sg_size_sq = mvapi_btl->ib_sg_list_size;
qp_init_attr.pd_hndl = ptag;
/* We don't have Reliable Datagram Handle right now */
qp_init_attr.rdd_hndl = 0;
/* Signal all work requests on this queue pair */
qp_init_attr.rq_sig_type = VAPI_SIGNAL_REQ_WR;
qp_init_attr.sq_sig_type = VAPI_SIGNAL_REQ_WR;
/* Use Reliable Connected transport service */
qp_init_attr.ts_type = VAPI_TS_RC;
/* Set Send and Recv completion queues */
qp_init_attr.rq_cq_hndl = cq_hndl;
qp_init_attr.sq_cq_hndl = cq_hndl;
break;
case VAPI_TS_UD: /* Set up UD qp parameters */
default:
return OMPI_ERR_NOT_IMPLEMENTED;
}
ret = VAPI_create_qp(nic, &qp_init_attr,
qp_hndl, qp_prop);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_create_qp");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_btl_openib_endpoint_qp_init_query(
mca_btl_openib_module_t* mvapi_btl,
VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl,
VAPI_qp_num_t remote_qp_num,
IB_lid_t remote_lid,
IB_port_t port_id
)
{
VAPI_ret_t ret;
VAPI_qp_attr_t qp_attr;
VAPI_qp_attr_mask_t qp_attr_mask;
VAPI_qp_init_attr_t qp_init_attr;
VAPI_qp_cap_t qp_cap;
/* Modifying QP to INIT */
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_INIT;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.pkey_ix = mvapi_btl->ib_pkey_ix;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
qp_attr.port = port_id;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PORT);
qp_attr.remote_atomic_flags = VAPI_EN_REM_WRITE | VAPI_EN_REM_READ;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_REMOTE_ATOMIC_FLAGS);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
DEBUG_OUT("Modified to init..Qp %d", qp_hndl);
/********************** INIT --> RTR ************************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_RTR;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.qp_ous_rd_atom = mvapi_btl->ib_qp_ous_rd_atom;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_OUS_RD_ATOM);
qp_attr.path_mtu = mvapi_btl->ib_mtu;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PATH_MTU);
qp_attr.rq_psn = mvapi_btl->ib_psn;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RQ_PSN);
qp_attr.pkey_ix = mvapi_btl->ib_pkey_ix;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
qp_attr.min_rnr_timer = mvapi_btl->ib_min_rnr_timer;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_MIN_RNR_TIMER);
qp_attr.av.sl = mvapi_btl->ib_service_level;
qp_attr.av.grh_flag = FALSE;
qp_attr.av.static_rate = mvapi_btl->ib_static_rate;
qp_attr.av.src_path_bits = mvapi_btl->ib_src_path_bits;
qp_attr.dest_qp_num = remote_qp_num;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_DEST_QP_NUM);
qp_attr.av.dlid = remote_lid;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_AV);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
DEBUG_OUT("Modified to RTR..Qp %d", qp_hndl);
/************** RTS *******************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_RTS;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.sq_psn = mvapi_btl->ib_psn;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_SQ_PSN);
qp_attr.timeout = mvapi_btl->ib_timeout;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_TIMEOUT);
qp_attr.retry_count = mvapi_btl->ib_retry_count;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RETRY_COUNT);
qp_attr.rnr_retry = mvapi_btl->ib_rnr_retry;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RNR_RETRY);
qp_attr.ous_dst_rd_atom = mvapi_btl->ib_max_rdma_dst_ops;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_OUS_DST_RD_ATOM);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
DEBUG_OUT("Modified to RTS..Qp %d", qp_hndl);
ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );
if (ret != VAPI_OK) {
ompi_output(0, "error querying the queue pair");
return OMPI_ERROR;
}
mvapi_btl->ib_inline_max = qp_init_attr.cap.max_inline_data_sq;
return OMPI_SUCCESS;
}
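To summarize the wire-up implemented above, a condensed sketch (illustrative, not part of the commit) of what one side runs; endpoint is assumed valid and error handling is trimmed:
/* Sketch: create both QPs, exchange numbers over OOB, then drive
INIT -> RTR -> RTS and pre-post receives via endpoint_connect. */
mca_btl_openib_module_t* btl = endpoint->endpoint_btl;
mca_btl_openib_endpoint_create_qp(btl, btl->nic, btl->ptag, btl->cq_hndl_high,
&endpoint->lcl_qp_hndl_high, &endpoint->lcl_qp_prop_high, VAPI_TS_RC);
mca_btl_openib_endpoint_create_qp(btl, btl->nic, btl->ptag, btl->cq_hndl_low,
&endpoint->lcl_qp_hndl_low, &endpoint->lcl_qp_prop_low, VAPI_TS_RC);
/* ... remote qp_num/lid arrive via the OOB handshake above ... */
mca_btl_openib_endpoint_connect(endpoint);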

220
src/mca/btl/openib/btl_openib_endpoint.h Normal file

@ -0,0 +1,220 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_IB_ENDPOINT_H
#define MCA_BTL_IB_ENDPOINT_H
#include "class/ompi_list.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "btl_openib_frag.h"
#include "btl_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MAX_POST_RR (16)
OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
/**
* State of IB endpoint connection.
*/
typedef enum {
/* Defines the state in which this BTL instance
* has started the process of connection */
MCA_BTL_IB_CONNECTING,
/* Waiting for ack from endpoint */
MCA_BTL_IB_CONNECT_ACK,
/* Connected ... both sender & receiver have
* buffers associated with this connection */
MCA_BTL_IB_CONNECTED,
/* Connection is closed, there are no resources
* associated with this */
MCA_BTL_IB_CLOSED,
/* Maximum number of retries have been used.
* Report failure on send to upper layer */
MCA_BTL_IB_FAILED
} mca_btl_openib_endpoint_state_t;
/**
* An abstraction that represents a connection to an endpoint process.
* An instance of mca_btl_base_endpoint_t is associated w/ each process
* and BTL pair at startup. However, connections to the endpoint
* are established dynamically on an as-needed basis:
*/
struct mca_btl_base_endpoint_t {
ompi_list_item_t super;
struct mca_btl_openib_module_t* endpoint_btl;
/**< BTL instance that created this connection */
struct mca_btl_openib_proc_t* endpoint_proc;
/**< proc structure corresponding to endpoint */
mca_btl_openib_endpoint_state_t endpoint_state;
/**< current state of the connection */
size_t endpoint_retries;
/**< number of connection retries attempted */
double endpoint_tstamp;
/**< timestamp of when the first connection was attempted */
ompi_mutex_t endpoint_send_lock;
/**< lock for concurrent access to endpoint state */
ompi_mutex_t endpoint_recv_lock;
/**< lock for concurrent access to endpoint state */
ompi_list_t pending_send_frags;
/**< list of pending send frags for this endpoint */
VAPI_qp_num_t rem_qp_num_high;
/* High priority remote side QP number */
VAPI_qp_num_t rem_qp_num_low;
/* Low priority remote side QP number */
IB_lid_t rem_lid;
/* Local identifier of the remote process */
VAPI_qp_hndl_t lcl_qp_hndl_high;
/* High priority local QP handle */
VAPI_qp_hndl_t lcl_qp_hndl_low;
/* Low priority local QP handle */
VAPI_qp_prop_t lcl_qp_prop_high;
/* High priority local QP properties */
VAPI_qp_prop_t lcl_qp_prop_low;
/* Low priority local QP properties */
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_openib_endpoint_t;
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint, struct mca_btl_openib_frag_t* frag);
int mca_btl_openib_endpoint_connect(mca_btl_base_endpoint_t*);
void mca_btl_openib_post_recv(void);
void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t*);
static inline int mca_btl_openib_endpoint_post_rr_sub(int cnt,
mca_btl_openib_endpoint_t* endpoint,
ompi_free_list_t* frag_list,
uint32_t* rr_posted,
VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl
)
{
int rc, i;
ompi_list_item_t* item;
mca_btl_openib_frag_t* frag;
mca_btl_openib_module_t *mvapi_btl = endpoint->endpoint_btl;
VAPI_rr_desc_t* rr_desc_post = mvapi_btl->rr_desc_post;
/* prepare frags and post receive requests */
for(i = 0; i < cnt; i++) {
OMPI_FREE_LIST_WAIT(frag_list, item, rc);
frag = (mca_btl_openib_frag_t*) item;
frag->endpoint = endpoint;
frag->sg_entry.len = frag->size + ((unsigned char*) frag->segment.seg_addr.pval- (unsigned char*) frag->hdr); /* sizeof(mca_btl_openib_header_t); */
rr_desc_post[i] = frag->rr_desc;
}
frag->ret = EVAPI_post_rr_list(nic,
qp_hndl,
cnt,
rr_desc_post);
if(VAPI_OK != frag->ret) {
MCA_BTL_IB_VAPI_ERROR(frag->ret, "EVAPI_post_rr_list");
return OMPI_ERROR;
}
OMPI_THREAD_ADD32(rr_posted, cnt);
return OMPI_SUCCESS;
}
static inline int mca_btl_openib_endpoint_post_rr( mca_btl_openib_endpoint_t * endpoint, int additional){
mca_btl_openib_module_t * mvapi_btl = endpoint->endpoint_btl;
int rc;
OMPI_THREAD_LOCK(&mvapi_btl->ib_lock);
if(mvapi_btl->rr_posted_high <= mca_btl_openib_component.ib_rr_buf_min+additional && mvapi_btl->rr_posted_high < mca_btl_openib_component.ib_rr_buf_max){
rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - mvapi_btl->rr_posted_high,
endpoint,
&mvapi_btl->recv_free_eager,
&mvapi_btl->rr_posted_high,
mvapi_btl->nic,
endpoint->lcl_qp_hndl_high
);
if(rc != OMPI_SUCCESS){
OMPI_THREAD_UNLOCK(&mvapi_btl->ib_lock);
return rc;
}
}
if(mvapi_btl->rr_posted_low <= mca_btl_openib_component.ib_rr_buf_min+additional && mvapi_btl->rr_posted_low < mca_btl_openib_component.ib_rr_buf_max){
rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - mvapi_btl->rr_posted_low,
endpoint,
&mvapi_btl->recv_free_max,
&mvapi_btl->rr_posted_low,
mvapi_btl->nic,
endpoint->lcl_qp_hndl_low
);
if(rc != OMPI_SUCCESS) {
OMPI_THREAD_UNLOCK(&mvapi_btl->ib_lock);
return rc;
}
}
OMPI_THREAD_UNLOCK(&mvapi_btl->ib_lock);
return OMPI_SUCCESS;
}
#define DUMP_ENDPOINT(endpoint_ptr) { \
ompi_output(0, "[%s:%d] ", __FILE__, __LINE__); \
ompi_output(0, "Dumping endpoint %d state", \
endpoint_ptr->endpoint_proc->proc_guid.vpid); \
ompi_output(0, "Local QP hndl : %d", \
endpoint_ptr->endpoint_conn->lres->qp_hndl); \
ompi_output(0, "Local QP num : %d", \
endpoint_ptr->endpoint_conn->lres->qp_prop.qp_num); \
ompi_output(0, "Remote QP num : %d", \
endpoint_ptr->endpoint_conn->rres->qp_num); \
ompi_output(0, "Remote LID : %d", \
endpoint_ptr->endpoint_conn->rres->lid); \
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

43
src/mca/btl/openib/btl_openib_error.h Normal file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_IB_ERROR_H
#define MCA_BTL_IB_ERROR_H
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
/*
*
*
*/
#define MCA_BTL_IB_VAPI_ERROR(vapi_ret, func_name) { \
ompi_output(0,"[%s:%d] ", __FILE__, __LINE__); \
ompi_output(0,"%s : %s",func_name,VAPI_strerror(vapi_ret)); \
}
/* Debug Print */
#if 0
#define DEBUG_OUT(fmt, args...) { \
ompi_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, \
##args); \
}
#else
#define DEBUG_OUT(fmt, args...)
#endif
#endif
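A hypothetical call site (illustrative only; the callee and locals are assumptions, only the two macros come from this header):
/* Sketch: report a VAPI failure and leave a debug trace. */
VAPI_hca_vendor_t hca_vendor;
VAPI_hca_cap_t hca_cap;
VAPI_ret_t ret = VAPI_query_hca_cap(nic, &hca_vendor, &hca_cap);
if(VAPI_OK != ret) {
MCA_BTL_IB_VAPI_ERROR(ret, "VAPI_query_hca_cap");
return OMPI_ERROR;
}
DEBUG_OUT("HCA capabilities queried");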

152
src/mca/btl/openib/btl_openib_frag.c Normal file

@ -0,0 +1,152 @@
#include "btl_openib_frag.h"
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/mvapi/mpool_mvapi.h"
static void mca_btl_openib_frag_common_constructor( mca_btl_openib_frag_t* frag)
{
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
frag->hdr = (mca_btl_openib_header_t*) (frag+1); /* initialize the btl header to point to start at end of frag */
#if 0
mod = (unsigned long) frag->hdr % MCA_BTL_IB_FRAG_ALIGN;
if(mod != 0) {
frag->hdr = (mca_btl_openib_header_t*) ((unsigned char*) frag->hdr + (MCA_BTL_IB_FRAG_ALIGN - mod));
}
#endif
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t); /* init the segment address to start after the btl header */
#if 0
mod = (frag->segment.seg_addr.lval) % MCA_BTL_IB_FRAG_ALIGN;
if(mod != 0) {
frag->segment.seg_addr.lval += (MCA_BTL_IB_FRAG_ALIGN - mod);
}
#endif
frag->mem_hndl = mem_hndl->hndl;
frag->segment.seg_len = frag->size;
frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl->l_key;
frag->sg_entry.lkey = mem_hndl->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
frag->base.des_flags = 0;
}
static void mca_btl_openib_send_frag_common_constructor(mca_btl_openib_frag_t* frag)
{
mca_btl_openib_frag_common_constructor(frag);
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->sr_desc.comp_type = VAPI_SIGNALED;
frag->sr_desc.opcode = VAPI_SEND;
frag->sr_desc.remote_qkey = 0;
frag->sr_desc.sg_lst_len = 1;
frag->sr_desc.sg_lst_p = &frag->sg_entry;
frag->sr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
}
static void mca_btl_openib_recv_frag_common_constructor(mca_btl_openib_frag_t* frag)
{
mca_btl_openib_frag_common_constructor(frag);
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->rr_desc.comp_type = VAPI_SIGNALED;
frag->rr_desc.opcode = VAPI_RECEIVE;
frag->rr_desc.sg_lst_len = 1;
frag->rr_desc.sg_lst_p = &frag->sg_entry;
frag->rr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
}
static void mca_btl_openib_send_frag_eager_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = mca_btl_openib_component.eager_limit;
mca_btl_openib_send_frag_common_constructor(frag);
}
static void mca_btl_openib_send_frag_max_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = mca_btl_openib_component.max_send_size;
mca_btl_openib_send_frag_common_constructor(frag);
}
static void mca_btl_openib_recv_frag_max_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = mca_btl_openib_component.max_send_size;
mca_btl_openib_recv_frag_common_constructor(frag);
}
static void mca_btl_openib_recv_frag_eager_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = mca_btl_openib_component.eager_limit;
mca_btl_openib_recv_frag_common_constructor(frag);
}
static void mca_btl_openib_send_frag_frag_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = 0;
mca_btl_openib_send_frag_common_constructor(frag);
}
OBJ_CLASS_INSTANCE(
mca_btl_openib_frag_t,
mca_btl_base_descriptor_t,
NULL,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_openib_send_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_openib_send_frag_max_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_frag_t,
mca_btl_base_descriptor_t,
mca_btl_openib_send_frag_frag_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_openib_recv_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_openib_recv_frag_max_constructor,
NULL);

149
src/mca/btl/openib/btl_openib_frag.h Normal file

@ -0,0 +1,149 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_IB_FRAG_H
#define MCA_BTL_IB_FRAG_H
#define MCA_BTL_IB_FRAG_ALIGN (8)
#include "ompi_config.h"
#include "btl_openib.h"
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
#include "mca/mpool/mvapi/mpool_mvapi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);
typedef mca_btl_base_header_t mca_btl_openib_header_t;
typedef enum {
MCA_BTL_IB_FRAG_SEND,
MCA_BTL_IB_FRAG_PUT,
MCA_BTL_IB_FRAG_GET,
MCA_BTL_IB_FRAG_ACK
} mca_btl_openib_frag_type_t;
/**
* IB send fragment derived type.
*/
struct mca_btl_openib_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
struct mca_btl_base_endpoint_t *endpoint;
mca_btl_openib_frag_type_t type;
size_t size;
int rc;
union{
VAPI_rr_desc_t rr_desc;
VAPI_sr_desc_t sr_desc;
};
VAPI_sg_lst_entry_t sg_entry;
VAPI_mr_hndl_t mem_hndl;
VAPI_ret_t ret;
mca_btl_openib_header_t *hdr;
mca_mpool_mvapi_registration_t * vapi_reg;
};
typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_eager_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_max_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_eager_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_max_t);
/*
* Allocate an IB send descriptor
*
*/
#define MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc) \
{ \
\
ompi_list_item_t *item; \
OMPI_FREE_LIST_WAIT(&((mca_btl_openib_module_t*)btl)->send_free_eager, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
}
#define MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag) \
{ \
OMPI_FREE_LIST_RETURN(&((mca_btl_openib_module_t*)btl)->send_free_eager, (ompi_list_item_t*)(frag)); \
}
#define MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc) \
{ \
\
ompi_list_item_t *item; \
OMPI_FREE_LIST_WAIT(&((mca_btl_openib_module_t*)btl)->send_free_max, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
}
#define MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag) \
{ \
OMPI_FREE_LIST_RETURN(&((mca_btl_openib_module_t*)btl)->send_free_max, (ompi_list_item_t*)(frag)); \
}
#define MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc) \
{ \
\
ompi_list_item_t *item; \
OMPI_FREE_LIST_WAIT(&((mca_btl_openib_module_t*)btl)->send_free_frag, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
}
#define MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag) \
{ \
OMPI_FREE_LIST_RETURN(&((mca_btl_openib_module_t*)btl)->send_free_frag, (ompi_list_item_t*)(frag)); \
}
struct mca_btl_openib_module_t;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif
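A minimal usage sketch (not from this commit) of the alloc/return pairing these macros assume; btl and payload_len are hypothetical:
/* Sketch: take an eager frag off the free list, size its segment, and
return it to the list once done. OMPI_FREE_LIST_WAIT blocks, so frag
is valid on return. */
mca_btl_openib_frag_t* frag;
int rc;
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
frag->segment.seg_len = payload_len;
/* ... copy payload to frag->segment.seg_addr.pval and post the send ... */
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);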

161
src/mca/btl/openib/btl_openib_proc.c Normal file

@ -0,0 +1,161 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "class/ompi_hash_table.h"
#include "mca/base/mca_base_module_exchange.h"
#include "btl_openib.h"
#include "btl_openib_proc.h"
static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc);
static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc);
OBJ_CLASS_INSTANCE(mca_btl_openib_proc_t,
ompi_list_item_t, mca_btl_openib_proc_construct,
mca_btl_openib_proc_destruct);
void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc)
{
proc->proc_ompi = 0;
proc->proc_addr_count = 0;
proc->proc_endpoints = 0;
proc->proc_endpoint_count = 0;
OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);
/* add to list of all proc instance */
OMPI_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
ompi_list_append(&mca_btl_openib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
}
/*
* Cleanup ib proc instance
*/
void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc)
{
/* remove from list of all proc instances */
OMPI_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
ompi_list_remove_item(&mca_btl_openib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
/* release resources */
if(NULL != proc->proc_endpoints) {
free(proc->proc_endpoints);
}
}
/*
* Look for an existing IB process instance based on the associated
* ompi_proc_t instance.
*/
static mca_btl_openib_proc_t* mca_btl_openib_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
mca_btl_openib_proc_t* ib_proc;
OMPI_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
for(ib_proc = (mca_btl_openib_proc_t*)
ompi_list_get_first(&mca_btl_openib_component.ib_procs);
ib_proc != (mca_btl_openib_proc_t*)
ompi_list_get_end(&mca_btl_openib_component.ib_procs);
ib_proc = (mca_btl_openib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_ompi == ompi_proc) {
OMPI_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return ib_proc;
}
}
OMPI_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return NULL;
}
/*
* Create an IB process structure. There is a one-to-one correspondence
* between an ompi_proc_t and a mca_btl_openib_proc_t instance. We cache
* additional data (specifically the list of mca_btl_openib_endpoint_t instances,
* and published addresses) associated w/ a given destination on this
* datastructure.
*/
mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
{
mca_btl_openib_proc_t* module_proc = NULL;
/* Check if we have already created a IB proc
* structure for this ompi process */
module_proc = mca_btl_openib_proc_lookup_ompi(ompi_proc);
if(module_proc != NULL) {
/* Gotcha! */
return module_proc;
}
/* Oops! First time, gotta create a new IB proc
* out of the ompi_proc ... */
module_proc = OBJ_NEW(mca_btl_openib_proc_t);
/* Initialize number of peers */
module_proc->proc_endpoint_count = 0;
module_proc->proc_ompi = ompi_proc;
/* build a unique identifier (of arbitrary
* size) to represent the proc */
module_proc->proc_guid = ompi_proc->proc_name;
/* IB module doesn't have addresses exported at
* initialization, so the addr_count is set to one. */
module_proc->proc_addr_count = 1;
/* XXX: Right now, there can be only 1 peer associated
* with a proc. mca_btl_openib_proc_t needs a small change
* to allow growing the number of endpoints for this proc
* on demand */
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
if(NULL == module_proc->proc_endpoints) {
OBJ_RELEASE(module_proc);
return NULL;
}
return module_proc;
}
/*
* Note that this routine must be called with the lock on the process
* already held. Insert a btl instance into the proc array and assign
* it an address.
*/
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc,
mca_btl_base_endpoint_t* module_endpoint)
{
/* insert into endpoint array */
module_endpoint->endpoint_proc = module_proc;
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
return OMPI_SUCCESS;
}
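Putting the two entry points together, a sketch (illustrative, not part of the commit) of the expected add-procs flow; ompi_proc and endpoint are assumed to exist:
/* Sketch: find-or-create the proc, then insert the endpoint while
holding proc_lock, as the note above requires. */
mca_btl_openib_proc_t* ib_proc;
int rc;
ib_proc = mca_btl_openib_proc_create(ompi_proc);
if(NULL == ib_proc) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
OMPI_THREAD_LOCK(&ib_proc->proc_lock);
rc = mca_btl_openib_proc_insert(ib_proc, endpoint);
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);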


@ -3,8 +3,6 @@
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
@ -16,29 +14,27 @@
* $HEADER$
*/
#ifndef MCA_PTL_IB_PROC_H
#define MCA_PTL_IB_PROC_H
#ifndef MCA_BTL_IB_PROC_H
#define MCA_BTL_IB_PROC_H
#include "mca/ns/ns.h"
#include "class/ompi_object.h"
#include "proc/proc.h"
#include "ptl_ib.h"
#include "ptl_ib_vapi.h"
#include "ptl_ib_addr.h"
#include "ptl_ib_peer.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_ptl_ib_proc_t);
OBJ_CLASS_DECLARATION(mca_btl_openib_proc_t);
/**
* Represents the state of a remote process and the set of addresses
* that it exports. Also cache an instance of mca_ptl_base_peer_t for
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
* each
* PTL instance that attempts to open a connection to the process.
* BTL instance that attempts to open a connection to the process.
*/
struct mca_ptl_ib_proc_t {
struct mca_btl_openib_proc_t {
ompi_list_item_t super;
/**< allow proc to be placed on a list */
@ -49,21 +45,21 @@ struct mca_ptl_ib_proc_t {
/**< globally unique identifier for the process */
size_t proc_addr_count;
/**< number of addresses published by peer */
/**< number of addresses published by endpoint */
struct mca_ptl_base_peer_t **proc_peers;
/**< array of peers that have been created to access this proc */
struct mca_btl_base_endpoint_t **proc_endpoints;
/**< array of endpoints that have been created to access this proc */
size_t proc_peer_count;
/**< number of peers */
size_t proc_endpoint_count;
/**< number of endpoints */
ompi_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_ptl_ib_proc_t mca_ptl_ib_proc_t;
typedef struct mca_btl_openib_proc_t mca_btl_openib_proc_t;
mca_ptl_ib_proc_t* mca_ptl_ib_proc_create(ompi_proc_t* ompi_proc);
int mca_ptl_ib_proc_insert(mca_ptl_ib_proc_t*, mca_ptl_base_peer_t*);
mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc);
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t*, mca_btl_base_endpoint_t*);
#if defined(c_plusplus) || defined(__cplusplus)
}


@ -17,6 +17,6 @@
# Specific to this module
PARAM_INIT_FILE=mpool_vapi_component.c
PARAM_CONFIG_HEADER_FILE="vapi_config.h"
PARAM_INIT_FILE=btl_openib.c
PARAM_CONFIG_HEADER_FILE="openib_config.h"
PARAM_CONFIG_FILES="Makefile"


@ -4,8 +4,6 @@
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004 The Ohio State University.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
@ -25,29 +23,29 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[
# Additional --with flags that can be specified
AC_ARG_WITH(ptl-ib,
AC_HELP_STRING([--with-ptl-ib=IBDIR],
AC_ARG_WITH(btl-openib,
AC_HELP_STRING([--with-btl-openib=IBDIR],
[Specify the installation directory of IB (should enable the correct automatic determination of using the 32 or 64 bit library, if both are present under IBDIR/lib and IBDIR/lib64)]))
AC_ARG_WITH(ptl-ib-libdir,
AC_HELP_STRING([--with-ptl-ib-libdir=IBLIBDIR],
AC_ARG_WITH(btl-openib-libdir,
AC_HELP_STRING([--with-btl-openib-libdir=IBLIBDIR],
[directory where the IB library can be found, if it is not in $IBDIR/lib or $IBDIR/lib64]))
# Quick sanity check
if test "$with_ptl_ib" = "no"; then
AC_MSG_WARN([*** --without-ptl-ib specified -- aborting])
if test "$with_btl_openib" = "no"; then
AC_MSG_WARN([*** --without-btl-openib specified -- aborting])
AC_MSG_ERROR([Will not continue])
fi
# Find the right IBDIR
if test "$with_ptl_ib" != "" -a "$with_ptl_ib" != "yes" ; then
IBDIR="$with_ptl_ib"
IBLIBDIR="$with_ptl_ib"
if test "$with_btl_openib" != "" -a "$with_btl_openib" != "yes" ; then
IBDIR="$with_btl_openib"
IBLIBDIR="$with_btl_openib"
fi
if test "$with_ptl_ib_libdir" != "" -a "$with_ptl_ib_libdir" != "yes" -a \
"$with_ptl_ib_libdir" != "no"; then
IBLIBDIR="$with_ptl_ib_libdir"
if test "$with_btl_openib_libdir" != "" -a "$with_btl_openib_libdir" != "yes" -a \
"$with_btl_openib_libdir" != "no"; then
IBLIBDIR="$with_btl_openib_libdir"
fi
# Add to CPPFLAGS if necessary
@ -58,13 +56,13 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[
EXTRA_CPPFLAGS="-I$IBDIR/include"
else
AC_MSG_WARN([*** Warning: cannot find $IBDIR/include])
AC_MSG_WARN([*** Will still try to configure ib ptl anyway...])
AC_MSG_WARN([*** Will still try to configure openib btl anyway...])
fi
if test "$IBDIR" != "" -a -d "$IBDIR/wrap"; then
EXTRA_CPPFLAGS="-I$IBDIR/wrap $EXTRA_CPPFLAGS"
else
AC_MSG_WARN([*** Warning: cannot find $IBDIR/wrap])
AC_MSG_WARN([*** Will still try to configure ib ptl anyway...])
AC_MSG_WARN([*** Will still try to configure openib btl anyway...])
fi
fi
@ -86,7 +84,13 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[
# some versions of Mellanox (v3.1), we need to explicitly link in
# the thread libraries. #$%#@$%@%#$!!!
mca_ptl_ib_try_find_libvapi() {
# Many vapi.h's have horrid semantics and don't obey ISOC99
# standards. So we have to turn off flags like -pedantic. Sigh.
CFLAGS="`echo $CFLAGS | sed 's/-pedantic//g'`"
mca_btl_openib_try_find_libvapi() {
func1=[$]1
func2=[$]2
@ -117,7 +121,7 @@ mca_ptl_ib_try_find_libvapi() {
if test -d "$IBLIBDIR/lib"; then
EXTRA_LDFLAGS="-L$IBLIBDIR/lib"
LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS"
mca_ptl_ib_try_find_libvapi VAPI_open_hca VAPI_query_hca_cap
mca_btl_openib_try_find_libvapi VAPI_open_hca VAPI_query_hca_cap
if test "$LIBS" != "$LIBS_orig"; then
echo "--> found libvapi in $IBLIBDIR/lib"
fi
@ -126,7 +130,7 @@ mca_ptl_ib_try_find_libvapi() {
if test "$LIBS" = "$LIBS_orig" -a -d "$IBLIBDIR/lib64"; then
EXTRA_LDFLAGS="-L$IBLIBDIR/lib64"
LDFLAGS="$LDFLAGS_save $EXTRA_LDFLAGS"
mca_ptl_ib_try_find_libvapi EVAPI_list_hcas EVAPI_open_hca
mca_btl_openib_try_find_libvapi EVAPI_list_hcas EVAPI_open_hca
if test "$LIBS" != "$LIBS_orig"; then
echo "--> found libvapi in $IBLIBDIR/lib64"
fi


@ -1 +1,3 @@
ompi
gshipman
twoodall


@ -19,32 +19,32 @@
include $(top_ompi_srcdir)/config/Makefile.options
sources = \
mpool_vapi.h \
mpool_vapi_module.c \
mpool_vapi_component.c
mpool_mvapi.h \
mpool_mvapi_module.c \
mpool_mvapi_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_mpool_vapi_DSO
if OMPI_BUILD_mpool_mvapi_DSO
component_noinst =
component_install = mca_mpool_vapi.la
component_install = mca_mpool_mvapi.la
else
component_noinst = libmca_mpool_vapi.la
component_noinst = libmca_mpool_mvapi.la
component_install =
endif
# See src/mca/ptl/vapi/Makefile.am for an explanation of
# libmca_common_vapi.la.
# See src/mca/ptl/mvapi/Makefile.am for an explanation of
# libmca_common_mvapi.la.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_vapi_la_SOURCES = $(sources)
mca_mpool_vapi_la_LDFLAGS = -module -avoid-version
mca_mpool_mvapi_la_SOURCES = $(sources)
mca_mpool_mvapi_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_vapi_la_SOURCES = $(sources)
libmca_mpool_vapi_la_LDFLAGS = -module -avoid-version
libmca_mpool_mvapi_la_SOURCES = $(sources)
libmca_mpool_mvapi_la_LDFLAGS = -module -avoid-version


@ -17,6 +17,6 @@
# Specific to this module
PARAM_INIT_FILE=mpool_vapi_component.c
PARAM_CONFIG_HEADER_FILE="vapi_config.h"
PARAM_INIT_FILE=mpool_mvapi_component.c
PARAM_CONFIG_HEADER_FILE="mvapi_config.h"
PARAM_CONFIG_FILES="Makefile"


@ -41,16 +41,16 @@ static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) {
}
struct mca_mpool_vapi_component_t {
struct mca_mpool_mvapi_component_t {
mca_mpool_base_component_t super;
char* vapi_allocator_name;
long page_size;
long page_size_log;
};
typedef struct mca_mpool_vapi_component_t mca_mpool_vapi_component_t;
typedef struct mca_mpool_mvapi_component_t mca_mpool_mvapi_component_t;
OMPI_COMP_EXPORT extern mca_mpool_vapi_component_t mca_mpool_vapi_component;
OMPI_COMP_EXPORT extern mca_mpool_mvapi_component_t mca_mpool_mvapi_component;
@ -62,14 +62,14 @@ struct mca_mpool_base_resources_t {
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_vapi_module_t {
struct mca_mpool_mvapi_module_t {
mca_mpool_base_module_t super;
mca_allocator_base_module_t * vapi_allocator;
struct mca_mpool_base_resources_t hca_pd;
}; typedef struct mca_mpool_vapi_module_t mca_mpool_vapi_module_t;
}; typedef struct mca_mpool_mvapi_module_t mca_mpool_mvapi_module_t;
struct mca_mpool_vapi_registration_t {
struct mca_mpool_mvapi_registration_t {
mca_mpool_base_registration_t base_reg;
VAPI_mr_hndl_t hndl;
/* Memory region handle */
@ -84,8 +84,8 @@ struct mca_mpool_vapi_registration_t {
bool is_leave_pinned;
};
typedef struct mca_mpool_vapi_registration_t mca_mpool_vapi_registration_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_vapi_registration_t);
typedef struct mca_mpool_mvapi_registration_t mca_mpool_mvapi_registration_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_mvapi_registration_t);
@ -93,18 +93,18 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_vapi_registration_t);
/*
* Initializes the mpool module.
*/
void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool);
void mca_mpool_mvapi_module_init(mca_mpool_mvapi_module_t* mpool);
/*
* Returns base address of shared memory mapping.
*/
void* mca_mpool_vapi_base(mca_mpool_base_module_t*);
void* mca_mpool_mvapi_base(mca_mpool_base_module_t*);
/**
* Allocate block of shared memory.
*/
void* mca_mpool_vapi_alloc(
void* mca_mpool_mvapi_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
@ -113,7 +113,7 @@ void* mca_mpool_vapi_alloc(
/**
* realloc function typedef
*/
void* mca_mpool_vapi_realloc(
void* mca_mpool_mvapi_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
@ -122,13 +122,13 @@ void* mca_mpool_vapi_realloc(
/**
* register function typedef
*/
int mca_mpool_vapi_register(
int mca_mpool_mvapi_register(
mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
mca_mpool_base_registration_t** registration);
int mca_mpool_vapi_deregister(
int mca_mpool_mvapi_deregister(
mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
@ -138,7 +138,7 @@ int mca_mpool_vapi_deregister(
/**
* free function typedef
*/
void mca_mpool_vapi_free(mca_mpool_base_module_t* mpool,
void mca_mpool_mvapi_free(mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);


@ -19,7 +19,7 @@
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/allocator/base/base.h"
#include "mpool_vapi.h"
#include "mpool_mvapi.h"
#include "util/proc_info.h"
#include "util/sys_info.h"
#include <unistd.h>
@ -27,11 +27,11 @@
/*
* Local functions
*/
static int mca_mpool_vapi_open(void);
static mca_mpool_base_module_t* mca_mpool_vapi_init(
static int mca_mpool_mvapi_open(void);
static mca_mpool_base_module_t* mca_mpool_mvapi_init(
struct mca_mpool_base_resources_t* resources);
mca_mpool_vapi_component_t mca_mpool_vapi_component = {
mca_mpool_mvapi_component_t mca_mpool_mvapi_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
@ -46,7 +46,7 @@ mca_mpool_vapi_component_t mca_mpool_vapi_component = {
1, /* MCA component major version */
0, /* MCA component minor version */
0, /* MCA component release version */
mca_mpool_vapi_open, /* component open */
mca_mpool_mvapi_open, /* component open */
NULL
},
@ -57,37 +57,37 @@ mca_mpool_vapi_component_t mca_mpool_vapi_component = {
false
},
mca_mpool_vapi_init
mca_mpool_mvapi_init
}
};
static void mca_mpool_vapi_registration_constructor( mca_mpool_vapi_registration_t * registration )
static void mca_mpool_mvapi_registration_constructor( mca_mpool_mvapi_registration_t * registration )
{
registration->is_leave_pinned = false;
}
static void mca_mpool_vapi_registration_destructor( mca_mpool_vapi_registration_t * registration )
static void mca_mpool_mvapi_registration_destructor( mca_mpool_mvapi_registration_t * registration )
{
mca_mpool_base_remove((void*) registration);
registration->base_reg.base = NULL;
registration->base_reg.bound = NULL;
registration->is_leave_pinned=false;
}
OBJ_CLASS_INSTANCE(
mca_mpool_vapi_registration_t,
mca_mpool_mvapi_registration_t,
mca_mpool_base_registration_t,
mca_mpool_vapi_registration_constructor,
mca_mpool_vapi_registration_destructor
mca_mpool_mvapi_registration_constructor,
mca_mpool_mvapi_registration_destructor
);
static char* mca_mpool_vapi_param_register_string(
static char* mca_mpool_mvapi_param_register_string(
const char* param_name,
const char* default_value)
{
@ -101,14 +101,14 @@ static char* mca_mpool_vapi_param_register_string(
/**
* component open/close/init function
*/
static int mca_mpool_vapi_open(void)
static int mca_mpool_mvapi_open(void)
{
/* register VAPI component parameters */
mca_mpool_vapi_component.vapi_allocator_name =
mca_mpool_vapi_param_register_string("allocator", "bucket");
mca_mpool_mvapi_component.vapi_allocator_name =
mca_mpool_mvapi_param_register_string("allocator", "bucket");
/* get the page size for this architecture*/
mca_mpool_vapi_component.page_size = sysconf(_SC_PAGESIZE);
mca_mpool_mvapi_component.page_size = sysconf(_SC_PAGESIZE);
return OMPI_SUCCESS;
}
@ -119,8 +119,8 @@ void* mca_common_vapi_segment_alloc(
size_t* size,
mca_mpool_base_registration_t** registration)
{
void* addr_malloc = (void*)malloc((*size) + mca_mpool_vapi_component.page_size);
void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_vapi_component.page_size_log);
void* addr_malloc = (void*)malloc((*size) + mca_mpool_mvapi_component.page_size);
void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_mvapi_component.page_size_log);
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, *size, registration)) {
free(addr_malloc);
return NULL;
@ -129,44 +129,44 @@ void* mca_common_vapi_segment_alloc(
}
/* Allocates a segment of memory and registers with IB, user_out returns the memory handle. */
static mca_mpool_base_module_t* mca_mpool_vapi_init(
static mca_mpool_base_module_t* mca_mpool_mvapi_init(
struct mca_mpool_base_resources_t* resources)
{
mca_mpool_vapi_module_t* mpool_module;
mca_mpool_mvapi_module_t* mpool_module;
mca_allocator_base_component_t* allocator_component;
long page_size = mca_mpool_vapi_component.page_size;
long page_size = mca_mpool_mvapi_component.page_size;
mca_mpool_vapi_component.page_size_log = 0;
mca_mpool_mvapi_component.page_size_log = 0;
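/* page_size_log ends up as floor(log2(page_size)), e.g. 4096 -> 12;
ALIGN_ADDR uses this shift count to round allocations up to a page
boundary */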
while(page_size > 1){
page_size = page_size >> 1;
mca_mpool_vapi_component.page_size_log++;
mca_mpool_mvapi_component.page_size_log++;
}
/* if the specified allocator cannot be loaded - look for an alternative */
allocator_component = mca_allocator_component_lookup(mca_mpool_vapi_component.vapi_allocator_name);
allocator_component = mca_allocator_component_lookup(mca_mpool_mvapi_component.vapi_allocator_name);
if(NULL == allocator_component) {
if(ompi_list_get_size(&mca_allocator_base_components) != 0) {
mca_base_component_list_item_t* item = (mca_base_component_list_item_t*)
ompi_list_get_first(&mca_allocator_base_components);
allocator_component = (mca_allocator_base_component_t*)item->cli_component;
ompi_output(0, "mca_mpool_vapi_init: unable to locate allocator: %s - using %s\n",
mca_mpool_vapi_component.vapi_allocator_name, allocator_component->allocator_version.mca_component_name);
ompi_output(0, "mca_mpool_mvapi_init: unable to locate allocator: %s - using %s\n",
mca_mpool_mvapi_component.vapi_allocator_name, allocator_component->allocator_version.mca_component_name);
} else {
ompi_output(0, "mca_mpool_vapi_init: unable to locate allocator: %s\n",
mca_mpool_vapi_component.vapi_allocator_name);
ompi_output(0, "mca_mpool_mvapi_init: unable to locate allocator: %s\n",
mca_mpool_mvapi_component.vapi_allocator_name);
return NULL;
}
}
mpool_module = (mca_mpool_vapi_module_t*)malloc(sizeof(mca_mpool_vapi_module_t));
mca_mpool_vapi_module_init(mpool_module);
mpool_module = (mca_mpool_mvapi_module_t*)malloc(sizeof(mca_mpool_mvapi_module_t));
mca_mpool_mvapi_module_init(mpool_module);
/* setup allocator TODO fix up */
mpool_module->hca_pd = *resources;
mpool_module->vapi_allocator =
allocator_component->allocator_init(true, mca_common_vapi_segment_alloc, NULL, &mpool_module->super);
if(NULL == mpool_module->vapi_allocator) {
ompi_output(0, "mca_mpool_vapi_init: unable to initialize allocator");
ompi_output(0, "mca_mpool_mvapi_init: unable to initialize allocator");
return NULL;
}
return &mpool_module->super;


@ -17,22 +17,21 @@
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "mca/mpool/vapi/mpool_vapi.h"
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/mvapi/mpool_mvapi.h"
/*
* Initializes the mpool module.
*/
void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool)
void mca_mpool_mvapi_module_init(mca_mpool_mvapi_module_t* mpool)
{
mpool->super.mpool_component = &mca_mpool_vapi_component.super;
mpool->super.mpool_component = &mca_mpool_mvapi_component.super;
mpool->super.mpool_base = NULL; /* no base .. */
mpool->super.mpool_alloc = mca_mpool_vapi_alloc;
mpool->super.mpool_realloc = mca_mpool_vapi_realloc;
mpool->super.mpool_free = mca_mpool_vapi_free;
mpool->super.mpool_register = mca_mpool_vapi_register;
mpool->super.mpool_deregister = mca_mpool_vapi_deregister;
mpool->super.mpool_alloc = mca_mpool_mvapi_alloc;
mpool->super.mpool_realloc = mca_mpool_mvapi_realloc;
mpool->super.mpool_free = mca_mpool_mvapi_free;
mpool->super.mpool_register = mca_mpool_mvapi_register;
mpool->super.mpool_deregister = mca_mpool_mvapi_deregister;
mpool->super.mpool_finalize = NULL;
}
@ -40,28 +39,28 @@ void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool)
/**
* allocate function
*/
void* mca_mpool_vapi_alloc(
void* mca_mpool_mvapi_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
return mpool_vapi->vapi_allocator->alc_alloc(mpool_vapi->vapi_allocator, size, align, registration);
mca_mpool_mvapi_module_t* mpool_mvapi = (mca_mpool_mvapi_module_t*)mpool;
return mpool_mvapi->vapi_allocator->alc_alloc(mpool_mvapi->vapi_allocator, size, align, registration);
}
/*
* register memory
*/
int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
int mca_mpool_mvapi_register(mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
mca_mpool_base_registration_t** registration){
mca_mpool_vapi_module_t * mpool_module = (mca_mpool_vapi_module_t*) mpool;
mca_mpool_vapi_registration_t * vapi_reg;
mca_mpool_mvapi_module_t * mpool_module = (mca_mpool_mvapi_module_t*) mpool;
mca_mpool_mvapi_registration_t * vapi_reg;
VAPI_mrw_t mr_in, mr_out;
VAPI_ret_t ret;
@ -69,8 +68,8 @@ int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
memset(&mr_in, 0, sizeof(VAPI_mrw_t));
memset(&mr_out, 0, sizeof(VAPI_mrw_t));
*registration = (mca_mpool_base_registration_t*) OBJ_NEW(mca_mpool_vapi_registration_t); /* (void*) malloc(sizeof(mca_mpool_base_registration_t)); */
vapi_reg = (mca_mpool_vapi_registration_t*) *registration;
*registration = (mca_mpool_base_registration_t*) OBJ_NEW(mca_mpool_mvapi_registration_t); /* (void*) malloc(sizeof(mca_mpool_base_registration_t)); */
vapi_reg = (mca_mpool_mvapi_registration_t*) *registration;
vapi_reg->base_reg.mpool = mpool;
@@ -110,15 +109,15 @@ int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
/*
* deregister memory
*/
int mca_mpool_vapi_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t size,
int mca_mpool_mvapi_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t size,
mca_mpool_base_registration_t* registration){
VAPI_ret_t ret;
mca_mpool_vapi_module_t * mpool_vapi = (mca_mpool_vapi_module_t*) mpool;
mca_mpool_vapi_registration_t * vapi_reg;
vapi_reg = (mca_mpool_vapi_registration_t*) registration;
mca_mpool_mvapi_module_t * mpool_mvapi = (mca_mpool_mvapi_module_t*) mpool;
mca_mpool_mvapi_registration_t * vapi_reg;
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
ret = VAPI_deregister_mr(
mpool_vapi->hca_pd.hca,
mpool_mvapi->hca_pd.hca,
vapi_reg->hndl
);
@@ -133,26 +132,26 @@ int mca_mpool_vapi_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t
/**
* realloc function
*/
void* mca_mpool_vapi_realloc(
void* mca_mpool_mvapi_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
return mpool_vapi->vapi_allocator->alc_realloc( mpool_vapi->vapi_allocator, addr, size, registration);
mca_mpool_mvapi_module_t* mpool_mvapi = (mca_mpool_mvapi_module_t*)mpool;
return mpool_mvapi->vapi_allocator->alc_realloc( mpool_mvapi->vapi_allocator, addr, size, registration);
}
/**
* free function
*/
void mca_mpool_vapi_free(mca_mpool_base_module_t* mpool, void * addr,
void mca_mpool_mvapi_free(mca_mpool_base_module_t* mpool, void * addr,
mca_mpool_base_registration_t* registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
mpool_vapi->super.mpool_deregister(mpool, addr, 0, registration);
mpool_vapi->vapi_allocator->alc_free(mpool_vapi->vapi_allocator, addr);
mca_mpool_mvapi_module_t* mpool_mvapi = (mca_mpool_mvapi_module_t*)mpool;
mpool_mvapi->super.mpool_deregister(mpool, addr, 0, registration);
mpool_mvapi->vapi_allocator->alc_free(mpool_mvapi->vapi_allocator, addr);
}


@@ -19,32 +19,32 @@
include $(top_ompi_srcdir)/config/Makefile.options
sources = \
mpool_vapi.h \
mpool_vapi_module.c \
mpool_vapi_component.c
mpool_openib.h \
mpool_openib_module.c \
mpool_openib_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_mpool_vapi_DSO
if OMPI_BUILD_mpool_openib_DSO
component_noinst =
component_install = mca_mpool_vapi.la
component_install = mca_mpool_openib.la
else
component_noinst = libmca_mpool_vapi.la
component_noinst = libmca_mpool_openib.la
component_install =
endif
# See src/mca/ptl/vapi/Makefile.am for an explanation of
# libmca_common_vapi.la.
# See src/mca/ptl/openib/Makefile.am for an explanation of
# libmca_common_openib.la.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_vapi_la_SOURCES = $(sources)
mca_mpool_vapi_la_LDFLAGS = -module -avoid-version
mca_mpool_openib_la_SOURCES = $(sources)
mca_mpool_openib_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_vapi_la_SOURCES = $(sources)
libmca_mpool_vapi_la_LDFLAGS = -module -avoid-version
libmca_mpool_openib_la_SOURCES = $(sources)
libmca_mpool_openib_la_LDFLAGS = -module -avoid-version


@@ -4,8 +4,6 @@
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004 The Ohio State University.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
@@ -19,6 +17,6 @@
# Specific to this module
PARAM_INIT_FILE=src/ptl_ib.c
PARAM_CONFIG_HEADER_FILE="ptl_ib_config.h"
PARAM_INIT_FILE=mpool_openib_component.c
PARAM_CONFIG_HEADER_FILE="openib_config.h"
PARAM_CONFIG_FILES="Makefile"


@@ -23,29 +23,29 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[
# Additional --with flags that can be specified
AC_ARG_WITH(btl-mvapi,
AC_HELP_STRING([--with-btl-mvapi=IBDIR],
AC_ARG_WITH(btl-openib,
AC_HELP_STRING([--with-btl-openib=IBDIR],
[Specify the installation directory of IB (should enable the correct automatic determination of using the 32 or 64 bit library, if both are present under IBDIR/lib and IBDIR/lib64)]))
AC_ARG_WITH(btl-mvapi-libdir,
AC_HELP_STRING([--with-btl-mvapi-libdir=IBLIBDIR],
AC_ARG_WITH(btl-openib-libdir,
AC_HELP_STRING([--with-btl-openib-libdir=IBLIBDIR],
[directory where the IB library can be found, if it is not in $IBDIR/lib or $IBDIR/lib64]))
# Quick sanity check
if test "$with_btl_mvapi" = "no"; then
AC_MSG_WARN([*** --without-btl-mvapi specified -- aborting])
if test "$with_btl_openib" = "no"; then
AC_MSG_WARN([*** --without-btl-openib specified -- aborting])
AC_MSG_ERROR([Will not continue])
fi
# Find the right IBDIR
if test "$with_btl_mvapi" != "" -a "$with_btl_mvapi" != "yes" ; then
IBDIR="$with_btl_mvapi"
IBLIBDIR="$with_btl_mvapi"
if test "$with_btl_openib" != "" -a "$with_btl_openib" != "yes" ; then
IBDIR="$with_btl_openib"
IBLIBDIR="$with_btl_openib"
fi
if test "$with_btl_mvapi_libdir" != "" -a "$with_btl_mvapi_libdir" != "yes" -a \
"$with_btl_mvapi_libdir" != "no"; then
IBLIBDIR="$with_btl_mvapi_libdir"
if test "$with_btl_openib_libdir" != "" -a "$with_btl_openib_libdir" != "yes" -a \
"$with_btl_openib_libdir" != "no"; then
IBLIBDIR="$with_btl_openib_libdir"
fi
# Add to CPPFLAGS if necessary
@@ -92,7 +92,7 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[
mca_btl_mvapi_try_find_libvapi() {
mca_btl_openib_try_find_libvapi() {
func1=[$]1
func2=[$]2
@@ -123,7 +123,7 @@ mca_btl_mvapi_try_find_libvapi() {
if test -d "$IBLIBDIR/lib"; then
EXTRA_LDFLAGS="-L$IBLIBDIR/lib"
LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS"
mca_btl_mvapi_try_find_libvapi VAPI_open_hca VAPI_query_hca_cap
mca_btl_openib_try_find_libvapi VAPI_open_hca VAPI_query_hca_cap
if test "$LIBS" != "$LIBS_orig"; then
echo "--> found libvapi in $IBLIBDIR/lib"
fi
@@ -132,7 +132,7 @@ mca_btl_mvapi_try_find_libvapi() {
if test "$LIBS" = "$LIBS_orig" -a -d "$IBLIBDIR/lib64"; then
EXTRA_LDFLAGS="-L$IBLIBDIR/lib64"
LDFLAGS="$LDFLAGS_save $EXTRA_LDFLAGS"
mca_btl_mvapi_try_find_libvapi EVAPI_list_hcas EVAPI_open_hca
mca_btl_openib_try_find_libvapi EVAPI_list_hcas EVAPI_open_hca
if test "$LIBS" != "$LIBS_orig"; then
echo "--> found libvapi in $IBLIBDIR/lib64"
fi


@@ -41,16 +41,16 @@ static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) {
}
struct mca_mpool_vapi_component_t {
struct mca_mpool_openib_component_t {
mca_mpool_base_component_t super;
char* vapi_allocator_name;
long page_size;
long page_size_log;
};
typedef struct mca_mpool_vapi_component_t mca_mpool_vapi_component_t;
typedef struct mca_mpool_openib_component_t mca_mpool_openib_component_t;
OMPI_COMP_EXPORT extern mca_mpool_vapi_component_t mca_mpool_vapi_component;
OMPI_COMP_EXPORT extern mca_mpool_openib_component_t mca_mpool_openib_component;
@@ -62,14 +62,14 @@ struct mca_mpool_base_resources_t {
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_vapi_module_t {
struct mca_mpool_openib_module_t {
mca_mpool_base_module_t super;
mca_allocator_base_module_t * vapi_allocator;
struct mca_mpool_base_resources_t hca_pd;
}; typedef struct mca_mpool_vapi_module_t mca_mpool_vapi_module_t;
}; typedef struct mca_mpool_openib_module_t mca_mpool_openib_module_t;
struct mca_mpool_vapi_registration_t {
struct mca_mpool_openib_registration_t {
mca_mpool_base_registration_t base_reg;
VAPI_mr_hndl_t hndl;
/* Memory region handle */
@@ -84,8 +84,8 @@ struct mca_mpool_vapi_registration_t {
bool is_leave_pinned;
};
typedef struct mca_mpool_vapi_registration_t mca_mpool_vapi_registration_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_vapi_registration_t);
typedef struct mca_mpool_openib_registration_t mca_mpool_openib_registration_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_openib_registration_t);
@@ -93,18 +93,18 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_vapi_registration_t);
/*
* Initializes the mpool module.
*/
void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool);
void mca_mpool_openib_module_init(mca_mpool_openib_module_t* mpool);
/*
* Returns base address of shared memory mapping.
*/
void* mca_mpool_vapi_base(mca_mpool_base_module_t*);
void* mca_mpool_openib_base(mca_mpool_base_module_t*);
/**
* Allocate block of shared memory.
*/
void* mca_mpool_vapi_alloc(
void* mca_mpool_openib_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
@@ -113,7 +113,7 @@ void* mca_mpool_vapi_alloc(
/**
* realloc function typedef
*/
void* mca_mpool_vapi_realloc(
void* mca_mpool_openib_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
@@ -122,13 +122,13 @@ void* mca_mpool_vapi_realloc(
/**
* register function typedef
*/
int mca_mpool_vapi_register(
int mca_mpool_openib_register(
mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
mca_mpool_base_registration_t** registration);
int mca_mpool_vapi_deregister(
int mca_mpool_openib_deregister(
mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
@@ -138,7 +138,7 @@ int mca_mpool_vapi_deregister(
/**
* free function typedef
*/
void mca_mpool_vapi_free(mca_mpool_base_module_t* mpool,
void mca_mpool_openib_free(mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);
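An illustrative caller-side sketch of the renamed interface (assumptions: OMPI_SUCCESS from include/constants.h and the declarations above; the helper name is hypothetical, not part of this changeset): pin a buffer, use the handle, release the pin.

static int pin_use_unpin(mca_mpool_base_module_t* mpool, void* buf, size_t len)
{
    mca_mpool_base_registration_t* reg;
    int rc = mpool->mpool_register(mpool, buf, len, &reg);   /* pin */
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
    /* hand ((mca_mpool_openib_registration_t*)reg)->hndl to the verbs layer */
    return mpool->mpool_deregister(mpool, buf, len, reg);    /* unpin */
}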


@@ -19,7 +19,7 @@
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/allocator/base/base.h"
#include "mpool_vapi.h"
#include "mpool_openib.h"
#include "util/proc_info.h"
#include "util/sys_info.h"
#include <unistd.h>
@@ -27,11 +27,11 @@
/*
* Local functions
*/
static int mca_mpool_vapi_open(void);
static mca_mpool_base_module_t* mca_mpool_vapi_init(
static int mca_mpool_openib_open(void);
static mca_mpool_base_module_t* mca_mpool_openib_init(
struct mca_mpool_base_resources_t* resources);
mca_mpool_vapi_component_t mca_mpool_vapi_component = {
mca_mpool_openib_component_t mca_mpool_openib_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
@@ -46,7 +46,7 @@ mca_mpool_vapi_component_t mca_mpool_vapi_component = {
1, /* MCA component major version */
0, /* MCA component minor version */
0, /* MCA component release version */
mca_mpool_vapi_open, /* component open */
mca_mpool_openib_open, /* component open */
NULL
},
@@ -57,18 +57,18 @@ mca_mpool_vapi_component_t mca_mpool_vapi_component = {
false
},
mca_mpool_vapi_init
mca_mpool_openib_init
}
};
static void mca_mpool_vapi_registration_constructor( mca_mpool_vapi_registration_t * registration )
static void mca_mpool_openib_registration_constructor( mca_mpool_openib_registration_t * registration )
{
registration->is_leave_pinned = false;
}
static void mca_mpool_vapi_registration_destructor( mca_mpool_vapi_registration_t * registration )
static void mca_mpool_openib_registration_destructor( mca_mpool_openib_registration_t * registration )
{
mca_mpool_base_remove((void*) registration);
registration->base_reg.mpool->mpool_deregister(
@@ -85,16 +85,16 @@ static void mca_mpool_vapi_registration_destructor( mca_mpool_vapi_registration_
OBJ_CLASS_INSTANCE(
mca_mpool_vapi_registration_t,
mca_mpool_openib_registration_t,
mca_mpool_base_registration_t,
mca_mpool_vapi_registration_constructor,
mca_mpool_vapi_registration_destructor
mca_mpool_openib_registration_constructor,
mca_mpool_openib_registration_destructor
);
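A minimal lifecycle sketch for the class instantiated above (illustrative only; it assumes the usual OBJ_NEW/OBJ_RETAIN/OBJ_RELEASE semantics from class/ompi_object.h): the constructor runs on OBJ_NEW, and the destructor, which deregisters the region, runs when the last reference drops.

#include "class/ompi_object.h"
#include "mpool_openib.h"

static void registration_lifecycle_sketch(void)
{
    mca_mpool_openib_registration_t* reg =
        OBJ_NEW(mca_mpool_openib_registration_t);  /* ctor: is_leave_pinned = false */
    OBJ_RETAIN(reg);    /* e.g. while cached on a BTL's MRU list */
    OBJ_RELEASE(reg);   /* refcount 2 -> 1, region stays registered */
    OBJ_RELEASE(reg);   /* refcount 1 -> 0, dtor deregisters the region */
}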
static char* mca_mpool_vapi_param_register_string(
static char* mca_mpool_openib_param_register_string(
const char* param_name,
const char* default_value)
{
@@ -108,14 +108,14 @@ static char* mca_mpool_vapi_param_register_string(
/**
* component open/close/init function
*/
static int mca_mpool_vapi_open(void)
static int mca_mpool_openib_open(void)
{
/* register VAPI component parameters */
mca_mpool_vapi_component.vapi_allocator_name =
mca_mpool_vapi_param_register_string("allocator", "bucket");
mca_mpool_openib_component.vapi_allocator_name =
mca_mpool_openib_param_register_string("allocator", "bucket");
/* get the page size for this architecture*/
mca_mpool_vapi_component.page_size = sysconf(_SC_PAGESIZE);
mca_mpool_openib_component.page_size = sysconf(_SC_PAGESIZE);
return OMPI_SUCCESS;
}
@@ -126,8 +126,8 @@ void* mca_common_vapi_segment_alloc(
size_t* size,
mca_mpool_base_registration_t** registration)
{
void* addr_malloc = (void*)malloc((*size) + mca_mpool_vapi_component.page_size);
void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_vapi_component.page_size_log);
void* addr_malloc = (void*)malloc((*size) + mca_mpool_openib_component.page_size);
void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_openib_component.page_size_log);
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, *size, registration)) {
free(addr_malloc);
return NULL;
@@ -136,44 +136,44 @@ void* mca_common_vapi_segment_alloc(
}
/* Allocate a segment of memory and register it with IB; the memory handle is returned through the registration argument. */
static mca_mpool_base_module_t* mca_mpool_vapi_init(
static mca_mpool_base_module_t* mca_mpool_openib_init(
struct mca_mpool_base_resources_t* resources)
{
mca_mpool_vapi_module_t* mpool_module;
mca_mpool_openib_module_t* mpool_module;
mca_allocator_base_component_t* allocator_component;
long page_size = mca_mpool_vapi_component.page_size;
long page_size = mca_mpool_openib_component.page_size;
mca_mpool_vapi_component.page_size_log = 0;
mca_mpool_openib_component.page_size_log = 0;
while(page_size > 1){
page_size = page_size >> 1;
mca_mpool_vapi_component.page_size_log++;
mca_mpool_openib_component.page_size_log++;
}
/* if the specified allocator cannot be loaded - look for an alternative */
allocator_component = mca_allocator_component_lookup(mca_mpool_vapi_component.vapi_allocator_name);
allocator_component = mca_allocator_component_lookup(mca_mpool_openib_component.vapi_allocator_name);
if(NULL == allocator_component) {
if(ompi_list_get_size(&mca_allocator_base_components) > 0) {
mca_base_component_list_item_t* item = (mca_base_component_list_item_t*)
ompi_list_get_first(&mca_allocator_base_components);
allocator_component = (mca_allocator_base_component_t*)item->cli_component;
ompi_output(0, "mca_mpool_vapi_init: unable to locate allocator: %s - using %s\n",
mca_mpool_vapi_component.vapi_allocator_name, allocator_component->allocator_version.mca_component_name);
ompi_output(0, "mca_mpool_openib_init: unable to locate allocator: %s - using %s\n",
mca_mpool_openib_component.vapi_allocator_name, allocator_component->allocator_version.mca_component_name);
} else {
ompi_output(0, "mca_mpool_vapi_init: unable to locate allocator: %s\n",
mca_mpool_vapi_component.vapi_allocator_name);
ompi_output(0, "mca_mpool_openib_init: unable to locate allocator: %s\n",
mca_mpool_openib_component.vapi_allocator_name);
return NULL;
}
}
mpool_module = (mca_mpool_vapi_module_t*)malloc(sizeof(mca_mpool_vapi_module_t));
mca_mpool_vapi_module_init(mpool_module);
mpool_module = (mca_mpool_openib_module_t*)malloc(sizeof(mca_mpool_openib_module_t));
mca_mpool_openib_module_init(mpool_module);
/* setup allocator TODO fix up */
mpool_module->hca_pd = *resources;
mpool_module->vapi_allocator =
allocator_component->allocator_init(true, mca_common_vapi_segment_alloc, NULL, &mpool_module->super);
if(NULL == mpool_module->vapi_allocator) {
ompi_output(0, "mca_mpool_vapi_init: unable to initialize allocator");
ompi_output(0, "mca_mpool_openib_init: unable to initialize allocator");
return NULL;
}
return &mpool_module->super;
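For reference, the page_size_log loop above just computes log2 of the page size, and mca_common_vapi_segment_alloc over-allocates by one page so the returned pointer can be rounded up to a page boundary. A self-contained sketch of both (plain C, illustrative only):

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    long page_size = sysconf(_SC_PAGESIZE);
    long page_size_log = 0;
    long ps;
    for (ps = page_size; ps > 1; ps >>= 1) {
        page_size_log++;                    /* same loop as in the init code */
    }
    /* over-allocate by one page, then round up to a page boundary */
    size_t size = 4096;
    char* raw = (char*)malloc(size + (size_t)page_size);
    if (NULL == raw) {
        return 1;
    }
    uintptr_t mask = ((uintptr_t)1 << page_size_log) - 1;
    char* aligned = (char*)(((uintptr_t)raw + mask) & ~mask);
    (void)aligned;                          /* this is what would be registered */
    free(raw);
    return 0;
}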


@@ -17,22 +17,21 @@
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "mca/mpool/vapi/mpool_vapi.h"
#include "mca/common/vapi/vapi_mem_reg.h"
#include "mca/mpool/mvapi/mpool_openib.h"
/*
* Initializes the mpool module.
*/
void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool)
void mca_mpool_openib_module_init(mca_mpool_openib_module_t* mpool)
{
mpool->super.mpool_component = &mca_mpool_vapi_component.super;
mpool->super.mpool_component = &mca_mpool_openib_component.super;
mpool->super.mpool_base = NULL; /* no base .. */
mpool->super.mpool_alloc = mca_mpool_vapi_alloc;
mpool->super.mpool_realloc = mca_mpool_vapi_realloc;
mpool->super.mpool_free = mca_mpool_vapi_free;
mpool->super.mpool_register = mca_mpool_vapi_register;
mpool->super.mpool_deregister = mca_mpool_vapi_deregister;
mpool->super.mpool_alloc = mca_mpool_openib_alloc;
mpool->super.mpool_realloc = mca_mpool_openib_realloc;
mpool->super.mpool_free = mca_mpool_openib_free;
mpool->super.mpool_register = mca_mpool_openib_register;
mpool->super.mpool_deregister = mca_mpool_openib_deregister;
mpool->super.mpool_finalize = NULL;
}
@@ -40,28 +39,28 @@ void mca_mpool_vapi_module_init(mca_mpool_vapi_module_t* mpool)
/**
* allocate function
*/
void* mca_mpool_vapi_alloc(
void* mca_mpool_openib_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
return mpool_vapi->vapi_allocator->alc_alloc(mpool_vapi->vapi_allocator, size, align, registration);
mca_mpool_openib_module_t* mpool_openib = (mca_mpool_openib_module_t*)mpool;
return mpool_openib->vapi_allocator->alc_alloc(mpool_openib->vapi_allocator, size, align, registration);
}
/*
* register memory
*/
int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
int mca_mpool_openib_register(mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
mca_mpool_base_registration_t** registration){
mca_mpool_vapi_module_t * mpool_module = (mca_mpool_vapi_module_t*) mpool;
mca_mpool_vapi_registration_t * vapi_reg;
mca_mpool_openib_module_t * mpool_module = (mca_mpool_openib_module_t*) mpool;
mca_mpool_openib_registration_t * vapi_reg;
VAPI_mrw_t mr_in, mr_out;
VAPI_ret_t ret;
@@ -69,8 +68,8 @@ int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
memset(&mr_in, 0, sizeof(VAPI_mrw_t));
memset(&mr_out, 0, sizeof(VAPI_mrw_t));
*registration = (mca_mpool_base_registration_t*) OBJ_NEW(mca_mpool_vapi_registration_t); /* (void*) malloc(sizeof(mca_mpool_base_registration_t)); */
vapi_reg = (mca_mpool_vapi_registration_t*) *registration;
*registration = (mca_mpool_base_registration_t*) OBJ_NEW(mca_mpool_openib_registration_t); /* (void*) malloc(sizeof(mca_mpool_base_registration_t)); */
vapi_reg = (mca_mpool_openib_registration_t*) *registration;
vapi_reg->base_reg.mpool = mpool;
@@ -110,15 +109,15 @@ int mca_mpool_vapi_register(mca_mpool_base_module_t* mpool,
/*
* deregister memory
*/
int mca_mpool_vapi_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t size,
int mca_mpool_openib_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t size,
mca_mpool_base_registration_t* registration){
VAPI_ret_t ret;
mca_mpool_vapi_module_t * mpool_vapi = (mca_mpool_vapi_module_t*) mpool;
mca_mpool_vapi_registration_t * vapi_reg;
vapi_reg = (mca_mpool_vapi_registration_t*) registration;
mca_mpool_openib_module_t * mpool_openib = (mca_mpool_openib_module_t*) mpool;
mca_mpool_openib_registration_t * vapi_reg;
vapi_reg = (mca_mpool_openib_registration_t*) registration;
ret = VAPI_deregister_mr(
mpool_vapi->hca_pd.hca,
mpool_openib->hca_pd.hca,
vapi_reg->hndl
);
@@ -133,26 +132,26 @@ int mca_mpool_vapi_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t
/**
* realloc function
*/
void* mca_mpool_vapi_realloc(
void* mca_mpool_openib_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
return mpool_vapi->vapi_allocator->alc_realloc( mpool_vapi->vapi_allocator, addr, size, registration);
mca_mpool_openib_module_t* mpool_openib = (mca_mpool_openib_module_t*)mpool;
return mpool_openib->vapi_allocator->alc_realloc( mpool_openib->vapi_allocator, addr, size, registration);
}
/**
* free function
*/
void mca_mpool_vapi_free(mca_mpool_base_module_t* mpool, void * addr,
void mca_mpool_openib_free(mca_mpool_base_module_t* mpool, void * addr,
mca_mpool_base_registration_t* registration)
{
mca_mpool_vapi_module_t* mpool_vapi = (mca_mpool_vapi_module_t*)mpool;
mpool_vapi->super.mpool_deregister(mpool, addr, 0, registration);
mpool_vapi->vapi_allocator->alc_free(mpool_vapi->vapi_allocator, addr);
mca_mpool_openib_module_t* mpool_openib = (mca_mpool_openib_module_t*)mpool;
mpool_openib->super.mpool_deregister(mpool, addr, 0, registration);
mpool_openib->vapi_allocator->alc_free(mpool_openib->vapi_allocator, addr);
}

src/mca/mpool/openib/mvapi_config.h (new file, 96 lines)

@@ -0,0 +1,96 @@
/* mvapi_config.h. Generated by configure. */
/* mvapi_config.h.in. Generated from configure.ac by autoheader. */
/* -*- c -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* Open MPI configuration header file.
* MCA mpool: mvapi component
*/
#ifndef MCA_mpool_openib_CONFIG_H
#define MCA_mpool_openib_CONFIG_H
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `vapi' library (-lvapi). */
#define HAVE_LIBVAPI 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have the <vapi.h> header file. */
#define HAVE_VAPI_H 1
/* Whether the mvapi mpool is the default mpool or not */
#define MCA_mpool_openib_DEFAULT 0
/* OMPI architecture string */
#define OMPI_ARCH "x86_64-unknown-linux-gnu"
/* OMPI underlying C compiler */
#define OMPI_CC "gcc"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME ""
/* Define to the full name and version of this package. */
#define PACKAGE_STRING ""
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME ""
/* Define to the version of this package. */
#define PACKAGE_VERSION ""
/* Define to 1 if you have the ANSI C header files. */
/* #undef STDC_HEADERS */
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
#endif /* MCA_mpool_openib_CONFIG_H */

src/mca/mpool/openib/mvapi_config.h.in (new file, 95 lines)

@@ -0,0 +1,95 @@
/* mvapi_config.h.in. Generated from configure.ac by autoheader. */
/* -*- c -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* Open MPI configuration header file.
* MCA mpool: mvapi component
*/
#ifndef MCA_mpool_openib_CONFIG_H
#define MCA_mpool_openib_CONFIG_H
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `vapi' library (-lvapi). */
#undef HAVE_LIBVAPI
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the <vapi.h> header file. */
#undef HAVE_VAPI_H
/* Whether the mvapi mpool is the default mpool or not */
#undef MCA_mpool_openib_DEFAULT
/* OMPI architecture string */
#undef OMPI_ARCH
/* OMPI underlying C compiler */
#undef OMPI_CC
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# undef _GNU_SOURCE
#endif
#endif /* MCA_mpool_openib_CONFIG_H */


@@ -1,3 +0,0 @@
ompi
gshipman
twoodall


@@ -1,4 +0,0 @@
mitch
spoole
mlleinin
rlgraham


@@ -1,39 +0,0 @@
# -*- makefile -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004 The Ohio State University.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources += \
src/ptl_ib.c \
src/ptl_ib.h \
src/ptl_ib_addr.h \
src/ptl_ib_component.c \
src/ptl_ib_proc.c \
src/ptl_ib_proc.h \
src/ptl_ib_priv.c \
src/ptl_ib_priv.h \
src/ptl_ib_peer.c \
src/ptl_ib_peer.h \
src/ptl_ib_recvfrag.c \
src/ptl_ib_recvfrag.h \
src/ptl_ib_sendfrag.c \
src/ptl_ib_sendfrag.h \
src/ptl_ib_sendreq.c \
src/ptl_ib_sendreq.h \
src/ptl_ib_vapi.h \
src/ptl_ib_memory.c \
src/ptl_ib_memory.h


@@ -1,437 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "util/if.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/ptl_base_header.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/ptl/base/ptl_base_sendfrag.h"
#include "mca/pml/base/pml_base_recvreq.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
#include "mca/base/mca_base_module_exchange.h"
#include "ptl_ib.h"
mca_ptl_ib_module_t mca_ptl_ib_module = {
{
&mca_ptl_ib_component.super,
1, /* max size of request cache */
sizeof(mca_ptl_ib_send_frag_t), /* bytes required by ptl for a request */
0, /* max size of first fragment */
0, /* min fragment size */
0, /* max fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
MCA_PTL_PUT, /* ptl flags */
mca_ptl_ib_add_procs,
mca_ptl_ib_del_procs,
mca_ptl_ib_finalize,
mca_ptl_ib_send,
mca_ptl_ib_put,
NULL,
mca_ptl_ib_matched,
mca_ptl_ib_request_init,
mca_ptl_ib_request_fini,
NULL,
NULL,
NULL
}
};
int mca_ptl_ib_add_procs(
struct mca_ptl_base_module_t* ptl,
size_t nprocs,
struct ompi_proc_t **ompi_procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable)
{
mca_ptl_ib_module_t* ib_ptl = (mca_ptl_ib_module_t*)ptl;
int i, rc;
for(i = 0; i < nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
mca_ptl_ib_proc_t* ib_proc;
mca_ptl_base_peer_t* ib_peer;
if(NULL == (ib_proc = mca_ptl_ib_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
* Check to make sure that the peer has at least as many interface
* addresses exported as we are trying to use. If not, then
* don't bind this PTL instance to the proc.
*/
OMPI_THREAD_LOCK(&ib_proc->proc_lock);
/* The ptl_proc datastructure is shared by all IB PTL
* instances that are trying to reach this destination.
* Cache the peer instance on the ptl_proc.
*/
ib_peer = OBJ_NEW(mca_ptl_ib_peer_t);
if(NULL == ib_peer) {
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
return OMPI_ERR_OUT_OF_RESOURCE;
}
ib_peer->peer_ptl = ib_ptl;
rc = mca_ptl_ib_proc_insert(ib_proc, ib_peer);
if(rc != OMPI_SUCCESS) {
OBJ_RELEASE(ib_peer);
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
continue;
}
ompi_bitmap_set_bit(reachable, i);
OMPI_THREAD_UNLOCK(&ib_proc->proc_lock);
peers[i] = ib_peer;
}
return OMPI_SUCCESS;
}
int mca_ptl_ib_del_procs(struct mca_ptl_base_module_t* ptl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t ** peers)
{
/* Stub */
D_PRINT("Stub\n");
return OMPI_SUCCESS;
}
int mca_ptl_ib_finalize(struct mca_ptl_base_module_t* ptl)
{
/* Stub */
D_PRINT("Stub\n");
return OMPI_SUCCESS;
}
int mca_ptl_ib_request_init( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_send_request_t* request)
{
mca_ptl_ib_module_t* ib_ptl = (mca_ptl_ib_module_t*)ptl;
mca_ptl_ib_send_frag_t* sendfrag;
ompi_list_item_t* item;
int rc;
OMPI_FREE_LIST_GET(&ib_ptl->send_free, item, rc);
if(NULL == (sendfrag = (mca_ptl_ib_send_frag_t*)item)) {
return rc;
}
((mca_ptl_ib_send_request_t*) request)->req_frag = sendfrag;
return OMPI_SUCCESS;
}
void mca_ptl_ib_request_fini( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_send_request_t* request)
{
mca_ptl_ib_module_t* ib_ptl = (mca_ptl_ib_module_t*)ptl;
mca_ptl_ib_send_request_t* sendreq = (mca_ptl_ib_send_request_t*)request;
OMPI_FREE_LIST_RETURN(&ib_ptl->send_free, (ompi_list_item_t*)sendreq->req_frag);
}
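The request_init/request_fini pair above is a plain free-list get/return; the general pattern (assuming the usual ompi_free_list_t semantics; the helper name is hypothetical) looks like this:

static mca_ptl_ib_send_frag_t* grab_send_frag(mca_ptl_ib_module_t* ib_ptl)
{
    ompi_list_item_t* item;
    int rc;
    OMPI_FREE_LIST_GET(&ib_ptl->send_free, item, rc);   /* may grow the list */
    return (mca_ptl_ib_send_frag_t*)item;               /* NULL on failure */
}

Each descriptor obtained this way must be returned exactly once with OMPI_FREE_LIST_RETURN, as request_fini does.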
/*
* Initiate a send. If this is the first fragment, use the fragment
* descriptor allocated with the send request, otherwise obtain
* one from the free list. Initialize the fragment and forward
* it on to the peer.
*/
int mca_ptl_ib_send( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_peer,
struct mca_ptl_base_send_request_t* sendreq,
size_t offset,
size_t size,
int flags)
{
mca_ptl_ib_module_t* ib_ptl = (mca_ptl_ib_module_t*)ptl;
mca_ptl_ib_send_frag_t* sendfrag;
mca_ptl_base_header_t *hdr;
size_t hdr_length;
int rc = OMPI_SUCCESS;
if(sendreq->req_cached) {
sendfrag = ((mca_ptl_ib_send_request_t*)sendreq)->req_frag;
} else {
ompi_list_item_t* item;
OMPI_FREE_LIST_GET(&ib_ptl->send_free, item, rc);
if(NULL == (sendfrag = (mca_ptl_ib_send_frag_t*)item)) {
return rc;
}
}
/* initialize convertor */
if(size > 0) {
ompi_convertor_t *convertor;
int rc, freeAfter;
unsigned int iov_count, max_data;
struct iovec iov;
/* first fragment (eager send) and first fragment of long
* protocol can use the convertor initialized on the request,
* remaining fragments must copy/reinit the convertor as the
* transfer could be in parallel.
*/
if( offset <= mca_ptl_ib_module.super.ptl_first_frag_size ) {
convertor = &sendreq->req_send.req_convertor;
} else {
convertor = &sendfrag->frag_send.frag_base.frag_convertor;
ompi_convertor_copy(&sendreq->req_send.req_convertor, convertor);
ompi_convertor_init_for_send( convertor,
0,
sendreq->req_send.req_base.req_datatype,
sendreq->req_send.req_base.req_count,
sendreq->req_send.req_base.req_addr,
offset,
NULL );
}
/* if data is contiguous, the convertor will return an offset
* into the user's buffer - otherwise it will return an allocated buffer
* that holds the packed data
*/
if((flags & MCA_PTL_FLAGS_ACK) == 0) {
iov.iov_base = &sendfrag->ib_buf.buf[sizeof(mca_ptl_base_match_header_t)];
} else {
iov.iov_base = &sendfrag->ib_buf.buf[sizeof(mca_ptl_base_rendezvous_header_t)];
}
iov.iov_len = size;
iov_count = 1;
max_data = size;
if((rc = ompi_convertor_pack(convertor,&iov, &iov_count, &max_data, &freeAfter)) < 0) {
ompi_output(0, "Unable to pack data");
return rc;
}
/* adjust size to reflect actual number of bytes packed by convertor */
size = iov.iov_len;
sendfrag->frag_send.frag_base.frag_addr = iov.iov_base;
sendfrag->frag_send.frag_base.frag_size = iov.iov_len;
} else {
sendfrag->frag_send.frag_base.frag_addr = NULL;
sendfrag->frag_send.frag_base.frag_size = 0;
}
/* fragment state */
sendfrag->frag_send.frag_base.frag_owner = &ptl_peer->peer_ptl->super;
sendfrag->frag_send.frag_request = sendreq;
sendfrag->frag_send.frag_base.frag_peer = ptl_peer;
sendfrag->frag_progressed = 0;
/* Initialize header */
hdr = (mca_ptl_base_header_t *) &sendfrag->ib_buf.buf[0];
hdr->hdr_common.hdr_flags = flags;
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
if((flags & MCA_PTL_FLAGS_ACK) == 0) {
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH;
hdr_length = sizeof(mca_ptl_base_match_header_t);
} else {
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_RNDV;
hdr->hdr_rndv.hdr_frag_length = sendfrag->frag_send.frag_base.frag_size;
hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
hdr->hdr_rndv.hdr_src_ptr.pval = sendfrag;
hdr_length = sizeof(mca_ptl_base_rendezvous_header_t);
}
/* Update the offset after actual fragment size is determined,
* and before attempting to send the fragment */
sendreq->req_offset += size;
IB_SET_SEND_DESC_LEN((&sendfrag->ib_buf), (hdr_length + size));
if(OMPI_SUCCESS != (rc = mca_ptl_ib_peer_send(ptl_peer, sendfrag))) {
return rc;
}
/* if this is the entire message - signal request is complete */
if(sendreq->req_send.req_bytes_packed == size) {
ompi_request_complete( &(sendreq->req_send.req_base.req_ompi) );
}
return OMPI_SUCCESS;
}
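The buffer layout assembled above places the PTL header at the front of the pre-registered send buffer with the packed payload directly behind it. A self-contained sketch of that layout (toy header type, illustrative only, not the real mca_ptl_base_header_t):

#include <stdint.h>
#include <string.h>

struct toy_hdr { uint32_t type; uint32_t payload_len; };

static size_t build_frag(char* ib_buf, const char* payload, size_t size)
{
    struct toy_hdr* hdr = (struct toy_hdr*)ib_buf;
    hdr->type = 1;                                 /* e.g. a match fragment */
    hdr->payload_len = (uint32_t)size;
    memcpy(ib_buf + sizeof(*hdr), payload, size);  /* the convertor's role */
    return sizeof(*hdr) + size;    /* length handed to IB_SET_SEND_DESC_LEN */
}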
/*
* RDMA local buffer to remote buffer address.
*/
int mca_ptl_ib_put( struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_peer,
struct mca_ptl_base_send_request_t* req, size_t offset,
size_t size, int flags)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/*
* On a match send an ack to the peer.
*/
static void mca_ptl_ib_ack(
mca_ptl_ib_module_t *ib_ptl,
mca_ptl_ib_send_frag_t *send_frag,
mca_ptl_ib_recv_frag_t *recv_frag)
{
mca_ptl_base_header_t *hdr;
mca_ptl_base_recv_request_t *request;
mca_ptl_ib_peer_t *ib_peer;
ib_buffer_t *ib_buf;
int recv_len;
int len_to_reg, len_added = 0;
void *addr_to_reg, *ack_buf;
/* Header starts at beginning of registered
* buffer space */
hdr = (mca_ptl_base_header_t *)
&send_frag->ib_buf.buf[0];
request = recv_frag->super.frag_request;
/* Amount of data we have already received */
recv_len =
recv_frag->super.frag_base.frag_header.hdr_rndv.hdr_frag_length;
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK;
hdr->hdr_common.hdr_flags = 0;
/* Remote side send descriptor */
hdr->hdr_ack.hdr_src_ptr =
recv_frag->super.frag_base.frag_header.hdr_rndv.hdr_src_ptr;
/* Matched request from recv side */
hdr->hdr_ack.hdr_dst_match.lval = 0;
hdr->hdr_ack.hdr_dst_match.pval = request;
hdr->hdr_ack.hdr_dst_addr.lval = 0;
addr_to_reg = (void*)((char*)request->req_recv.req_base.req_addr + recv_len);
hdr->hdr_ack.hdr_dst_addr.pval = addr_to_reg;
len_to_reg = request->req_recv.req_bytes_packed - recv_len;
hdr->hdr_ack.hdr_dst_size = len_to_reg;
A_PRINT("Dest addr : %p, RDMA Len : %d",
hdr->hdr_ack.hdr_dst_addr.pval,
hdr->hdr_ack.hdr_dst_size);
ack_buf = (void*) ((char*) (&send_frag->ib_buf.buf[0]) +
sizeof(mca_ptl_base_ack_header_t));
/* Prepare ACK packet with IB specific stuff */
mca_ptl_ib_prepare_ack(ib_ptl, addr_to_reg, len_to_reg,
ack_buf, &len_added);
/* Send it right away! */
ib_peer = (mca_ptl_ib_peer_t *)
recv_frag->super.frag_base.frag_peer;
ib_buf = &send_frag->ib_buf;
IB_SET_SEND_DESC_LEN(ib_buf,
(sizeof(mca_ptl_base_ack_header_t) + len_added));
mca_ptl_ib_post_send(ib_ptl, ib_peer, &send_frag->ib_buf, send_frag);
/* fragment state */
send_frag->frag_send.frag_base.frag_owner = &ib_ptl->super;
send_frag->frag_send.frag_base.frag_peer = recv_frag->super.frag_base.frag_peer;
send_frag->frag_send.frag_base.frag_addr = NULL;
send_frag->frag_send.frag_base.frag_size = 0;
}
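In effect the ACK advertises an RDMA target back to the sender. Schematically it carries the following (field names here are illustrative; the real ones are hdr_dst_addr and hdr_dst_size, plus the key material added by mca_ptl_ib_prepare_ack):

#include <stdint.h>

struct rdma_target_info {
    void*    dst_addr;    /* req_addr + bytes already received */
    uint64_t dst_size;    /* bytes the sender may still RDMA-write */
    uint32_t rkey;        /* remote key for the freshly registered region */
};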
/*
* A posted receive has been matched - if required send an
* ack back to the peer and process the fragment. Copy the
* data to user buffer
*/
void mca_ptl_ib_matched(
mca_ptl_base_module_t* ptl,
mca_ptl_base_recv_frag_t* frag)
{
mca_ptl_ib_module_t* ib_ptl = (mca_ptl_ib_module_t*)ptl;
mca_ptl_base_recv_request_t *request;
mca_ptl_base_header_t *header;
mca_ptl_ib_recv_frag_t *recv_frag;
header = &frag->frag_base.frag_header;
request = frag->frag_request;
recv_frag = (mca_ptl_ib_recv_frag_t*) frag;
D_PRINT("Matched frag\n");
if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) {
mca_ptl_ib_send_frag_t *send_frag;
send_frag = mca_ptl_ib_alloc_send_frag(ib_ptl, NULL);
if(NULL == send_frag) {
ompi_output(0, "Cannot get send descriptor");
} else {
mca_ptl_ib_ack(ib_ptl, send_frag, recv_frag);
}
}
/* Process the fragment */
/* IN TCP case, IO_VEC is first allocated.
* then recv the data, and copy if needed,
* But in ELAN cases, we save the data into an
* unex buffer if the recv descriptor is not posted
* (for too long) (TODO).
* We then need to copy from
* unex_buffer to application buffer */
if ((header->hdr_common.hdr_type & MCA_PTL_HDR_TYPE_MATCH) &&
(header->hdr_match.hdr_msg_length > 0)) {
struct iovec iov;
ompi_proc_t *proc;
unsigned int iov_count, max_data;
int freeAfter;
iov.iov_base = frag->frag_base.frag_addr;
iov.iov_len = frag->frag_base.frag_size;
proc = ompi_comm_peer_lookup(request->req_recv.req_base.req_comm,
request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE);
ompi_convertor_copy(proc->proc_convertor, &frag->frag_base.frag_convertor);
ompi_convertor_init_for_recv( &frag->frag_base.frag_convertor,
0,
request->req_recv.req_base.req_datatype,
request->req_recv.req_base.req_count,
request->req_recv.req_base.req_addr,
0, /* fragment offset */
NULL );
ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, &iov_count, &max_data, &freeAfter);
}
mca_ptl_ib_recv_frag_done(header, frag, request);
}


@@ -1,346 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_IB_H
#define MCA_PTL_IB_H
/* Standard system includes */
#include <sys/types.h>
#include <string.h>
/* Open MPI includes */
#include "class/ompi_free_list.h"
#include "class/ompi_bitmap.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "util/output.h"
/* InfiniBand VAPI includes */
#include "ptl_ib_vapi.h"
#include "ptl_ib_addr.h"
#include "ptl_ib_proc.h"
#include "ptl_ib_peer.h"
#include "ptl_ib_priv.h"
/* Other IB ptl includes */
#include "ptl_ib_sendreq.h"
#include "ptl_ib_recvfrag.h"
#include "ptl_ib_sendfrag.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* IB PTL component.
*/
struct mca_ptl_ib_component_t {
mca_ptl_base_component_1_0_0_t super;
uint32_t ib_num_ptls;
/**< number of hcas available to the IB component */
struct mca_ptl_ib_module_t *ib_ptls;
/**< array of available PTLs */
int ib_free_list_num;
/**< initial size of free lists */
int ib_free_list_max;
/**< maximum size of free lists */
int ib_free_list_inc;
/**< number of elements to alloc when growing free lists */
ompi_free_list_t ib_send_requests;
/**< free list of ib send requests -- sendreq + IB */
ompi_free_list_t ib_send_frags;
/**< free list of ib send fragments */
ompi_free_list_t ib_recv_frags;
/**< free list of ib recv fragments */
ompi_list_t ib_procs;
/**< list of ib proc structures */
ompi_event_t ib_send_event;
/**< event structure for sends */
ompi_event_t ib_recv_event;
/**< event structure for recvs */
ompi_mutex_t ib_lock;
/**< lock for accessing module state */
int ib_mem_registry_hints_log_size;
/**< log2 size of hints hash array used by memory registry */
};
typedef struct mca_ptl_ib_component_t mca_ptl_ib_component_t;
struct mca_ptl_ib_recv_frag_t;
extern mca_ptl_ib_component_t mca_ptl_ib_component;
/**
* IB PTL Interface
*/
struct mca_ptl_ib_module_t {
mca_ptl_base_module_t super; /**< base PTL interface */
VAPI_hca_id_t hca_id; /**< ID of HCA */
VAPI_hca_port_t port; /**< IB port of this PTL */
VAPI_hca_hndl_t nic; /**< NIC handle */
VAPI_pd_hndl_t ptag; /**< Protection Domain tag */
VAPI_cq_hndl_t cq_hndl; /**< Completion Queue handle */
EVAPI_async_handler_hndl_t async_handler;
/**< Async event handler used to detect weird/unknown events */
mca_ptl_ib_mem_registry_t mem_registry; /**< registry of memory regions */
ompi_free_list_t send_free; /**< free list of send buffer descriptors */
ompi_list_t repost; /**< list of buffers to repost */
};
typedef struct mca_ptl_ib_module_t mca_ptl_ib_module_t;
extern mca_ptl_ib_module_t mca_ptl_ib_module;
/**
* IB FIN header
*/
typedef struct mca_ptl_ib_fin_header_t mca_ptl_ib_fin_header_t;
struct mca_ptl_ib_fin_header_t {
mca_ptl_base_frag_header_t frag_hdr;
ompi_ptr_t mr_addr;
uint64_t mr_size;
};
/**
* Register IB component parameters with the MCA framework
*/
extern int mca_ptl_ib_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_ptl_ib_component_close(void);
/**
* IB component initialization.
*
* @param num_ptl_modules (OUT) Number of PTLs returned in PTL array.
* @param allow_multi_user_threads (OUT) Flag indicating whether PTL supports user threads (TRUE)
* @param have_hidden_threads (OUT) Flag indicating whether PTL uses threads (TRUE)
*
* (1) read interface list from kernel and compare against component parameters
* then create a PTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) publish PTL addressing info
*
*/
extern mca_ptl_base_module_t** mca_ptl_ib_component_init(
int *num_ptl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* IB component control.
*/
extern int mca_ptl_ib_component_control(
int param,
void* value,
size_t size
);
/**
* IB component progress.
*/
extern int mca_ptl_ib_component_progress(
mca_ptl_tstamp_t tstamp
);
/**
* Cleanup any resources held by the PTL.
*
* @param ptl PTL instance.
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_ptl_ib_finalize(
struct mca_ptl_base_module_t* ptl
);
/**
* PML->PTL notification of change in the process list.
*
* @param ptl (IN)
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (OUT) Set of (optional) peer addressing info.
* @param peers (IN/OUT) Set of processes that are reachable via this PTL.
* @return OMPI_SUCCESS or error status on failure.
*
*/
extern int mca_ptl_ib_add_procs(
struct mca_ptl_base_module_t* ptl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable
);
/**
* PML->PTL notification of change in the process list.
*
* @param ptl (IN) PTL instance
* @param nproc (IN) Number of processes.
* @param procs (IN) Set of processes.
* @param peers (IN) Set of peer data structures.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_ptl_ib_del_procs(
struct mca_ptl_base_module_t* ptl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t** peers
);
/**
* PML->PTL Initialize a send request for TCP cache.
*
* @param ptl (IN) PTL instance
* @param request (IN) Pointer to allocated request.
*
**/
extern int mca_ptl_ib_request_init(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_send_request_t*
);
/**
* PML->PTL Cleanup a send request that is being removed from the cache.
*
* @param ptl (IN) PTL instance
* @param request (IN) Pointer to allocated request.
*
**/
extern void mca_ptl_ib_request_fini(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_send_request_t*
);
/**
* PML->PTL Return a send request to the PTL modules free list.
*
* @param ptl (IN) PTL instance
* @param request (IN) Pointer to allocated request.
*
*/
extern void mca_ptl_ib_request_return(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_send_request_t*
);
/**
* PML->PTL Notification that a receive fragment has been matched.
*
* @param ptl (IN) PTL instance
* @param recv_frag (IN) Receive fragment
*
*/
extern void mca_ptl_ib_matched(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_recv_frag_t* frag
);
/**
* PML->PTL Initiate a send of the specified size.
*
* @param ptl (IN) PTL instance
* @param ptl_base_peer (IN) PTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting PTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
* @return OMPI_SUCCESS if the PTL was able to queue one or more fragments
*/
extern int mca_ptl_ib_send(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_peer,
struct mca_ptl_base_send_request_t*,
size_t offset,
size_t size,
int flags
);
/**
* PML->PTL Initiate a put of the specified size.
*
* @param ptl (IN) PTL instance
* @param ptl_base_peer (IN) PTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting PTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
* @return OMPI_SUCCESS if the PTL was able to queue one or more fragments
*/
extern int mca_ptl_ib_put(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t* ptl_peer,
struct mca_ptl_base_send_request_t*,
size_t offset,
size_t size,
int flags
);
/**
* Return a recv fragment to the modules free list.
*
* @param ptl (IN) PTL instance
* @param frag (IN) IB receive fragment
*
*/
extern void mca_ptl_ib_recv_frag_return(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_ib_recv_frag_t* frag
);
/**
* Return a send fragment to the modules free list.
*
* @param ptl (IN) PTL instance
* @param frag (IN) IB send fragment
*
*/
extern void mca_ptl_ib_send_frag_return(
struct mca_ptl_base_module_t* ptl,
struct mca_ptl_ib_send_frag_t*
);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,24 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_ADDR_H
#define MCA_PTL_IB_ADDR_H
#include "ptl_ib.h"
#endif


@@ -1,362 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* #include <hh_common.h> */
/* Open MPI includes */
#include "ompi_config.h"
#include "include/constants.h"
#include "event/event.h"
#include "util/if.h"
#include "util/argv.h"
#include "util/output.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/base/mca_base_param.h"
#include "mca/base/mca_base_module_exchange.h"
#include "mca/errmgr/errmgr.h"
/* IB ptl includes */
#include "ptl_ib.h"
mca_ptl_ib_component_t mca_ptl_ib_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a ptl v1.0.0 component (which also implies a
specific MCA version) */
MCA_PTL_BASE_VERSION_1_0_0,
"ib", /* MCA component name */
1, /* MCA component major version */
0, /* MCA component minor version */
0, /* MCA component release version */
mca_ptl_ib_component_open, /* component open */
mca_ptl_ib_component_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
mca_ptl_ib_component_init,
mca_ptl_ib_component_control,
mca_ptl_ib_component_progress,
}
};
/*
* utility routines for parameter registration
*/
static inline char* mca_ptl_ib_param_register_string(
const char* param_name,
const char* default_value)
{
char *param_value;
int id = mca_base_param_register_string("ptl","ib",param_name,NULL,default_value);
mca_base_param_lookup_string(id, &param_value);
return param_value;
}
static inline int mca_ptl_ib_param_register_int(
const char* param_name,
int default_value)
{
int id = mca_base_param_register_int("ptl","ib",param_name,NULL,default_value);
int param_value = default_value;
mca_base_param_lookup_int(id,&param_value);
return param_value;
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
int mca_ptl_ib_component_open(void)
{
/* register component parameters */
mca_ptl_ib_module.super.ptl_exclusivity =
mca_ptl_ib_param_register_int ("exclusivity", 0);
mca_ptl_ib_module.super.ptl_first_frag_size =
mca_ptl_ib_param_register_int ("first_frag_size",
(MCA_PTL_IB_FIRST_FRAG_SIZE
- sizeof(mca_ptl_base_header_t)));
mca_ptl_ib_module.super.ptl_min_frag_size =
mca_ptl_ib_param_register_int ("min_frag_size",
(MCA_PTL_IB_FIRST_FRAG_SIZE
- sizeof(mca_ptl_base_header_t)));
mca_ptl_ib_module.super.ptl_max_frag_size =
mca_ptl_ib_param_register_int ("max_frag_size", 2<<30);
/* register IB component parameters */
mca_ptl_ib_component.ib_free_list_num =
mca_ptl_ib_param_register_int ("free_list_num", 8);
mca_ptl_ib_component.ib_free_list_max =
mca_ptl_ib_param_register_int ("free_list_max", 1024);
mca_ptl_ib_component.ib_free_list_inc =
mca_ptl_ib_param_register_int ("free_list_inc", 32);
mca_ptl_ib_component.ib_mem_registry_hints_log_size =
mca_ptl_ib_param_register_int ("hints_log_size", 8);
/* initialize global state */
mca_ptl_ib_component.ib_num_ptls=0;
mca_ptl_ib_component.ib_ptls=NULL;
OBJ_CONSTRUCT(&mca_ptl_ib_component.ib_procs, ompi_list_t);
OBJ_CONSTRUCT (&mca_ptl_ib_component.ib_recv_frags, ompi_free_list_t);
return OMPI_SUCCESS;
}
/*
* component cleanup - sanity checking of queue lengths
*/
int mca_ptl_ib_component_close(void)
{
D_PRINT("");
/* Stub */
return OMPI_SUCCESS;
}
/*
* IB component initialization:
* (1) read interface list from kernel and compare against component parameters
* then create a PTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) register PTL parameters with the MCA
*/
mca_ptl_base_module_t** mca_ptl_ib_component_init(int *num_ptl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
VAPI_ret_t vapi_ret;
VAPI_hca_id_t* hca_ids;
mca_ptl_base_module_t** ptls;
int i, ret;
/* initialization */
*num_ptl_modules = 0;
/* query the list of available hcas */
vapi_ret=EVAPI_list_hcas(0, &(mca_ptl_ib_component.ib_num_ptls), NULL);
if( VAPI_EAGAIN != vapi_ret || 0 == mca_ptl_ib_component.ib_num_ptls ) {
ompi_output(0,"Warning: no IB HCAs found\n");
return NULL;
}
hca_ids = (VAPI_hca_id_t*) malloc(mca_ptl_ib_component.ib_num_ptls * sizeof(VAPI_hca_id_t));
if(NULL == hca_ids) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
vapi_ret=EVAPI_list_hcas(mca_ptl_ib_component.ib_num_ptls, &mca_ptl_ib_component.ib_num_ptls, hca_ids);
if( VAPI_OK != vapi_ret ) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
/* Allocate space for ptl modules */
mca_ptl_ib_component.ib_ptls = (mca_ptl_ib_module_t*) malloc(sizeof(mca_ptl_ib_module_t) *
mca_ptl_ib_component.ib_num_ptls);
if(NULL == mca_ptl_ib_component.ib_ptls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
ptls = (struct mca_ptl_base_module_t**)
malloc(mca_ptl_ib_component.ib_num_ptls * sizeof(struct mca_ptl_ib_module_t*));
if(NULL == ptls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL;
}
/* Initialize pool of receive fragments */
ompi_free_list_init (&(mca_ptl_ib_component.ib_recv_frags),
sizeof (mca_ptl_ib_recv_frag_t),
OBJ_CLASS (mca_ptl_ib_recv_frag_t),
mca_ptl_ib_component.ib_free_list_num,
mca_ptl_ib_component.ib_free_list_max,
mca_ptl_ib_component.ib_free_list_inc, NULL);
/* Initialize each module */
for(i = 0; i < mca_ptl_ib_component.ib_num_ptls; i++) {
mca_ptl_ib_module_t* ib_ptl = &mca_ptl_ib_component.ib_ptls[i];
/* Initialize the modules function pointers */
memcpy(ib_ptl, &mca_ptl_ib_module, sizeof(mca_ptl_ib_module));
/* Initialize module state */
OBJ_CONSTRUCT(&ib_ptl->send_free, ompi_free_list_t);
OBJ_CONSTRUCT(&ib_ptl->repost, ompi_list_t);
ompi_free_list_init(&ib_ptl->send_free,
sizeof(mca_ptl_ib_send_frag_t),
OBJ_CLASS(mca_ptl_ib_send_frag_t),
mca_ptl_ib_component.ib_free_list_num,
mca_ptl_ib_component.ib_free_list_max,
mca_ptl_ib_component.ib_free_list_inc,
NULL);
memcpy(ib_ptl->hca_id, hca_ids[i], sizeof(ib_ptl->hca_id));
if(mca_ptl_ib_module_init(ib_ptl) != OMPI_SUCCESS) {
free(hca_ids);
return NULL;
}
/* Initialize the send descriptors */
if(mca_ptl_ib_send_frag_register(ib_ptl) != OMPI_SUCCESS) {
free(hca_ids);
return NULL;
}
ptls[i] = &ib_ptl->super;
}
/* Post OOB receive to support dynamic connection setup */
mca_ptl_ib_post_recv();
*num_ptl_modules = mca_ptl_ib_component.ib_num_ptls;
free(hca_ids);
return ptls;
}
/*
* IB component control
*/
int mca_ptl_ib_component_control(int param, void* value, size_t size)
{
return OMPI_SUCCESS;
}
/*
* IB component progress.
*/
#define MCA_PTL_IB_DRAIN_NETWORK(nic, cq_hndl, comp_type, comp_addr) \
{ \
VAPI_ret_t ret; \
VAPI_wc_desc_t comp; \
\
ret = VAPI_poll_cq(nic, cq_hndl, &comp); \
if(VAPI_OK == ret) { \
if(comp.status != VAPI_SUCCESS) { \
ompi_output(0, "Got error : %s, Vendor code : %d Frag : %p", \
VAPI_wc_status_sym(comp.status), \
comp.vendor_err_syndrome, comp.id); \
*comp_type = IB_COMP_ERROR; \
*comp_addr = NULL; \
} else { \
if(VAPI_CQE_SQ_SEND_DATA == comp.opcode) { \
*comp_type = IB_COMP_SEND; \
*comp_addr = (void*) (unsigned long) comp.id; \
} else if(VAPI_CQE_RQ_SEND_DATA == comp.opcode) { \
*comp_type = IB_COMP_RECV; \
*comp_addr = (void*) (unsigned long) comp.id; \
} else if(VAPI_CQE_SQ_RDMA_WRITE == comp.opcode) { \
*comp_type = IB_COMP_RDMA_W; \
*comp_addr = (void*) (unsigned long) comp.id; \
} else { \
ompi_output(0, "VAPI_poll_cq: returned unknown opcode : %d\n", \
comp.opcode); \
*comp_type = IB_COMP_ERROR; \
*comp_addr = NULL; \
} \
} \
} else { \
/* No completions from the network */ \
*comp_type = IB_COMP_NOTHING; \
*comp_addr = NULL; \
} \
}
int mca_ptl_ib_component_progress(mca_ptl_tstamp_t tstamp)
{
int i;
int count = 0;
/* Poll for completions */
for(i = 0; i < mca_ptl_ib_component.ib_num_ptls; i++) {
mca_ptl_ib_module_t* ib_ptl = &mca_ptl_ib_component.ib_ptls[i];
int comp_type = IB_COMP_NOTHING;
void* comp_addr;
MCA_PTL_IB_DRAIN_NETWORK(ib_ptl->nic, ib_ptl->cq_hndl, &comp_type, &comp_addr);
/* Handle n/w completions */
switch(comp_type) {
case IB_COMP_SEND :
/* Process a completed send */
mca_ptl_ib_send_frag_send_complete(ib_ptl, (mca_ptl_ib_send_frag_t*)comp_addr);
count++;
break;
case IB_COMP_RECV :
/* Process incoming receives */
mca_ptl_ib_process_recv(ib_ptl, comp_addr);
#if 0
/* Re post recv buffers */
if(ompi_list_get_size(&ib_ptl->repost) <= 1) {
ompi_list_append(&ib_ptl->repost, (ompi_list_item_t*)comp_addr);
} else {
ompi_list_item_t* item;
while(NULL != (item = ompi_list_remove_first(&ib_ptl->repost))) {
mca_ptl_ib_buffer_repost(ib_ptl->nic, item);
}
mca_ptl_ib_buffer_repost(ib_ptl->nic, comp_addr);
}
#else
mca_ptl_ib_buffer_repost(ib_ptl->nic, comp_addr);
#endif
count++;
break;
case IB_COMP_RDMA_W :
ompi_output(0, "%s:%d RDMA not implemented\n", __FILE__,__LINE__);
count++;
break;
case IB_COMP_NOTHING:
break;
default:
ompi_output(0, "Errorneous network completion");
break;
}
}
return count;
}

View file

@@ -1,311 +0,0 @@
/* Standard system includes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Open MPI includes */
/* Other IB PTL includes */
#include "ptl_ib.h"
#include "ptl_ib_memory.h"
#include "ptl_ib_priv.h"
static void mca_ptl_ib_mem_registry_construct(ompi_object_t *object);
static void mca_ptl_ib_mem_registry_destruct(ompi_object_t *object);
static void mca_ptl_ib_mem_registry_info_construct(ompi_object_t *object);
static void mca_ptl_ib_mem_registry_info_destruct(ompi_object_t *object);
static int mca_ptl_ib_mem_registry_info_compare(void *key1, void *key2);
static int mca_ptl_ib_mem_registry_real_deregister(
mca_ptl_ib_mem_registry_t *registry,
mca_ptl_ib_mem_registry_info_t *info);
OBJ_CLASS_INSTANCE(mca_ptl_ib_mem_registry_info_t, ompi_list_item_t,
mca_ptl_ib_mem_registry_info_construct, mca_ptl_ib_mem_registry_info_destruct);
OBJ_CLASS_INSTANCE(mca_ptl_ib_mem_registry_t, ompi_rb_tree_t, mca_ptl_ib_mem_registry_construct,
mca_ptl_ib_mem_registry_destruct);
static void mca_ptl_ib_mem_registry_construct(ompi_object_t *object)
{
mca_ptl_ib_mem_registry_t *registry = (mca_ptl_ib_mem_registry_t *)object;
int i;
ompi_rb_tree_init(&(registry->rb_tree), mca_ptl_ib_mem_registry_info_compare);
OBJ_CONSTRUCT(&(registry->info_free_list), ompi_free_list_t);
ompi_free_list_init(&registry->info_free_list, sizeof(mca_ptl_ib_mem_registry_info_t),
OBJ_CLASS(mca_ptl_ib_mem_registry_info_t), 32, -1, 32, NULL);
registry->hints_log_size = mca_ptl_ib_component.ib_mem_registry_hints_log_size;
/* sanity check -- enforce lower bound for hash calculation */
if (registry->hints_log_size < 1) {
registry->hints_log_size = 1;
}
registry->hints = (ompi_ptr_t *)malloc((1 << registry->hints_log_size) *
sizeof(ompi_ptr_t));
registry->hints_log_size = mca_ptl_ib_component.ib_mem_registry_hints_log_size;
registry->hints_size = (registry->hints) ? (1 << registry->hints_log_size) : 0;
for (i = 0; i < registry->hints_size; i++) {
registry->hints[i].pval = (void *)NULL;
}
registry->ib_ptl = NULL;
registry->evictable = NULL;
return;
}
static void mca_ptl_ib_mem_registry_destruct(ompi_object_t *object)
{
/* memory regions that are being tracked are not deregistered here */
mca_ptl_ib_mem_registry_t *registry = (mca_ptl_ib_mem_registry_t *)object;
OBJ_DESTRUCT(&(registry->info_free_list));
if (registry->hints_size != 0) {
free(registry->hints);
registry->hints = (ompi_ptr_t *)NULL;
registry->hints_size = 0;
}
return;
}
static void mca_ptl_ib_mem_registry_info_construct(ompi_object_t *object)
{
mca_ptl_ib_mem_registry_info_t *info = (mca_ptl_ib_mem_registry_info_t *)object;
info->next = NULL;
info->ref_cnt = 0;
info->hndl = VAPI_INVAL_HNDL;
memset(&(info->request), 0, sizeof(VAPI_mr_t));
memset(&(info->reply), 0, sizeof(VAPI_mr_t));
return;
}
static void mca_ptl_ib_mem_registry_info_destruct(ompi_object_t *object)
{
return;
}
static int mca_ptl_ib_mem_registry_info_compare(void *request, void *treenode)
{
int result;
VAPI_mr_t *mr1 = (VAPI_mr_t *)request;
VAPI_mr_t *mr2 = (VAPI_mr_t *)treenode;
uint64_t start1 = mr1->start;
uint64_t start2 = mr2->start;
uint64_t end1 = start1 + mr1->size;
uint64_t end2 = start2 + mr2->size;
if (end1 < start2) {
/* non-overlapping mr1 < mr2 */
result = -1;
}
else if (start1 > end2) {
/* non-overlapping mr1 > mr2 */
result = 1;
}
else if ((end1 <= end2) && (start1 >= start2)) {
/* completely overlapping mr1 and mr2 (mr2 may be bigger) */
if ((mr1->acl & mr2->acl) == mr1->acl) {
/* minimum access permissions met */
result = 0;
}
else {
/* oops -- access permissions not good enough */
result = 1;
}
}
else if (start1 < start2) {
/* partially overlapping mr1 < mr2 */
result = -1;
}
else {
/* partially overlapping mr1 > mr2 */
result = 1;
}
return result;
}
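/* A minimal standalone sketch of the comparison semantics above. The
 * demo_mr type and the sample values are hypothetical (not VAPI types):
 * two regions compare equal only when the request is fully contained in
 * the tree node AND the node grants at least the requested access.
 */
#if 0 /* illustrative only -- not compiled into the PTL */
#include <stdint.h>
#include <stdio.h>
struct demo_mr { uint64_t start; uint64_t size; unsigned acl; };
static int demo_compare(struct demo_mr *req, struct demo_mr *node)
{
    uint64_t end1 = req->start + req->size;
    uint64_t end2 = node->start + node->size;
    if (end1 < node->start) return -1;            /* disjoint, below   */
    if (req->start > end2)  return  1;            /* disjoint, above   */
    if ((end1 <= end2) && (req->start >= node->start))
        /* contained: equal only if access permissions suffice */
        return ((req->acl & node->acl) == req->acl) ? 0 : 1;
    return (req->start < node->start) ? -1 : 1;   /* partial overlap   */
}
int main(void)
{
    struct demo_mr node = { 0x1000, 0x1000, 3 };  /* covers 0x1000-0x2000 */
    struct demo_mr req  = { 0x1800, 0x100,  1 };  /* contained, acl ok    */
    printf("%d\n", demo_compare(&req, &node));    /* prints 0             */
    return 0;
}
#endif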
void mca_ptl_ib_mem_registry_clean_evictables(
mca_ptl_ib_mem_registry_t *registry,
mca_ptl_ib_mem_registry_info_t *info)
{
mca_ptl_ib_mem_registry_info_t *tmp = registry->evictable;
mca_ptl_ib_mem_registry_info_t *prev = NULL;
while (NULL != tmp) {
if (tmp == info) {
if (NULL == prev) {
/* no more entries left -- no evictable list */
registry->evictable = NULL;
}
else {
/* remove this entry from the evictable list */
prev->next = tmp->next;
}
/* clear this entry's evictable link */
tmp->next = NULL;
break;
}
prev = tmp;
tmp = tmp->next;
}
return;
}
mca_ptl_ib_mem_registry_info_t *mca_ptl_ib_mem_registry_register(
mca_ptl_ib_mem_registry_t *registry, VAPI_mr_t *mr)
{
mca_ptl_ib_mem_registry_info_t *info = mca_ptl_ib_mem_registry_find(registry, mr);
mca_ptl_ib_mem_registry_info_t *next_to_evict;
ompi_list_item_t *item;
VAPI_ret_t vapi_result;
int rc;
if (info == (mca_ptl_ib_mem_registry_info_t *)NULL) {
/* create new entry and register memory region */
OMPI_FREE_LIST_GET(&(registry->info_free_list), item, rc);
info = (mca_ptl_ib_mem_registry_info_t *)item;
if (OMPI_SUCCESS != rc) {
/* error - return null pointer */
return info;
}
memcpy(&(info->request),mr,sizeof(VAPI_mr_t));
info->ref_cnt = 1;
do {
vapi_result = VAPI_register_mr(registry->ib_ptl->nic, mr,
&(info->hndl), &(info->reply));
if (VAPI_OK != vapi_result) {
if (VAPI_EAGAIN == vapi_result) {
    /* evict an unused memory region, if at all possible */
    if (NULL != registry->evictable) {
        next_to_evict = registry->evictable->next;
        mca_ptl_ib_mem_registry_real_deregister(registry, registry->evictable);
        registry->evictable = next_to_evict;
    }
    else {
        /* nothing left to evict -- give up rather than retry forever */
        item = (ompi_list_item_t *)info;
        OMPI_FREE_LIST_RETURN(&(registry->info_free_list), item);
        return NULL;
    }
}
else {
/* fatal error */
item = (ompi_list_item_t *)info;
OMPI_FREE_LIST_RETURN(&(registry->info_free_list), item);
info = NULL;
return info;
}
}
} while ((VAPI_OK != vapi_result) && (NULL != info));
/* insert a reference to this information into the red/black tree */
rc = ompi_rb_tree_insert(&(registry->rb_tree), &(info->reply), info);
/* aargh! what do we do if the tree insert fails... */
mca_ptl_ib_mem_registry_insert_hint(registry, &(info->reply), info);
}
else {
if (0 == info->ref_cnt) {
/* make sure we're not on the evictable list */
mca_ptl_ib_mem_registry_clean_evictables(registry, info);
}
(info->ref_cnt)++;
}
return info;
}
mca_ptl_ib_mem_registry_info_t *mca_ptl_ib_register_mem_with_registry(
mca_ptl_ib_module_t *ib_module,
void *addr, size_t len)
{
mca_ptl_ib_mem_registry_info_t *info;
VAPI_mr_t mr;
mr.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
mr.l_key = 0;
mr.r_key = 0;
mr.pd_hndl = ib_module->ptag;
mr.size = len;
mr.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr;
mr.type = VAPI_MR;
info = mca_ptl_ib_mem_registry_register(&(ib_module->mem_registry),&mr);
return info;
}
int mca_ptl_ib_deregister_mem_with_registry(
mca_ptl_ib_module_t *ib_module,
void *addr, size_t len)
{
VAPI_mr_t mr;
int rc;
mr.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
mr.l_key = 0;
mr.r_key = 0;
mr.pd_hndl = ib_module->ptag;
mr.size = len;
mr.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr;
mr.type = VAPI_MR;
rc = mca_ptl_ib_mem_registry_deregister(&(ib_module->mem_registry),&mr);
return rc;
}
static int mca_ptl_ib_mem_registry_real_deregister(
mca_ptl_ib_mem_registry_t *registry,
mca_ptl_ib_mem_registry_info_t *info)
{
ompi_list_item_t *item;
VAPI_ret_t vapi_result;
int i;
/* clear hints array of references to this info object */
for (i = 0; i < registry->hints_size; i++) {
if (registry->hints[i].pval == info) {
registry->hints[i].pval = (void *)NULL;
}
}
/* delete the info object from the red/black tree */
ompi_rb_tree_delete(&(registry->rb_tree), &(info->reply));
/* do the real deregistration */
vapi_result = VAPI_deregister_mr(registry->ib_ptl->nic, info->hndl);
/* return the info object to the free list */
item = (ompi_list_item_t *)info;
OMPI_FREE_LIST_RETURN(&(registry->info_free_list), item);
/* return an error if we could not successfully deregister memory region */
if (VAPI_OK != vapi_result) {
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_ptl_ib_mem_registry_deregister(
mca_ptl_ib_mem_registry_t *registry, VAPI_mr_t *mr)
{
mca_ptl_ib_mem_registry_info_t *info = mca_ptl_ib_mem_registry_find(registry, mr);
if (info != NULL) {
if (info->ref_cnt > 0) {
(info->ref_cnt)--;
if (0 == info->ref_cnt) {
info->next = registry->evictable;
registry->evictable = info;
}
}
}
else {
return OMPI_ERR_NOT_FOUND;
}
return OMPI_SUCCESS;
}
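/* Minimal usage sketch of the registry API above (illustrative only,
 * not part of the original source; assumes an already initialized
 * ib_module). Note that deregistering only drops the reference count;
 * when it reaches zero the region is queued as evictable, and the real
 * VAPI deregistration is deferred until memory pressure forces eviction.
 */
#if 0 /* illustrative only */
static void demo_registry_usage(mca_ptl_ib_module_t *ib_module,
                                void *buf, size_t len)
{
    mca_ptl_ib_mem_registry_info_t *info;
    /* register (or re-use a cached registration covering buf..buf+len) */
    info = mca_ptl_ib_register_mem_with_registry(ib_module, buf, len);
    if (NULL == info) {
        return;
    }
    /* ... post work requests using info->reply.l_key / r_key ... */
    /* drop our reference; region becomes evictable, not deregistered */
    mca_ptl_ib_deregister_mem_with_registry(ib_module, buf, len);
}
#endif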
int mca_ptl_ib_mem_registry_init(
mca_ptl_ib_mem_registry_t *registry,
struct mca_ptl_ib_module_t *ib_ptl)
{
registry->ib_ptl = ib_ptl;
return OMPI_SUCCESS;
}

View file

@@ -1,157 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_IB_MEMORY_H
#define MCA_PTL_IB_MEMORY_H
/* Standard system includes */
#include <sys/types.h>
/* Open MPI includes */
#include "include/types.h"
#include "include/constants.h"
#include "class/ompi_object.h"
#include "class/ompi_list.h"
#include "class/ompi_rb_tree.h"
#include "class/ompi_free_list.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* vapi.h is not a C++ safe header file */
#include <vapi.h>
#include <vapi_common.h>
struct mca_ptl_ib_module_t;
typedef struct mca_ptl_ib_mem_registry_info_t mca_ptl_ib_mem_registry_info_t;
struct mca_ptl_ib_mem_registry_info_t {
ompi_list_item_t super;
mca_ptl_ib_mem_registry_info_t *next;
int ref_cnt;
VAPI_mr_hndl_t hndl;
VAPI_mr_t request;
VAPI_mr_t reply;
};
OBJ_CLASS_DECLARATION(mca_ptl_ib_mem_registry_info_t);
typedef struct mca_ptl_ib_mem_registry_t mca_ptl_ib_mem_registry_t;
struct mca_ptl_ib_mem_registry_t {
ompi_rb_tree_t rb_tree;
ompi_free_list_t info_free_list;
ompi_ptr_t *hints;
mca_ptl_ib_mem_registry_info_t *evictable;
struct mca_ptl_ib_module_t *ib_ptl;
int hints_log_size;
int hints_size;
};
OBJ_CLASS_DECLARATION(mca_ptl_ib_mem_registry_t);
static inline void mca_ptl_ib_mem_registry_insert_hint(
mca_ptl_ib_mem_registry_t *registry, VAPI_mr_t *key,
mca_ptl_ib_mem_registry_info_t *info)
{
uint64_t hints_hash = 0, addrll;
if (registry->hints_size) {
addrll = (uint64_t)(key->start);
/* calculate hash index for hints array - hash is (hints_log_size - 1) bits of key
* from first non-zero least significant bit
*/
hints_hash = addrll & (-addrll);
hints_hash = (((hints_hash << registry->hints_log_size) - hints_hash) & addrll) /
hints_hash;
registry->hints[hints_hash].pval = info;
}
return;
}
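/* Worked example of the hints hash above (values hypothetical): with
 * hints_log_size = 4 and key->start = 0x12340, addrll & (-addrll)
 * isolates the lowest set bit, 0x40. The mask (0x40 << 4) - 0x40 =
 * 0x3C0 then selects hints_log_size bits starting at that position:
 * 0x3C0 & 0x12340 = 0x340, and 0x340 / 0x40 = 13, so this key lands in
 * hints[13] of the 1 << 4 = 16 entry array. Because the lowest set bit
 * of addrll is always included, only (hints_log_size - 1) bits of the
 * index actually vary, as the comment above notes.
 */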
/* find information on a registered memory region for a given address,
* region size, and access permissions
*
*/
static inline mca_ptl_ib_mem_registry_info_t *mca_ptl_ib_mem_registry_find(
mca_ptl_ib_mem_registry_t *registry, VAPI_mr_t *key)
{
mca_ptl_ib_mem_registry_info_t *info = (mca_ptl_ib_mem_registry_info_t *)NULL;
uint64_t hints_hash = 0, addrll;
if (registry->hints_size) {
addrll = (uint64_t)(key->start);
/* calculate hash index for hints array - hash is (hints_log_size - 1) bits of key
* from first non-zero least significant bit
*/
hints_hash = addrll & (-addrll);
hints_hash = (((hints_hash << registry->hints_log_size) - hints_hash) & addrll) /
hints_hash;
if ((info = registry->hints[hints_hash].pval) != (void *)NULL) {
if ((info->reply.start <= key->start) &&
((info->reply.start + info->reply.size) >= (key->start + key->size)) &&
((info->reply.acl & key->acl) == key->acl)) {
return info;
}
}
}
/* search the red/black tree */
info = ompi_rb_tree_find(&(registry->rb_tree), key);
/* store a pointer to this info in the hints array for later lookups */
if ((info != NULL) && registry->hints_size) {
registry->hints[hints_hash].pval = info;
}
return info;
}
/* prototypes */
mca_ptl_ib_mem_registry_info_t *mca_ptl_ib_mem_registry_register(
mca_ptl_ib_mem_registry_t *registry,
VAPI_mr_t *mr);
mca_ptl_ib_mem_registry_info_t *mca_ptl_ib_register_mem_with_registry(
struct mca_ptl_ib_module_t *ib_ptl,
void *addr, size_t len);
int mca_ptl_ib_deregister_mem_with_registry(
struct mca_ptl_ib_module_t *ib_ptl,
void *addr, size_t len);
int mca_ptl_ib_mem_registry_deregister(
mca_ptl_ib_mem_registry_t *registry,
VAPI_mr_t *mr);
int mca_ptl_ib_mem_registry_init(
mca_ptl_ib_mem_registry_t* registry,
struct mca_ptl_ib_module_t *ib_ptl);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

View file

@@ -1,537 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "include/types.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/ns/base/base.h"
#include "mca/oob/base/base.h"
#include "mca/rml/rml.h"
#include "mca/errmgr/errmgr.h"
#include "dps/dps.h"
#include "ptl_ib.h"
#include "ptl_ib_addr.h"
#include "ptl_ib_peer.h"
#include "ptl_ib_proc.h"
#include "ptl_ib_priv.h"
#include "ptl_ib_sendfrag.h"
static void mca_ptl_ib_peer_construct(mca_ptl_base_peer_t* peer);
static void mca_ptl_ib_peer_destruct(mca_ptl_base_peer_t* peer);
OBJ_CLASS_INSTANCE(mca_ptl_ib_peer_t,
ompi_list_item_t, mca_ptl_ib_peer_construct,
mca_ptl_ib_peer_destruct);
/*
* Initialize state of the peer instance.
*
*/
static void mca_ptl_ib_peer_construct(mca_ptl_base_peer_t* peer)
{
peer->peer_ptl = 0;
peer->peer_proc = 0;
peer->peer_tstamp = 0.0;
peer->peer_state = MCA_PTL_IB_CLOSED;
peer->peer_retries = 0;
OBJ_CONSTRUCT(&peer->peer_send_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&peer->peer_recv_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&peer->pending_send_frags, ompi_list_t);
}
/*
* Destroy a peer
*
*/
static void mca_ptl_ib_peer_destruct(mca_ptl_base_peer_t* peer)
{
}
/*
* Send connection information to remote peer using OOB
*
*/
static void mca_ptl_ib_peer_send_cb(
int status,
orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
}
static int mca_ptl_ib_peer_send_connect_req(mca_ptl_base_peer_t* peer)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
if(NULL == buffer) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the info in the send buffer */
rc = orte_dps.pack(buffer, &peer->lcl_qp_prop.qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.pack(buffer, &peer->peer_ptl->port.lid, 1, ORTE_UINT32);
/* send to peer */
rc = orte_rml.send_buffer_nb(&peer->peer_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_ptl_ib_peer_send_cb, NULL);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Send connect ACK to remote peer
*
*/
static int mca_ptl_ib_peer_send_connect_ack(mca_ptl_base_peer_t* peer)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
uint32_t zero = 0;
if(NULL == buffer) {
    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the info in the send buffer */
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send to peer */
rc = orte_rml.send_buffer_nb(&peer->peer_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_ptl_ib_peer_send_cb, NULL);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Set remote connection info
*
* XXX: Currently size is unused; this will change as soon as
* more info is exchanged at connection setup.
*
*/
static int mca_ptl_ib_peer_set_remote_info(mca_ptl_base_peer_t* peer, orte_buffer_t* buffer)
{
int rc;
size_t cnt = 1;
rc = orte_dps.unpack(buffer, &peer->rem_qp_num, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.unpack(buffer, &peer->rem_lid, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
D_PRINT("Received QP num = %d, LID = %d",
peer->rem_qp_num,
peer->rem_lid);
return ORTE_SUCCESS;
}
static int mca_ptl_ib_peer_init(
mca_ptl_ib_peer_t *peer)
{
return OMPI_SUCCESS;
}
/*
* Start to connect to the peer. We send our Queue Pair
* information over the TCP OOB communication mechanism.
* On completion of our send, a send completion handler
* is called.
*
*/
static int mca_ptl_ib_peer_start_connect(mca_ptl_base_peer_t* peer)
{
mca_ptl_ib_module_t* ib_ptl = peer->peer_ptl;
int rc;
/* Create the Queue Pair */
if(OMPI_SUCCESS != (rc = mca_ptl_ib_create_qp(ib_ptl->nic,
ib_ptl->ptag,
ib_ptl->cq_hndl,
ib_ptl->cq_hndl,
&peer->lcl_qp_hndl,
&peer->lcl_qp_prop,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
D_PRINT("mca_ptl_ib_peer_start_connect: qp_num=%d lid=%d", peer->lcl_qp_prop.qp_num, ib_ptl->port.lid);
/* Send connection info over to remote peer */
peer->peer_state = MCA_PTL_IB_CONNECTING;
if(OMPI_SUCCESS != (rc = mca_ptl_ib_peer_send_connect_req(peer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Reply to a `start-connect' message
*
*/
static int mca_ptl_ib_peer_reply_start_connect(mca_ptl_ib_peer_t *peer, orte_buffer_t* buffer)
{
mca_ptl_ib_module_t* ib_ptl = peer->peer_ptl;
int rc;
/* Create the Queue Pair */
if(OMPI_SUCCESS != (rc = mca_ptl_ib_create_qp(ib_ptl->nic,
ib_ptl->ptag,
ib_ptl->cq_hndl,
ib_ptl->cq_hndl,
&peer->lcl_qp_hndl,
&peer->lcl_qp_prop,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
D_PRINT("mca_ptl_ib_peer_reply_start_connect: qp_num=%d lid=%d", peer->lcl_qp_prop.qp_num, ib_ptl->port.lid);
/* Set the remote side info */
rc = mca_ptl_ib_peer_set_remote_info(peer, buffer);
if(ORTE_SUCCESS != rc) {
    ORTE_ERROR_LOG(rc);
    return rc;
}
/* Connect to peer */
rc = mca_ptl_ib_peer_connect(peer);
if(rc != OMPI_SUCCESS) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
/* Send connection info over to remote peer */
if(OMPI_SUCCESS != (rc = mca_ptl_ib_peer_send_connect_req(peer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
*
*/
static void mca_ptl_ib_peer_connected(mca_ptl_ib_peer_t *peer)
{
peer->peer_state = MCA_PTL_IB_CONNECTED;
mca_ptl_ib_progress_send_frags(peer);
}
/*
* Non blocking OOB recv callback.
* Read incoming QP and other info, and if this peer
* is trying to connect, reply with our QP info,
* otherwise try to modify QP's and establish
* reliable connection
*
*/
static void mca_ptl_ib_peer_recv(
int status,
orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
mca_ptl_ib_proc_t *ib_proc;
mca_ptl_ib_peer_t *ib_peer;
int peer_state;
int rc;
for(ib_proc = (mca_ptl_ib_proc_t*)
ompi_list_get_first(&mca_ptl_ib_component.ib_procs);
ib_proc != (mca_ptl_ib_proc_t*)
ompi_list_get_end(&mca_ptl_ib_component.ib_procs);
ib_proc = (mca_ptl_ib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_guid.vpid == peer->vpid) {
/* Try to get the peer instance of this proc */
/* Limitation: Right now, we have only 1 peer
* for every process. Need several changes, some
* in PML/PTL interface to set this right */
ib_peer = ib_proc->proc_peers[0];
peer_state = ib_peer->peer_state;
/* Update status */
switch(peer_state) {
case MCA_PTL_IB_CLOSED :
/* We had this connection closed before.
* The peer is trying to connect. Move the
* status of this connection to CONNECTING,
* and then reply with our QP information */
if(OMPI_SUCCESS != (rc = mca_ptl_ib_peer_reply_start_connect(ib_peer, buffer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Setup state as connected */
ib_peer->peer_state = MCA_PTL_IB_CONNECT_ACK;
break;
case MCA_PTL_IB_CONNECTING :
mca_ptl_ib_peer_set_remote_info(ib_peer, buffer);
if(OMPI_SUCCESS != (rc = mca_ptl_ib_peer_connect(ib_peer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Setup state as connected */
mca_ptl_ib_peer_connected(ib_peer);
/* Send him an ack */
mca_ptl_ib_peer_send_connect_ack(ib_peer);
break;
case MCA_PTL_IB_CONNECT_ACK:
mca_ptl_ib_peer_connected(ib_peer);
break;
case MCA_PTL_IB_CONNECTED :
break;
default :
ompi_output(0, "Connected -> Connecting not possible.\n");
}
break;
}
}
/* Okay, now that we are done receiving,
* re-post the buffer */
mca_ptl_ib_post_recv();
}
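/*
 * Connection handshake summary (sketch of the state machine driven by
 * mca_ptl_ib_peer_recv above):
 *
 *   initiator                           responder
 *   ---------                           ---------
 *   CLOSED -> CONNECTING:
 *   create QP, send (qp_num, lid) --->  CLOSED: create QP, read remote
 *                                       info, connect QP, reply with
 *                                       (qp_num, lid) -> CONNECT_ACK
 *   CONNECTING: read remote info,
 *   connect QP -> CONNECTED,
 *   send ack                      --->  CONNECT_ACK: -> CONNECTED
 *
 * Both sides end up with an RC queue pair wired to the (qp_num, lid)
 * pair exchanged over the OOB channel, and any queued send frags are
 * drained by mca_ptl_ib_progress_send_frags.
 */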
void mca_ptl_ib_post_recv()
{
D_PRINT("");
orte_rml.recv_buffer_nb(
ORTE_RML_NAME_ANY,
ORTE_RML_TAG_DYNAMIC-1,
0,
mca_ptl_ib_peer_recv,
NULL);
}
/*
* Attempt to send a fragment using a given peer. If the peer is not
* connected, queue the fragment and start the connection as required.
*/
int mca_ptl_ib_peer_send(mca_ptl_base_peer_t* peer,
mca_ptl_ib_send_frag_t* frag)
{
int rc;
OMPI_THREAD_LOCK(&peer->peer_send_lock);
switch(peer->peer_state) {
case MCA_PTL_IB_CONNECTING:
D_PRINT("Queing because state is connecting");
ompi_list_append(&peer->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_PTL_IB_CONNECT_ACK:
D_PRINT("Queuing because waiting for ack");
ompi_list_append(&peer->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_PTL_IB_CLOSED:
D_PRINT("Connection to peer closed ... connecting ...");
ompi_list_append(&peer->pending_send_frags,
(ompi_list_item_t *)frag);
rc = mca_ptl_ib_peer_start_connect(peer);
break;
case MCA_PTL_IB_FAILED:
rc = OMPI_ERR_UNREACH;
break;
case MCA_PTL_IB_CONNECTED:
{
mca_ptl_ib_module_t* ib_ptl = peer->peer_ptl;
ompi_list_item_t* item;
A_PRINT("Send to : %d, len : %d, frag : %p",
peer->peer_proc->proc_guid.vpid,
frag->ib_buf.desc.sg_entry.len,
frag);
rc = mca_ptl_ib_post_send(peer->peer_ptl, peer,
&frag->ib_buf, (void*) frag);
while(NULL != (item = ompi_list_remove_first(&ib_ptl->repost))) {
mca_ptl_ib_buffer_repost(ib_ptl->nic, item);
}
break;
}
default:
rc = OMPI_ERR_UNREACH;
}
OMPI_THREAD_UNLOCK(&peer->peer_send_lock);
return rc;
}
void mca_ptl_ib_progress_send_frags(mca_ptl_ib_peer_t* peer)
{
ompi_list_item_t *frag_item;
mca_ptl_ib_send_frag_t *sendfrag;
/* Check if peer is connected */
if(peer->peer_state != MCA_PTL_IB_CONNECTED) {
return;
}
/* While there are frags in the list,
* process them */
while(!ompi_list_is_empty(&(peer->pending_send_frags))) {
frag_item = ompi_list_remove_first(&(peer->pending_send_frags));
sendfrag = (mca_ptl_ib_send_frag_t *) frag_item;
/* We need to post this one */
if(mca_ptl_ib_post_send(peer->peer_ptl, peer, &sendfrag->ib_buf,
(void*) sendfrag)
!= OMPI_SUCCESS) {
ompi_output(0, "Error in posting send");
}
}
}
/*
* Complete connection to peer.
*/
int mca_ptl_ib_peer_connect(
mca_ptl_ib_peer_t *peer)
{
int rc, i;
VAPI_ret_t ret;
ib_buffer_t *ib_buf_ptr;
mca_ptl_ib_module_t *ib_ptl = peer->peer_ptl;
/* Establish Reliable Connection */
rc = mca_ptl_ib_qp_init(ib_ptl->nic,
peer->lcl_qp_hndl,
peer->rem_qp_num,
peer->rem_lid);
if(rc != OMPI_SUCCESS) {
return rc;
}
/* Allocate resources to this connection */
peer->lcl_recv = (ib_buffer_t*)
malloc(sizeof(ib_buffer_t) * NUM_IB_RECV_BUF);
if(NULL == peer->lcl_recv) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Register the buffers */
for(i = 0; i < NUM_IB_RECV_BUF; i++) {
rc = mca_ptl_ib_register_mem(ib_ptl->nic, ib_ptl->ptag,
(void*) peer->lcl_recv[i].buf,
MCA_PTL_IB_FIRST_FRAG_SIZE,
&peer->lcl_recv[i].hndl);
if(rc != OMPI_SUCCESS) {
return OMPI_ERROR;
}
ib_buf_ptr = &peer->lcl_recv[i];
ib_buf_ptr->qp_hndl = peer->lcl_qp_hndl;
IB_PREPARE_RECV_DESC(ib_buf_ptr);
}
/* Post receives */
for(i = 0; i < NUM_IB_RECV_BUF; i++) {
ret = VAPI_post_rr(ib_ptl->nic,
peer->lcl_qp_hndl,
&peer->lcl_recv[i].desc.rr);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_post_rr");
}
}
return OMPI_SUCCESS;
}

View file

@@ -1,136 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_PEER_H
#define MCA_PTL_IB_PEER_H
#include "class/ompi_list.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "ptl_ib_recvfrag.h"
#include "ptl_ib_sendfrag.h"
#include "ptl_ib_priv.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_ptl_ib_peer_t);
/**
* State of IB peer connection.
*/
typedef enum {
/* Defines the state in which this PTL instance
* has started the process of connection */
MCA_PTL_IB_CONNECTING,
/* Waiting for ack from peer */
MCA_PTL_IB_CONNECT_ACK,
/* Connected ... both sender & receiver have
* buffers associated with this connection */
MCA_PTL_IB_CONNECTED,
/* Connection is closed, there are no resources
* associated with this */
MCA_PTL_IB_CLOSED,
/* Maximum number of retries have been used.
* Report failure on send to upper layer */
MCA_PTL_IB_FAILED
} mca_ptl_ib_peer_state_t;
/**
* An abstraction that represents a connection to a peer process.
* An instance of mca_ptl_base_peer_t is associated w/ each process
* and PTL pair at startup. However, connections to the peer
* are established dynamically on an as-needed basis:
*/
struct mca_ptl_base_peer_t {
ompi_list_item_t super;
struct mca_ptl_ib_module_t* peer_ptl;
/**< PTL instance that created this connection */
struct mca_ptl_ib_proc_t* peer_proc;
/**< proc structure corresponding to peer */
mca_ptl_ib_peer_state_t peer_state;
/**< current state of the connection */
size_t peer_retries;
/**< number of connection retries attempted */
double peer_tstamp;
/**< timestamp of when the first connection was attempted */
ompi_mutex_t peer_send_lock;
/**< lock for concurrent access to peer state */
ompi_mutex_t peer_recv_lock;
/**< lock for concurrent access to peer state */
ompi_list_t pending_send_frags;
/**< list of pending send frags for this peer */
VAPI_qp_num_t rem_qp_num;
/* Remote side QP number */
IB_lid_t rem_lid;
/* Local identifier of the remote process */
VAPI_qp_hndl_t lcl_qp_hndl;
/* Local QP handle */
VAPI_qp_prop_t lcl_qp_prop;
/* Local QP properties */
ib_buffer_t *lcl_recv;
/* Remote resources associated with this connection */
};
typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t;
typedef struct mca_ptl_base_peer_t mca_ptl_ib_peer_t;
int mca_ptl_ib_peer_send(mca_ptl_base_peer_t*, mca_ptl_ib_send_frag_t*);
int mca_ptl_ib_peer_connect(mca_ptl_base_peer_t*);
void mca_ptl_ib_post_recv(void);
void mca_ptl_ib_progress_send_frags(mca_ptl_ib_peer_t*);
#define DUMP_PEER(peer_ptr) { \
    ompi_output(0, "[%s:%d] ", __FILE__, __LINE__); \
    ompi_output(0, "Dumping peer %d state", \
        peer_ptr->peer_proc->proc_guid.vpid); \
    ompi_output(0, "Local QP hndl : %d", \
        peer_ptr->lcl_qp_hndl); \
    ompi_output(0, "Local QP num : %d", \
        peer_ptr->lcl_qp_prop.qp_num); \
    ompi_output(0, "Remote QP num : %d", \
        peer_ptr->rem_qp_num); \
    ompi_output(0, "Remote LID : %d", \
        peer_ptr->rem_lid); \
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

View file

@@ -1,474 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ptl_ib_vapi.h"
#include "ptl_ib_priv.h"
#include "ptl_ib.h"
#include "ptl_ib_memory.h"
/*
* Asynchronous event handler to detect unforeseen
* events. Usually, such events are catastrophic.
* Should have a robust mechanism to handle these
* events and abort the OMPI application if necessary.
*
*/
static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
VAPI_event_record_t * event_p,
void *priv_data)
{
switch (event_p->type) {
case VAPI_QP_PATH_MIGRATED:
case VAPI_EEC_PATH_MIGRATED:
case VAPI_QP_COMM_ESTABLISHED:
case VAPI_EEC_COMM_ESTABLISHED:
case VAPI_SEND_QUEUE_DRAINED:
case VAPI_PORT_ACTIVE:
{
D_PRINT("Got an asynchronous event: %s\n",
VAPI_event_record_sym(event_p->type));
break;
}
case VAPI_CQ_ERROR:
case VAPI_LOCAL_WQ_INV_REQUEST_ERROR:
case VAPI_LOCAL_WQ_ACCESS_VIOL_ERROR:
case VAPI_LOCAL_WQ_CATASTROPHIC_ERROR:
case VAPI_PATH_MIG_REQ_ERROR:
case VAPI_LOCAL_EEC_CATASTROPHIC_ERROR:
case VAPI_LOCAL_CATASTROPHIC_ERROR:
case VAPI_PORT_ERROR:
{
ompi_output(0, "Got an asynchronous event: %s (%s)",
VAPI_event_record_sym(event_p->type),
VAPI_event_syndrome_sym(event_p->
syndrome));
break;
}
default:
ompi_output(0, "Warning!! Got an undefined "
"asynchronous event\n");
}
}
/*
* Open the HCA identified by hca_id and return its handle.
*/
static int mca_ptl_ib_get_hca_hndl(VAPI_hca_id_t hca_id,
VAPI_hca_hndl_t* hca_hndl)
{
VAPI_ret_t ret;
/* Open the HCA */
ret = EVAPI_get_hca_hndl(hca_id, hca_hndl);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "EVAPI_get_hca_hndl");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static int mca_ptl_ib_query_hca_prop(VAPI_hca_hndl_t nic,
VAPI_hca_port_t* port)
{
VAPI_ret_t ret;
/* Querying for port properties */
ret = VAPI_query_hca_port_prop(nic,
(IB_port_t)DEFAULT_PORT,
port);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_query_hca_port_prop");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static int mca_ptl_ib_alloc_pd(VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t* ptag)
{
VAPI_ret_t ret;
ret = VAPI_alloc_pd(nic, ptag);
if(ret != VAPI_OK) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_alloc_pd");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static int mca_ptl_ib_create_cq(VAPI_hca_hndl_t nic,
VAPI_cq_hndl_t* cq_hndl)
{
uint32_t act_num_cqe = 0;
VAPI_ret_t ret;
ret = VAPI_create_cq(nic, DEFAULT_CQ_SIZE,
cq_hndl, &act_num_cqe);
if( (VAPI_OK != ret) || (0 == act_num_cqe)) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_create_cq");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static int mca_ptl_ib_set_async_handler(VAPI_hca_hndl_t nic,
EVAPI_async_handler_hndl_t *async_handler)
{
VAPI_ret_t ret;
ret = EVAPI_set_async_event_handler(nic,
async_event_handler, 0, async_handler);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "EVAPI_set_async_event_handler");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_ptl_ib_create_qp(VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t ptag,
VAPI_cq_hndl_t recv_cq,
VAPI_cq_hndl_t send_cq,
VAPI_qp_hndl_t* qp_hndl,
VAPI_qp_prop_t* qp_prop,
int transport_type)
{
VAPI_ret_t ret;
VAPI_qp_init_attr_t qp_init_attr;
switch(transport_type) {
case VAPI_TS_RC: /* Set up RC qp parameters */
qp_init_attr.cap.max_oust_wr_rq = DEFAULT_WQ_SIZE;
qp_init_attr.cap.max_oust_wr_sq = DEFAULT_WQ_SIZE;
qp_init_attr.cap.max_sg_size_rq = DEFAULT_SG_LIST;
qp_init_attr.cap.max_sg_size_sq = DEFAULT_SG_LIST;
qp_init_attr.pd_hndl = ptag;
/* We don't have Reliable Datagram Handle right now */
qp_init_attr.rdd_hndl = 0;
/* Set Send and Recv completion queues */
qp_init_attr.rq_cq_hndl = recv_cq;
qp_init_attr.sq_cq_hndl = send_cq;
/* Signal all work requests on this queue pair */
qp_init_attr.rq_sig_type = VAPI_SIGNAL_REQ_WR;
qp_init_attr.sq_sig_type = VAPI_SIGNAL_REQ_WR;
/* Use Reliable Connection transport service */
qp_init_attr.ts_type = VAPI_TS_RC;
break;
case VAPI_TS_UD: /* Set up UD qp parameters */
default:
return OMPI_ERR_NOT_IMPLEMENTED;
}
ret = VAPI_create_qp(nic, &qp_init_attr,
qp_hndl, qp_prop);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_create_qp");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_ptl_ib_module_init(mca_ptl_ib_module_t *ib_ptl)
{
/* Get HCA handle */
if(mca_ptl_ib_get_hca_hndl(ib_ptl->hca_id, &ib_ptl->nic)
!= OMPI_SUCCESS) {
return OMPI_ERROR;
}
/* Allocate a protection domain for this NIC */
if(mca_ptl_ib_alloc_pd(ib_ptl->nic, &ib_ptl->ptag)
!= OMPI_SUCCESS) {
return OMPI_ERROR;
}
/* Get the properties of the HCA,
* LID etc. are part of the properties */
if(mca_ptl_ib_query_hca_prop(ib_ptl->nic, &ib_ptl->port)
!= OMPI_SUCCESS) {
return OMPI_ERROR;
}
/* Create Completion Q */
/* We use a single completion Q for sends & recvs
* This saves us overhead of polling 2 separate Qs */
if(mca_ptl_ib_create_cq(ib_ptl->nic, &ib_ptl->cq_hndl)
!= OMPI_SUCCESS) {
return OMPI_ERROR;
}
/* Attach asynchronous handler */
if(mca_ptl_ib_set_async_handler(ib_ptl->nic,
&ib_ptl->async_handler)
!= OMPI_SUCCESS) {
return OMPI_ERROR;
}
/* initialize memory region registry */
OBJ_CONSTRUCT(&ib_ptl->mem_registry, mca_ptl_ib_mem_registry_t);
mca_ptl_ib_mem_registry_init(&ib_ptl->mem_registry, ib_ptl);
return OMPI_SUCCESS;
}
int mca_ptl_ib_qp_init(VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl,
VAPI_qp_num_t remote_qp,
IB_lid_t remote_lid)
{
VAPI_ret_t ret;
VAPI_qp_attr_t qp_attr;
VAPI_qp_attr_mask_t qp_attr_mask;
VAPI_qp_cap_t qp_cap;
/* Modifying QP to INIT */
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_INIT;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.pkey_ix = DEFAULT_PKEY_IX;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
qp_attr.port = DEFAULT_PORT;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PORT);
qp_attr.remote_atomic_flags = VAPI_EN_REM_WRITE | VAPI_EN_REM_READ;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_REMOTE_ATOMIC_FLAGS);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
D_PRINT("Modified to init..Qp %d", qp_hndl);
/********************** INIT --> RTR ************************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_RTR;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.qp_ous_rd_atom = DEFAULT_QP_OUS_RD_ATOM;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_OUS_RD_ATOM);
qp_attr.path_mtu = DEFAULT_MTU;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PATH_MTU);
qp_attr.rq_psn = DEFAULT_PSN;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RQ_PSN);
qp_attr.pkey_ix = DEFAULT_PKEY_IX;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
qp_attr.min_rnr_timer = DEFAULT_MIN_RNR_TIMER;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_MIN_RNR_TIMER);
qp_attr.av.sl = DEFAULT_SERVICE_LEVEL;
qp_attr.av.grh_flag = FALSE;
qp_attr.av.static_rate = DEFAULT_STATIC_RATE;
qp_attr.av.src_path_bits = DEFAULT_SRC_PATH_BITS;
qp_attr.dest_qp_num = remote_qp;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_DEST_QP_NUM);
qp_attr.av.dlid = remote_lid;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_AV);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
D_PRINT("Modified to RTR..Qp %d", qp_hndl);
/************** RTS *******************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
qp_attr.qp_state = VAPI_RTS;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
qp_attr.sq_psn = DEFAULT_PSN;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_SQ_PSN);
qp_attr.timeout = DEFAULT_TIME_OUT;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_TIMEOUT);
qp_attr.retry_count = DEFAULT_RETRY_COUNT;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RETRY_COUNT);
qp_attr.rnr_retry = DEFAULT_RNR_RETRY;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RNR_RETRY);
qp_attr.ous_dst_rd_atom = DEFAULT_MAX_RDMA_DST_OPS;
QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_OUS_DST_RD_ATOM);
ret = VAPI_modify_qp(nic, qp_hndl,
&qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_modify_qp");
return OMPI_ERROR;
}
D_PRINT("Modified to RTS..Qp %d", qp_hndl);
return OMPI_SUCCESS;
}
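/* Call-order sketch for bringing up an RC connection with the two
 * helpers above (illustrative only; peer_qp_num and peer_lid must
 * arrive via the OOB exchange shown in ptl_ib_peer.c):
 */
#if 0 /* illustrative only */
static int demo_bring_up(mca_ptl_ib_module_t *ib_ptl,
                         VAPI_qp_num_t peer_qp_num, IB_lid_t peer_lid)
{
    VAPI_qp_hndl_t qp_hndl;
    VAPI_qp_prop_t qp_prop;
    int rc;
    /* 1. create a local RC queue pair (yields our qp_num in qp_prop) */
    rc = mca_ptl_ib_create_qp(ib_ptl->nic, ib_ptl->ptag,
                              ib_ptl->cq_hndl, ib_ptl->cq_hndl,
                              &qp_hndl, &qp_prop, VAPI_TS_RC);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
    /* 2. exchange (qp_prop.qp_num, ib_ptl->port.lid) with the peer
     *    out-of-band, obtaining peer_qp_num / peer_lid ... */
    /* 3. walk the QP through INIT -> RTR -> RTS against the peer */
    return mca_ptl_ib_qp_init(ib_ptl->nic, qp_hndl, peer_qp_num, peer_lid);
}
#endif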
int mca_ptl_ib_register_mem(VAPI_hca_hndl_t nic, VAPI_pd_hndl_t ptag,
void* buf, int len, vapi_memhandle_t* memhandle)
{
VAPI_ret_t ret;
VAPI_mrw_t mr_in, mr_out;
vapi_memhandle_t mem_handle;
mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
mr_in.l_key = 0;
mr_in.r_key = 0;
mr_in.pd_hndl = ptag;
mr_in.size = len;
mr_in.start = (VAPI_virt_addr_t) (MT_virt_addr_t) buf;
mr_in.type = VAPI_MR;
ret = VAPI_register_mr(nic, &mr_in, &mem_handle.hndl, &mr_out);
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_register_mr");
return OMPI_ERROR;
}
mem_handle.lkey = mr_out.l_key;
mem_handle.rkey = mr_out.r_key;
memhandle->lkey = mem_handle.lkey;
memhandle->rkey = mem_handle.rkey;
/* D_PRINT("addr = %p, lkey = %d\n", buf, memhandle->lkey); */
memhandle->hndl = mem_handle.hndl;
return OMPI_SUCCESS;
}
int mca_ptl_ib_post_send(mca_ptl_ib_module_t *ib_ptl,
mca_ptl_ib_peer_t *peer,
ib_buffer_t *ib_buf, void* addr)
{
VAPI_ret_t ret;
int msg_len = ib_buf->desc.sg_entry.len;
IB_PREPARE_SEND_DESC(ib_buf, (peer->rem_qp_num),
msg_len, addr);
/* TODO - get this from NIC properties */
if(msg_len < 128) {
ret = EVAPI_post_inline_sr(ib_ptl->nic,
peer->lcl_qp_hndl,
&ib_buf->desc.sr);
} else {
ret = VAPI_post_sr(ib_ptl->nic,
peer->lcl_qp_hndl,
&ib_buf->desc.sr);
}
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_post_sr");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
void mca_ptl_ib_buffer_repost(VAPI_hca_hndl_t nic, void* addr)
{
VAPI_ret_t ret;
ib_buffer_t *ib_buf = (ib_buffer_t*)addr;
IB_PREPARE_RECV_DESC(ib_buf);
ret = VAPI_post_rr(nic, ib_buf->qp_hndl, &(ib_buf->desc.rr));
if(VAPI_OK != ret) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_post_rr");
ompi_output(0, "Error in buffer reposting");
}
}
void mca_ptl_ib_prepare_ack(mca_ptl_ib_module_t *ib_ptl,
void* addr_to_reg, int len_to_reg,
void* ack_buf, int* len_added)
{
mca_ptl_ib_mem_registry_info_t *info =
mca_ptl_ib_register_mem_with_registry(ib_ptl,
addr_to_reg, (size_t)len_to_reg);
if(NULL == info) {
ompi_output(0, "Error in registering");
}
A_PRINT("Sending Remote key : %d", info->reply.r_key);
memcpy(ack_buf,(void*) &(info->reply.r_key), sizeof(VAPI_rkey_t));
*len_added = sizeof(VAPI_rkey_t);
}
int mca_ptl_ib_rdma_write(mca_ptl_ib_module_t *ib_ptl,
mca_ptl_ib_peer_t *peer, ib_buffer_t *ib_buf,
void* send_buf, size_t send_len, void* remote_buf,
VAPI_rkey_t remote_key, void* id_buf)
{
VAPI_ret_t ret;
mca_ptl_ib_mem_registry_info_t *info =
mca_ptl_ib_register_mem_with_registry(ib_ptl,
send_buf, send_len);
if (NULL == info) {
return OMPI_ERROR;
}
/* Prepare descriptor */
IB_PREPARE_RDMA_W_DESC(ib_buf, (peer->rem_qp_num),
send_len, send_buf, (info->reply.l_key), remote_key,
id_buf, remote_buf);
ret = VAPI_post_sr(ib_ptl->nic,
peer->lcl_qp_hndl,
&ib_buf->desc.sr);
if(ret != VAPI_OK) {
MCA_PTL_IB_VAPI_RET(ret, "VAPI_post_sr");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}

View file

@@ -1,217 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_PRIV_H
#define MCA_PTL_IB_PRIV_H
#include <stdint.h>
#include "class/ompi_free_list.h"
#include "ptl_ib_vapi.h"
#include "ptl_ib_memory.h"
#define NUM_IB_SEND_BUF (1)
#define NUM_IB_RECV_BUF (4)
#define MCA_PTL_IB_FIRST_FRAG_SIZE (65536)
typedef enum {
IB_RECV,
IB_SEND
} IB_wr_t;
typedef enum {
IB_COMP_ERROR,
IB_COMP_RECV,
IB_COMP_SEND,
IB_COMP_RDMA_W,
IB_COMP_NOTHING
} IB_comp_t;
struct vapi_memhandle_t {
VAPI_mr_hndl_t hndl;
/* Memory region handle */
VAPI_lkey_t lkey;
/* Local key to registered memory, needed for
* posting send/recv requests */
VAPI_rkey_t rkey;
/* Remote key to registered memory, need to send this
* to remote processes for incoming RDMA ops */
};
typedef struct vapi_memhandle_t vapi_memhandle_t;
struct vapi_descriptor_t {
union {
VAPI_rr_desc_t rr;
/* Receive descriptor */
VAPI_sr_desc_t sr;
/* Send descriptor */
};
VAPI_sg_lst_entry_t sg_entry;
/* Scatter/Gather entry */
};
typedef struct vapi_descriptor_t vapi_descriptor_t;
struct ib_buffer_t {
ompi_list_item_t super;
vapi_descriptor_t desc;
/* Descriptor of the buffer */
vapi_memhandle_t hndl;
/* Buffer handle */
char buf[MCA_PTL_IB_FIRST_FRAG_SIZE];
/* Buffer space */
VAPI_qp_hndl_t qp_hndl;
/* Queue pair used for this IB buffer */
};
typedef struct ib_buffer_t ib_buffer_t;
#define DUMP_IB_STATE(ib_ptl) { \
ompi_output(0, "[%s:%d] ", __FILE__, __LINE__); \
ompi_output(0, "Dumping IB state"); \
ompi_output(0, "HCA ID : %s", ib_ptl->hca_id); \
ompi_output(0, "LID : %d", ib_ptl->port.lid); \
ompi_output(0, "HCA handle : %d", ib_ptl->nic); \
ompi_output(0, "Protection Domain: %d", ib_ptl->ptag); \
ompi_output(0, "Comp Q handle : %d", ib_ptl->cq_hndl); \
ompi_output(0, "Async hndl : %d", ib_ptl->async_handler); \
}
#define IB_PREPARE_RECV_DESC(ib_buf_ptr) { \
ib_buf_ptr->desc.rr.comp_type = VAPI_SIGNALED; \
ib_buf_ptr->desc.rr.opcode = VAPI_RECEIVE; \
ib_buf_ptr->desc.rr.id = (VAPI_virt_addr_t) \
(MT_virt_addr_t) ib_buf_ptr; \
ib_buf_ptr->desc.rr.sg_lst_len = 1; \
ib_buf_ptr->desc.rr.sg_lst_p = &ib_buf_ptr->desc.sg_entry; \
ib_buf_ptr->desc.sg_entry.len = MCA_PTL_IB_FIRST_FRAG_SIZE; \
ib_buf_ptr->desc.sg_entry.lkey = ib_buf_ptr->hndl.lkey; \
ib_buf_ptr->desc.sg_entry.addr = (VAPI_virt_addr_t) \
(MT_virt_addr_t) ib_buf_ptr->buf; \
}
#define IB_PREPARE_SEND_DESC(ib_buf_ptr, qp, msg_len, \
id_buf) { \
ib_buf_ptr->desc.sr.comp_type = VAPI_SIGNALED; \
ib_buf_ptr->desc.sr.opcode = VAPI_SEND; \
ib_buf_ptr->desc.sr.remote_qkey = 0; \
ib_buf_ptr->desc.sr.remote_qp = qp; \
ib_buf_ptr->desc.sr.id = (VAPI_virt_addr_t) \
(MT_virt_addr_t) id_buf; \
ib_buf_ptr->desc.sr.sg_lst_len = 1; \
ib_buf_ptr->desc.sr.sg_lst_p = &ib_buf_ptr->desc.sg_entry; \
ib_buf_ptr->desc.sg_entry.len = msg_len; \
ib_buf_ptr->desc.sg_entry.lkey = ib_buf_ptr->hndl.lkey; \
ib_buf_ptr->desc.sg_entry.addr = (VAPI_virt_addr_t) \
(MT_virt_addr_t) ib_buf_ptr->buf; \
}
#define IB_SET_REMOTE_QP_NUM(ib_buf_ptr, qp) { \
ib_buf_ptr->desc.sr.remote_qp = qp; \
}
#define IB_SET_SEND_DESC_ID(ib_buf_ptr, addr) { \
ib_buf_ptr->desc.sr.id = (VAPI_virt_addr_t) \
(MT_virt_addr_t) addr; \
}
#define IB_SET_SEND_DESC_LEN(ib_buf_ptr, msg_len) { \
ib_buf_ptr->desc.sg_entry.len = msg_len; \
}
#define IB_PREPARE_RDMA_W_DESC(ib_buf_ptr, qp, \
msg_len, user_buf, local_key, remote_key, \
id_buf, remote_buf) { \
ib_buf_ptr->desc.sr.comp_type = VAPI_SIGNALED; \
ib_buf_ptr->desc.sr.opcode = VAPI_RDMA_WRITE; \
ib_buf_ptr->desc.sr.remote_qkey = 0; \
ib_buf_ptr->desc.sr.remote_qp = qp; \
ib_buf_ptr->desc.sr.id = (VAPI_virt_addr_t) \
(MT_virt_addr_t) id_buf; \
ib_buf_ptr->desc.sr.sg_lst_len = 1; \
ib_buf_ptr->desc.sr.sg_lst_p = &ib_buf_ptr->desc.sg_entry; \
ib_buf_ptr->desc.sg_entry.len = msg_len; \
ib_buf_ptr->desc.sg_entry.lkey = local_key; \
ib_buf_ptr->desc.sg_entry.addr = (VAPI_virt_addr_t) \
(MT_virt_addr_t) user_buf; \
ib_buf_ptr->desc.sr.remote_addr = (VAPI_virt_addr_t) \
(MT_virt_addr_t) remote_buf; \
ib_buf_ptr->desc.sr.r_key = remote_key; \
}
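/* Usage sketch for the descriptor macros above (illustrative only; it
 * mirrors mca_ptl_ib_post_send in ptl_ib_priv.c): fill in the send
 * descriptor, then hand it to VAPI.
 */
#if 0 /* illustrative only */
static int demo_post(VAPI_hca_hndl_t nic, VAPI_qp_hndl_t qp_hndl,
                     VAPI_qp_num_t remote_qp, ib_buffer_t *ib_buf,
                     int msg_len)
{
    VAPI_ret_t ret;
    /* point the descriptor at ib_buf->buf, msg_len bytes, remote QP;
     * the buffer itself doubles as the completion id */
    IB_PREPARE_SEND_DESC(ib_buf, remote_qp, msg_len, ib_buf);
    ret = VAPI_post_sr(nic, qp_hndl, &ib_buf->desc.sr);
    return (VAPI_OK == ret) ? OMPI_SUCCESS : OMPI_ERROR;
}
#endif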
struct mca_ptl_ib_module_t;
struct mca_ptl_base_peer_t;
int mca_ptl_ib_module_init(struct mca_ptl_ib_module_t*);
int mca_ptl_ib_register_mem(
VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t ptag,
void* buf,
int len,
vapi_memhandle_t* memhandle);
int mca_ptl_ib_post_send(
struct mca_ptl_ib_module_t *ib_module,
struct mca_ptl_base_peer_t *peer,
ib_buffer_t *ib_buf, void*);
void mca_ptl_ib_buffer_repost(
VAPI_hca_hndl_t nic,
void* addr);
void mca_ptl_ib_prepare_ack(
struct mca_ptl_ib_module_t *ib_module,
void* addr_to_reg, int len_to_reg,
void* ack_buf, int* len_added);
int mca_ptl_ib_rdma_write(
struct mca_ptl_ib_module_t *ib_module,
struct mca_ptl_base_peer_t *peer,
ib_buffer_t *ib_buf,
void* send_buf,
size_t send_len,
void* remote_buf,
VAPI_rkey_t remote_key, void*);
int mca_ptl_ib_create_qp(VAPI_hca_hndl_t nic,
VAPI_pd_hndl_t ptag,
VAPI_cq_hndl_t recv_cq,
VAPI_cq_hndl_t send_cq,
VAPI_qp_hndl_t* qp_hndl,
VAPI_qp_prop_t* qp_prop,
int transport_type);
int mca_ptl_ib_qp_init(
VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl,
VAPI_qp_num_t remote_qp,
IB_lid_t remote_lid);
#endif /* MCA_PTL_IB_PRIV_H */

View file

@@ -1,164 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "class/ompi_hash_table.h"
#include "mca/base/mca_base_module_exchange.h"
#include "ptl_ib.h"
#include "ptl_ib_vapi.h"
#include "ptl_ib_proc.h"
static void mca_ptl_ib_proc_construct(mca_ptl_ib_proc_t* proc);
static void mca_ptl_ib_proc_destruct(mca_ptl_ib_proc_t* proc);
OBJ_CLASS_INSTANCE(mca_ptl_ib_proc_t,
ompi_list_item_t, mca_ptl_ib_proc_construct,
mca_ptl_ib_proc_destruct);
void mca_ptl_ib_proc_construct(mca_ptl_ib_proc_t* proc)
{
proc->proc_ompi = 0;
proc->proc_addr_count = 0;
proc->proc_peers = 0;
proc->proc_peer_count = 0;
OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);
/* add to list of all proc instance */
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
ompi_list_append(&mca_ptl_ib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
}
/*
* Cleanup ib proc instance
*/
void mca_ptl_ib_proc_destruct(mca_ptl_ib_proc_t* proc)
{
/* remove from list of all proc instances */
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
ompi_list_remove_item(&mca_ptl_ib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
/* release resources */
if(NULL != proc->proc_peers) {
free(proc->proc_peers);
}
}
/*
* Look for an existing IB process instances based on the associated
* ompi_proc_t instance.
*/
static mca_ptl_ib_proc_t* mca_ptl_ib_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
mca_ptl_ib_proc_t* ib_proc;
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
for(ib_proc = (mca_ptl_ib_proc_t*)
ompi_list_get_first(&mca_ptl_ib_component.ib_procs);
ib_proc != (mca_ptl_ib_proc_t*)
ompi_list_get_end(&mca_ptl_ib_component.ib_procs);
ib_proc = (mca_ptl_ib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_ompi == ompi_proc) {
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
return ib_proc;
}
}
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
return NULL;
}
/*
* Create a IB process structure. There is a one-to-one correspondence
* between a ompi_proc_t and a mca_ptl_ib_proc_t instance. We cache
* additional data (specifically the list of mca_ptl_ib_peer_t instances,
* and published addresses) associated w/ a given destination on this
* datastructure.
*/
mca_ptl_ib_proc_t* mca_ptl_ib_proc_create(ompi_proc_t* ompi_proc)
{
mca_ptl_ib_proc_t* module_proc = NULL;
/* Check if we have already created a IB proc
* structure for this ompi process */
module_proc = mca_ptl_ib_proc_lookup_ompi(ompi_proc);
if(module_proc != NULL) {
/* Gotcha! */
return module_proc;
}
/* Oops! First time, gotta create a new IB proc
* out of the ompi_proc ... */
module_proc = OBJ_NEW(mca_ptl_ib_proc_t);
/* Initialize number of peer */
module_proc->proc_peer_count = 0;
module_proc->proc_ompi = ompi_proc;
/* build a unique identifier (of arbitrary
* size) to represent the proc */
module_proc->proc_guid = ompi_proc->proc_name;
/* IB module doesn't have addresses exported at
* initialization, so the addr_count is set to one. */
module_proc->proc_addr_count = 1;
/* XXX: Right now, there can be only 1 peer associated
* with a proc. Needs a little bit change in
* mca_ptl_ib_proc_t to allow on demand increasing of
* number of peers for this proc */
module_proc->proc_peers = (mca_ptl_base_peer_t**)
malloc(module_proc->proc_addr_count * sizeof(mca_ptl_base_peer_t*));
if(NULL == module_proc->proc_peers) {
OBJ_RELEASE(module_proc);
return NULL;
}
return module_proc;
}
/*
* Note that this routine must be called with the lock on the process
* already held. Insert a ptl instance into the proc array and assign
* it an address.
*/
int mca_ptl_ib_proc_insert(mca_ptl_ib_proc_t* module_proc,
mca_ptl_base_peer_t* module_peer)
{
/* insert into peer array */
module_peer->peer_proc = module_proc;
module_proc->proc_peers[module_proc->proc_peer_count++] = module_peer;
return OMPI_SUCCESS;
}

View file

@@ -1,199 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "ptl_ib.h"
#include "ptl_ib_peer.h"
#include "ptl_ib_recvfrag.h"
#include "ptl_ib_sendfrag.h"
#include "ptl_ib_memory.h"
static void mca_ptl_ib_recv_frag_construct(mca_ptl_ib_recv_frag_t* frag);
static void mca_ptl_ib_recv_frag_destruct(mca_ptl_ib_recv_frag_t* frag);
OBJ_CLASS_INSTANCE(mca_ptl_ib_recv_frag_t,
mca_ptl_base_recv_frag_t,
mca_ptl_ib_recv_frag_construct,
mca_ptl_ib_recv_frag_destruct);
/*
* IB fragment constructor
*/
static void mca_ptl_ib_recv_frag_construct(mca_ptl_ib_recv_frag_t* frag)
{
}
/*
* IB fragment destructor
*/
static void mca_ptl_ib_recv_frag_destruct(mca_ptl_ib_recv_frag_t* frag)
{
}
void
mca_ptl_ib_recv_frag_done (
mca_ptl_base_header_t *header,
mca_ptl_base_recv_frag_t* frag,
mca_ptl_base_recv_request_t *request)
{
D_PRINT("");
frag->frag_base.frag_owner->ptl_recv_progress (
frag->frag_base.frag_owner,
request,
frag->frag_base.frag_size,
frag->frag_base.frag_size);
/* Return recv frag to free list */
OMPI_FREE_LIST_RETURN(&mca_ptl_ib_component.ib_recv_frags,
(ompi_list_item_t*)frag);
}
static void mca_ptl_ib_data_frag(
mca_ptl_ib_module_t *ib_ptl,
mca_ptl_base_header_t *hdr)
{
bool matched;
int rc;
ompi_list_item_t *item;
mca_ptl_ib_recv_frag_t *recv_frag;
size_t hdr_length;
OMPI_FREE_LIST_WAIT (&mca_ptl_ib_component.ib_recv_frags, item, rc);
recv_frag = (mca_ptl_ib_recv_frag_t *) item;
recv_frag->super.frag_base.frag_owner = &ib_ptl->super;
recv_frag->super.frag_base.frag_peer = NULL;
recv_frag->super.frag_request = NULL;
recv_frag->super.frag_is_buffered = false;
/* Copy the header, mca_ptl_base_match() */
recv_frag->super.frag_base.frag_header = *hdr;
switch(hdr->hdr_common.hdr_type) {
case MCA_PTL_HDR_TYPE_MATCH:
hdr_length = sizeof(mca_ptl_base_match_header_t);
recv_frag->super.frag_base.frag_size = hdr->hdr_match.hdr_msg_length;
break;
case MCA_PTL_HDR_TYPE_RNDV:
hdr_length = sizeof(mca_ptl_base_rendezvous_header_t);
recv_frag->super.frag_base.frag_size = hdr->hdr_rndv.hdr_frag_length;
break;
}
/* Taking the data starting point be default */
recv_frag->super.frag_base.frag_addr = (char *) hdr + hdr_length;
/* match against preposted requests */
matched = ib_ptl->super.ptl_match(
recv_frag->super.frag_base.frag_owner,
&recv_frag->super,
&recv_frag->super.frag_base.frag_header.hdr_match);
if (!matched) {
memcpy (recv_frag->unex_buf, (char *) hdr + hdr_length, recv_frag->super.frag_base.frag_size);
recv_frag->super.frag_is_buffered = true;
recv_frag->super.frag_base.frag_addr = recv_frag->unex_buf;
}
}
static void mca_ptl_ib_ctrl_frag(
mca_ptl_ib_module_t *ib_ptl,
mca_ptl_base_header_t *header)
{
mca_ptl_ib_send_frag_t *send_frag;
mca_ptl_base_send_request_t *req;
void *data_ptr;
send_frag = (mca_ptl_ib_send_frag_t *)
header->hdr_ack.hdr_src_ptr.pval;
req = (mca_ptl_base_send_request_t *)
send_frag->frag_send.frag_request;
req->req_peer_match = header->hdr_ack.hdr_dst_match;
req->req_peer_addr = header->hdr_ack.hdr_dst_addr;
req->req_peer_size = header->hdr_ack.hdr_dst_size;
/* Locate data in the ACK buffer */
data_ptr = (void*)
((char*) header + sizeof(mca_ptl_base_ack_header_t));
/* Copy over data to request buffer */
memcpy(&((mca_ptl_ib_send_request_t *) req)->req_key,
data_ptr, sizeof(VAPI_rkey_t));
/* Progress & release fragments */
mca_ptl_ib_send_frag_send_complete(ib_ptl, send_frag);
}
static void mca_ptl_ib_last_frag(mca_ptl_ib_module_t *ib_ptl,
mca_ptl_base_header_t *hdr)
{
mca_ptl_ib_fin_header_t *fin_hdr = (mca_ptl_ib_fin_header_t *)hdr;
mca_ptl_base_recv_request_t *request;
request = (mca_ptl_base_recv_request_t*) hdr->hdr_frag.hdr_dst_ptr.pval;
/* deregister memory if this is the last fragment */
if ((request->req_bytes_received + hdr->hdr_frag.hdr_frag_length) >=
request->req_recv.req_bytes_packed) {
mca_ptl_ib_deregister_mem_with_registry(ib_ptl,
fin_hdr->mr_addr.pval, (size_t)fin_hdr->mr_size);
}
ib_ptl->super.ptl_recv_progress (
&ib_ptl->super,
request,
hdr->hdr_frag.hdr_frag_length,
hdr->hdr_frag.hdr_frag_length);
}
/*
* Process incoming receive fragments
*
*/
void mca_ptl_ib_process_recv(mca_ptl_ib_module_t *ib_ptl, void* addr)
{
ib_buffer_t *ib_buf;
mca_ptl_base_header_t *header;
ib_buf = (ib_buffer_t *) addr;
header = (mca_ptl_base_header_t *) &ib_buf->buf[0];
switch(header->hdr_common.hdr_type) {
case MCA_PTL_HDR_TYPE_MATCH :
case MCA_PTL_HDR_TYPE_RNDV :
case MCA_PTL_HDR_TYPE_FRAG :
mca_ptl_ib_data_frag(ib_ptl, header);
break;
case MCA_PTL_HDR_TYPE_ACK :
mca_ptl_ib_ctrl_frag(ib_ptl, header);
break;
case MCA_PTL_HDR_TYPE_FIN :
A_PRINT("Fin");
mca_ptl_ib_last_frag(ib_ptl, header);
break;
default :
ompi_output(0, "Unknown fragment type");
break;
}
}

View file

@@ -1,53 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_RECV_FRAG_H
#define MCA_PTL_IB_RECV_FRAG_H
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
#define MCA_PTL_IB_UNEX_BUF_SIZE (4096)
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_ptl_ib_recv_frag_t);
/**
* IB received fragment derived type.
*/
struct mca_ptl_ib_recv_frag_t {
mca_ptl_base_recv_frag_t super;
/**< base receive fragment descriptor */
char unex_buf[MCA_PTL_IB_UNEX_BUF_SIZE];
/**< Unexpected buffer */
};
typedef struct mca_ptl_ib_recv_frag_t mca_ptl_ib_recv_frag_t;
struct mca_ptl_ib_module_t;
void mca_ptl_ib_recv_frag_done (mca_ptl_base_header_t*,
mca_ptl_base_recv_frag_t*, mca_ptl_base_recv_request_t*);
void mca_ptl_ib_process_recv(struct mca_ptl_ib_module_t* , void*);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

View file

@@ -1,159 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/types.h"
#include "datatype/datatype.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "ptl_ib.h"
#include "ptl_ib_peer.h"
#include "ptl_ib_proc.h"
#include "ptl_ib_sendfrag.h"
#include "ptl_ib_priv.h"
#include "ptl_ib_memory.h"
static void mca_ptl_ib_send_frag_construct(mca_ptl_ib_send_frag_t* frag);
static void mca_ptl_ib_send_frag_destruct(mca_ptl_ib_send_frag_t* frag);
OBJ_CLASS_INSTANCE(mca_ptl_ib_send_frag_t,
mca_ptl_base_send_frag_t,
mca_ptl_ib_send_frag_construct,
mca_ptl_ib_send_frag_destruct);
/*
* Placeholders for the send fragment constructor and destructor.
*/
static void mca_ptl_ib_send_frag_construct(mca_ptl_ib_send_frag_t* frag)
{
frag->frag_progressed = 0;
frag->frag_ack_pending = 0;
}
static void mca_ptl_ib_send_frag_destruct(mca_ptl_ib_send_frag_t* frag)
{
}
/*
* Allocate an IB send descriptor
*
*/
mca_ptl_ib_send_frag_t* mca_ptl_ib_alloc_send_frag(
mca_ptl_ib_module_t* ib_ptl,
mca_ptl_base_send_request_t* request)
{
ompi_free_list_t *flist = &ib_ptl->send_free;
ompi_list_item_t *item;
mca_ptl_ib_send_frag_t *ib_send_frag;
item = ompi_list_remove_first(&((flist)->super));
while(NULL == item) {
/* free list exhausted: progress pending completions and retry */
D_PRINT("Got a NULL descriptor ... trying again");
mca_ptl_ib_component_progress(0);
item = ompi_list_remove_first(&((flist)->super));
}
ib_send_frag = (mca_ptl_ib_send_frag_t *)item;
return ib_send_frag;
}
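/*
 * Usage sketch (illustrative only): a descriptor obtained here is handed
 * back to the same free list once its completion is processed, as done in
 * mca_ptl_ib_send_frag_send_complete() below:
 *
 *   mca_ptl_ib_send_frag_t *frag = mca_ptl_ib_alloc_send_frag(ib_ptl, req);
 *   ...fill in frag->ib_buf and post the send...
 *   OMPI_FREE_LIST_RETURN(&ib_ptl->send_free, (ompi_list_item_t *) frag);
 */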
int mca_ptl_ib_send_frag_register(mca_ptl_ib_module_t *ib_ptl)
{
int i, rc, num_send_frags;
ompi_list_item_t *item;
ompi_free_list_t *flist = &ib_ptl->send_free;
ib_buffer_t *ib_buf_ptr;
mca_ptl_ib_send_frag_t *ib_send_frag;
num_send_frags = ompi_list_get_size(&(flist->super));
item = ompi_list_get_first(&((flist)->super));
/* Register the buffers */
for(i = 0; i < num_send_frags;
item = ompi_list_get_next(item), i++) {
ib_send_frag = (mca_ptl_ib_send_frag_t *) item;
ib_send_frag->frag_progressed = 0;
ib_buf_ptr = (ib_buffer_t *) &ib_send_frag->ib_buf;
rc = mca_ptl_ib_register_mem(ib_ptl->nic, ib_ptl->ptag,
(void*) ib_buf_ptr->buf,
MCA_PTL_IB_FIRST_FRAG_SIZE,
&ib_buf_ptr->hndl);
if(rc != OMPI_SUCCESS) {
return OMPI_ERROR;
}
IB_PREPARE_SEND_DESC(ib_buf_ptr, 0,
MCA_PTL_IB_FIRST_FRAG_SIZE, ib_buf_ptr);
}
return OMPI_SUCCESS;
}
/*
* Process send completions
*
*/
void mca_ptl_ib_send_frag_send_complete(mca_ptl_ib_module_t *ib_ptl, mca_ptl_ib_send_frag_t* sendfrag)
{
mca_ptl_base_header_t *hdr;
mca_ptl_base_send_request_t* req = sendfrag->frag_send.frag_request;
hdr = (mca_ptl_base_header_t *) sendfrag->ib_buf.buf;
switch(hdr->hdr_common.hdr_type) {
case MCA_PTL_HDR_TYPE_MATCH:
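/* progress the request only when no ACK was requested, or when the
 * ACK has already arrived (mca_ptl_ib_ctrl_frag fills in
 * req_peer_match); otherwise the ACK completion path progresses it */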
if (0 == (hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK)
|| req->req_peer_match.pval != NULL) {
ib_ptl->super.ptl_send_progress(&ib_ptl->super,
sendfrag->frag_send.frag_request,
hdr->hdr_rndv.hdr_frag_length);
if(req->req_cached == false) {
OMPI_FREE_LIST_RETURN(&ib_ptl->send_free,
((ompi_list_item_t *) sendfrag));
}
}
break;
case MCA_PTL_HDR_TYPE_ACK:
OMPI_FREE_LIST_RETURN(&ib_ptl->send_free,
((ompi_list_item_t *) sendfrag));
break;
case MCA_PTL_HDR_TYPE_FIN:
ib_ptl->super.ptl_send_progress(&ib_ptl->super,
sendfrag->frag_send.frag_request,
hdr->hdr_frag.hdr_frag_length);
OMPI_FREE_LIST_RETURN(&ib_ptl->send_free,
((ompi_list_item_t *) sendfrag));
break;
}
}

@@ -1,70 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_SEND_FRAG_H
#define MCA_PTL_IB_SEND_FRAG_H
#include "ompi_config.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "mca/ptl/base/ptl_base_sendfrag.h"
#include "ptl_ib_priv.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_ptl_ib_send_frag_t);
/**
* IB send fragment derived type.
*/
struct mca_ptl_ib_send_frag_t {
mca_ptl_base_send_frag_t frag_send;
/**< base send fragment descriptor */
ib_buffer_t ib_buf;
/**< IB buffer attached to this frag */
volatile int frag_progressed;
bool frag_ack_pending;
};
typedef struct mca_ptl_ib_send_frag_t mca_ptl_ib_send_frag_t;
struct mca_ptl_ib_module_t;
/**
* Allocate a fragment descriptor.
*
* ib_ptl (IN)   PTL module
* request (IN)  PML base send request
* RETURN        mca_ptl_ib_send_frag_t*
*
*/
mca_ptl_ib_send_frag_t* mca_ptl_ib_alloc_send_frag(
struct mca_ptl_ib_module_t* ib_ptl,
mca_ptl_base_send_request_t* request);
int mca_ptl_ib_send_frag_register(struct mca_ptl_ib_module_t *ptl);
void mca_ptl_ib_send_frag_send_complete(struct mca_ptl_ib_module_t *ptl, mca_ptl_ib_send_frag_t*);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

@@ -1,51 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/types.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "ptl_ib.h"
#include "ptl_ib_sendreq.h"
static void mca_ptl_ib_send_request_construct(mca_ptl_ib_send_request_t*);
static void mca_ptl_ib_send_request_destruct(mca_ptl_ib_send_request_t*);
OBJ_CLASS_INSTANCE(mca_ptl_ib_send_request_t,
mca_ptl_base_send_request_t,
mca_ptl_ib_send_request_construct,
mca_ptl_ib_send_request_destruct);
void mca_ptl_ib_send_request_construct(mca_ptl_ib_send_request_t* request)
{
A_PRINT("Request Construct");
request->req_frag = NULL;
/*
OBJ_CONSTRUCT(&request->req_frag, mca_ptl_ib_send_frag_t);
*/
}
void mca_ptl_ib_send_request_destruct(mca_ptl_ib_send_request_t* request)
{
D_PRINT("\n");
/*
OBJ_DESTRUCT(&request->req_frag);
*/
}

@@ -1,49 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_SEND_REQUEST_H
#define MCA_PTL_IB_SEND_REQUEST_H
#include "ompi_config.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "ptl_ib_sendfrag.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_ptl_ib_send_request_t);
/**
* IB send request derived type.  In addition to the base send request,
* it holds a pointer to the request's first IB send fragment and the
* remote RDMA key (req_key) that is delivered in the peer's ACK.
*/
struct mca_ptl_ib_send_request_t {
mca_ptl_base_send_request_t super;
mca_ptl_ib_send_frag_t *req_frag;  /**< first fragment of this request */
VAPI_rkey_t req_key;               /**< remote RDMA key copied out of the ACK */
};
typedef struct mca_ptl_ib_send_request_t mca_ptl_ib_send_request_t;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

@@ -1,90 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PTL_IB_VAPI_H
#define MCA_PTL_IB_VAPI_H
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
/* HACK: these values should be runtime-tunable parameters rather than
* compile-time defines; they are hard-coded for now.
*/
#define DEFAULT_PORT (1)
#define DEFAULT_CQ_SIZE (40000)
#define DEFAULT_WQ_SIZE (10000)
#define DEFAULT_SG_LIST (1)
#define DEFAULT_PKEY_IX (0)
#define DEFAULT_PSN (0)
#define DEFAULT_QP_OUS_RD_ATOM (1)
#define DEFAULT_MTU (MTU1024)
#define DEFAULT_MIN_RNR_TIMER (5)
#define DEFAULT_TIME_OUT (10)
#define DEFAULT_RETRY_COUNT (7)
#define DEFAULT_RNR_RETRY (7)
#define DEFAULT_MAX_RDMA_DST_OPS (16)
#define DEFAULT_TRAFFIC_CLASS (0)
#define DEFAULT_HOP_LIMIT (63)
#define DEFAULT_FLOW_LABEL (0)
#define DEFAULT_SERVICE_LEVEL (0)
#define DEFAULT_STATIC_RATE (0)
#define DEFAULT_SRC_PATH_BITS (0)
/* This is a convenience macro for reporting a failed VAPI call.
*
* vapi_ret  : the value returned by the last VAPI call
* func_name : the name of the VAPI function that was called
*/
#define MCA_PTL_IB_VAPI_RET(vapi_ret, func_name) { \
ompi_output(0,"[%s:%d] ", __FILE__, __LINE__); \
ompi_output(0,"%s : %s",func_name,VAPI_strerror(vapi_ret)); \
}
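/*
 * Usage sketch (illustrative only; "nic" is an assumed HCA handle, and
 * VAPI_query_hca_cap() is a standard VAPI entry point):
 *
 *   VAPI_hca_vendor_t vendor;
 *   VAPI_hca_cap_t cap;
 *   VAPI_ret_t ret = VAPI_query_hca_cap(nic, &vendor, &cap);
 *   if (VAPI_OK != ret) {
 *       MCA_PTL_IB_VAPI_RET(ret, "VAPI_query_hca_cap");
 *       return OMPI_ERROR;
 *   }
 */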
/* Debug Print */
#if 0
#define D_PRINT(fmt, args...) { \
ompi_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, \
##args); \
}
#else
#define D_PRINT(fmt, args...)
#endif
#if 0
#define A_PRINT(fmt, args...) { \
ompi_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, \
##args); \
}
#else
#define A_PRINT(fmt, args...)
#endif
#if 0
#define B_PRINT(fmt, args...) { \
ompi_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, \
##args); \
}
#else
#define B_PRINT(fmt, args...)
#endif
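/*
 * Note: the print macros above compile to nothing unless the matching
 * "#if 0" is flipped to "#if 1", so call sites are free in normal builds,
 * e.g.:
 *
 *   D_PRINT("posted %d send descriptors", num_send_frags);
 */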
#endif