Working version of openib btl ;-)
Fixed receive descriptor counts that limited mvapi and openib to 2 procs. Begin porting error messages to use the BTL_ERROR macro. This commit was SVN r6554.
Parent: acb9365793
Commit: 2f67ab82bb

New file: ompi/mca/btl/base/btl_base_error.h (24 lines)
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_BTL_BASE_ERROR_H
+#define MCA_BTL_BASE_ERROR_H
+
+#define BTL_ERROR(fmt, args...) { \
+    opal_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, ##args); \
+}
+
+#endif
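The macro above is used like printf(3): the format string and arguments are appended after the automatically generated file/line/function prefix. A minimal sketch of a caller, assuming it is compiled inside the Open MPI tree so that opal_output() is available; the helper function and its message are illustrative, not part of the BTL:

#include "opal/util/output.h"
#include "mca/btl/base/btl_base_error.h"

/* hypothetical helper, not part of the BTL: report a short receive posting */
static int check_post(int num_posted, int num_wanted)
{
    if (num_posted < num_wanted) {
        /* expands to opal_output(0, "[<file>:<line>:<function>] only posted ...", ...) */
        BTL_ERROR("only posted %d of %d receive descriptors", num_posted, num_wanted);
        return -1;   /* the real BTL code returns OMPI_ERROR here */
    }
    return 0;        /* ... and OMPI_SUCCESS here */
}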
@@ -828,5 +828,6 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

@@ -157,11 +157,6 @@ struct mca_btl_mvapi_module_t {

    mca_mpool_base_module_t* ib_pool; /**< ib memory pool */

-   uint32_t rr_posted_high; /**< number of high priority rr posted to the nic */
-   uint32_t rr_posted_low;  /**< number of low priority rr posted to the nic */

    VAPI_rr_desc_t* rr_desc_post;
    /**< an array to allow posting of rr in one swoop */
@@ -487,13 +487,19 @@ int mca_btl_mvapi_component_progress()
{
    uint32_t i;
    int count = 0;
    mca_btl_mvapi_frag_t* frag;
+   mca_btl_mvapi_endpoint_t* endpoint;
    /* Poll for completions */
    for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
        VAPI_ret_t ret;
        VAPI_wc_desc_t comp;
        mca_btl_mvapi_module_t* mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];

+       /* we have two completion queues, one for "high" priority and one for "low".
+        * we will check the high priority and process them until there are none left.
+        * note that low priority messages are only processed one per progress call.
+        */
        do{
            ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
            if(VAPI_OK == ret) {

@@ -504,7 +510,7 @@ int mca_btl_mvapi_component_progress()
                return OMPI_ERROR;
            }

-           /* Handle n/w completions */
+           /* Handle work completions */
            switch(comp.opcode) {
            case VAPI_CQE_RQ_RDMA_WITH_IMM:
                if(comp.imm_data_valid){

@@ -515,7 +521,7 @@ int mca_btl_mvapi_component_progress()
            case VAPI_CQE_SQ_RDMA_WRITE:
            case VAPI_CQE_SQ_SEND_DATA :

-               /* Process a completed send */
+               /* Process a completed send or an rdma write */
                frag = (mca_btl_mvapi_frag_t*) comp.id;
                frag->rc = OMPI_SUCCESS;
                frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);

@@ -524,15 +530,18 @@ int mca_btl_mvapi_component_progress()

            case VAPI_CQE_RQ_SEND_DATA:

-               DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
+               /* Process a RECV */
+               DEBUG_OUT("Got an recv completion" );
                frag = (mca_btl_mvapi_frag_t*) comp.id;
+               endpoint = (mca_btl_endpoint_t*) frag->endpoint;
                frag->rc=OMPI_SUCCESS;
                frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
                /* advance the segment address past the header and subtract from the length..*/
                mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);

                OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (opal_list_item_t*) frag);
-               OPAL_THREAD_ADD32(&mvapi_btl->rr_posted_high, -1);
+               OPAL_THREAD_ADD32(&endpoint->rr_posted_high, -1);

                mca_btl_mvapi_endpoint_post_rr(((mca_btl_mvapi_frag_t*)comp.id)->endpoint, 0);

@@ -540,7 +549,7 @@ int mca_btl_mvapi_component_progress()
                break;

            default:
-               opal_output(0, "Errorneous network completion");
+               opal_output(0, "Unhandled work completion opcode is %d", comp.opcode);
                break;
            }
        }

@@ -570,15 +579,17 @@ int mca_btl_mvapi_component_progress()

            case VAPI_CQE_RQ_SEND_DATA:

                DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
                frag = (mca_btl_mvapi_frag_t*) comp.id;
+               endpoint = (mca_btl_endpoint_t*) frag->endpoint;
                frag->rc=OMPI_SUCCESS;
                frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
                /* advance the segment address past the header and subtract from the length..*/
                mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);

                OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (opal_list_item_t*) frag);
-               OPAL_THREAD_ADD32(&mvapi_btl->rr_posted_low, -1);
+               OPAL_THREAD_ADD32(&endpoint->rr_posted_low, -1);

                mca_btl_mvapi_endpoint_post_rr(((mca_btl_mvapi_frag_t*)comp.id)->endpoint, 0);
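The seg_len arithmetic in the receive paths above ("advance the segment address past the header and subtract from the length") can be illustrated with concrete numbers. This standalone sketch uses hypothetical sizes (a 16-byte header in a 1024-byte completion), not values taken from the BTL:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* hypothetical values: 1024 bytes arrived, the header occupies the first 16 */
    uint32_t byte_len = 1024;            /* what comp.byte_len reports for the completion */
    unsigned char buffer[1024];
    unsigned char *hdr  = buffer;        /* like frag->hdr: start of the receive buffer */
    unsigned char *pval = buffer + 16;   /* like frag->segment.seg_addr.pval: payload start */

    /* same arithmetic as the progress loop: payload length = total - header offset */
    size_t seg_len = byte_len - (size_t)(pval - hdr);
    printf("payload length = %zu\n", seg_len);   /* prints 1008 */
    return 0;
}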
@@ -114,6 +114,10 @@ static void mca_btl_mvapi_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
    OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);

+   endpoint->rr_posted_high = 0;
+   endpoint->rr_posted_low = 0;

}

/*

@@ -109,6 +109,10 @@ struct mca_btl_base_endpoint_t {
    VAPI_qp_prop_t lcl_qp_prop_low;
    /* Low priority local QP properties */

+   uint32_t rr_posted_high; /**< number of high priority rr posted to the nic */
+   uint32_t rr_posted_low;  /**< number of low priority rr posted to the nic */

};

typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
@@ -161,14 +165,14 @@ static inline int mca_btl_mvapi_endpoint_post_rr_sub(int cnt,
static inline int mca_btl_mvapi_endpoint_post_rr( mca_btl_mvapi_endpoint_t * endpoint, int additional){
    mca_btl_mvapi_module_t * mvapi_btl = endpoint->endpoint_btl;
    int rc;
-   OPAL_THREAD_LOCK(&mvapi_btl->ib_lock);
+   OPAL_THREAD_LOCK(&endpoint->ib_lock);

-   if(mvapi_btl->rr_posted_high <= mca_btl_mvapi_component.ib_rr_buf_min+additional && mvapi_btl->rr_posted_high < mca_btl_mvapi_component.ib_rr_buf_max){
+   if(endpoint->rr_posted_high <= mca_btl_mvapi_component.ib_rr_buf_min+additional && endpoint->rr_posted_high < mca_btl_mvapi_component.ib_rr_buf_max){

-       rc = mca_btl_mvapi_endpoint_post_rr_sub(mca_btl_mvapi_component.ib_rr_buf_max - mvapi_btl->rr_posted_high,
+       rc = mca_btl_mvapi_endpoint_post_rr_sub(mca_btl_mvapi_component.ib_rr_buf_max - endpoint->rr_posted_high,
            endpoint,
            &mvapi_btl->recv_free_eager,
-           &mvapi_btl->rr_posted_high,
+           &endpoint->rr_posted_high,
            mvapi_btl->nic,
            endpoint->lcl_qp_hndl_high
            );

@@ -177,12 +181,12 @@ static inline int mca_btl_mvapi_endpoint_post_rr( mca_btl_mvapi_endpoint_t * end
            return rc;
        }
    }
-   if(mvapi_btl->rr_posted_low <= mca_btl_mvapi_component.ib_rr_buf_min+additional && mvapi_btl->rr_posted_low < mca_btl_mvapi_component.ib_rr_buf_max){
+   if(endpoint->rr_posted_low <= mca_btl_mvapi_component.ib_rr_buf_min+additional && endpoint->rr_posted_low < mca_btl_mvapi_component.ib_rr_buf_max){

-       rc = mca_btl_mvapi_endpoint_post_rr_sub(mca_btl_mvapi_component.ib_rr_buf_max - mvapi_btl->rr_posted_low,
+       rc = mca_btl_mvapi_endpoint_post_rr_sub(mca_btl_mvapi_component.ib_rr_buf_max - endpoint->rr_posted_low,
            endpoint,
            &mvapi_btl->recv_free_max,
-           &mvapi_btl->rr_posted_low,
+           &endpoint->rr_posted_low,
            mvapi_btl->nic,
            endpoint->lcl_qp_hndl_low
            );
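The hunks above are the receive-descriptor fix named in the commit message: the rr_posted_high/rr_posted_low counters now live on each endpoint rather than on the shared mvapi/openib module, so the "should I post more receives?" decision is made per connection instead of being satisfied by whichever two peers connected first. A minimal sketch of that accounting idea, using hypothetical names and limits rather than the real OMPI structures:

#include <stdint.h>

#define RR_BUF_MIN 8    /* stand-ins for ib_rr_buf_min / ib_rr_buf_max */
#define RR_BUF_MAX 16

/* hypothetical stand-in for one connection's receive-descriptor state */
struct endpoint {
    uint32_t rr_posted;   /* receives currently posted on this endpoint's QP */
};

/* how many receive descriptors should be (re)posted for this endpoint;
 * because the count is per endpoint, every peer is topped up independently */
static uint32_t rr_to_post(const struct endpoint *ep, uint32_t additional)
{
    if (ep->rr_posted <= RR_BUF_MIN + additional && ep->rr_posted < RR_BUF_MAX) {
        return RR_BUF_MAX - ep->rr_posted;
    }
    return 0;
}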
@@ -21,7 +21,7 @@
#include "opal/util/if.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
-
+#include "mca/btl/base/btl_base_error.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_proc.h"

@@ -127,6 +127,10 @@ int mca_btl_openib_del_procs(struct mca_btl_base_module_t* btl,
    return OMPI_SUCCESS;
}

+/*
+ *Register callback function to support send/recv semantics
+ */
+
int mca_btl_openib_register(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_tag_t tag,

@@ -179,7 +183,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
}

/**
 *
 * Return a segment
 *
 */
int mca_btl_openib_free(

@@ -189,16 +193,16 @@ int mca_btl_openib_free(
    mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;

    if(frag->size == 0) {
-       MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag);
        OBJ_RELEASE(frag->openib_reg);
+       MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag);
    }
    else if(frag->size == mca_btl_openib_component.max_send_size){
        MCA_BTL_IB_FRAG_RETURN_MAX(btl, frag);
    } else if(frag->size == mca_btl_openib_component.eager_limit){
        MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
    } else {
        BTL_ERROR("invalid descriptor");
    }

    return OMPI_SUCCESS;
@@ -265,13 +269,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(

            rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
            if(OMPI_SUCCESS != rc) {
-               opal_output(0, "%s:%d:%s error removing memory region from memory pool tree", __FILE__, __LINE__, __func__);
+               BTL_ERROR("error removing memory region from memory pool tree");
                return NULL;
            }

            if(is_leave_pinned) {
                if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)){
-                   opal_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
+                   BTL_ERROR("error removing item from reg_mru_list");
                    return NULL;
                }
            }

@@ -290,7 +294,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(

            if(rc != OMPI_SUCCESS) {
-               opal_output(0,"%s:%d:%s error inserting memory region into memory pool tree", __FILE__, __LINE__, __func__);
+               BTL_ERROR("error inserting memory region into memory pool tree");
                return NULL;
            }

@@ -302,7 +306,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
        }
        else if(is_leave_pinned) {
            if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
-               opal_output(0,"%s:%d:%s error removing item from reg_mru_list", __FILE__, __LINE__, __func__);
+               BTL_ERROR("error removing item from reg_mru_list");
                return NULL;
            }
            opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);

@@ -399,13 +403,16 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
        frag->sg_entry.lkey = openib_reg->mr->lkey;
        frag->sg_entry.addr = (uintptr_t) iov.iov_base;

-       frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->lkey;
+       frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->rkey;

        frag->base.des_src = &frag->segment;
        frag->base.des_src_cnt = 1;
        frag->base.des_dst = NULL;
        frag->base.des_dst_cnt = 0;
        frag->openib_reg = openib_reg;
+       DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr);

        return &frag->base;

    } else if (max_data+reserve <= btl->btl_eager_limit) {

@@ -416,7 +423,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
        }

        iov.iov_len = max_data;
-       iov.iov_base = frag->segment.seg_addr.pval + reserve;
+       iov.iov_base = frag->segment.seg_addr.lval + reserve;

        rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
        *size = max_data;

@@ -507,8 +514,9 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
    frag->base.des_flags = 0;

    if(NULL!= openib_reg){
+       bool is_leave_pinned = openib_reg->base_reg.is_leave_pinned;
        reg_len = (unsigned char*)openib_reg->base_reg.bound - (unsigned char*)frag->segment.seg_addr.pval + 1;

        if(frag->segment.seg_len > reg_len ) {
            size_t new_len = openib_reg->base_reg.bound - openib_reg->base_reg.base + 1

@@ -560,6 +568,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
            }
            opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
        }
+       OBJ_RETAIN(openib_reg);
    } else {

        if(mca_btl_openib_component.leave_pinned) {

@@ -601,7 +610,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
                opal_output(0,"%s:%d:%s error inserting memory region into memory pool", __FILE__, __LINE__, __func__);
                return NULL;
            }

            OBJ_RETAIN(openib_reg);
            opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);

@@ -622,14 +631,15 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
    frag->sg_entry.lkey = openib_reg->mr->lkey;
    frag->sg_entry.addr = (uintptr_t) frag->segment.seg_addr.pval;

-   frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->lkey;
+   frag->segment.seg_key.key32[0] = frag->mr->rkey;

    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->openib_reg = openib_reg;
    OBJ_RETAIN(openib_reg);
+   DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]);

    return &frag->base;

}
@@ -712,10 +722,17 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
    mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor;
    frag->endpoint = endpoint;
    frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
-   frag->sr_desc.wr.rdma.remote_addr = (uintptr_t) frag->base.des_src->seg_addr.pval;
+   frag->sr_desc.send_flags = IBV_SEND_SIGNALED;
+   frag->sr_desc.wr.rdma.remote_addr = (uintptr_t) frag->base.des_dst->seg_addr.pval;
    frag->sr_desc.wr.rdma.rkey = frag->base.des_dst->seg_key.key32[0];
    frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval;
    frag->sg_entry.length = frag->base.des_src->seg_len;

+   DEBUG_OUT("frag->sr_desc.wr.rdma.remote_addr = %llu .rkey = %lu frag->sg_entry.addr = %llu .length = %lu"
+       , frag->sr_desc.wr.rdma.remote_addr
+       , frag->sr_desc.wr.rdma.rkey
+       , frag->sg_entry.addr
+       , frag->sg_entry.length);

    if(ibv_post_send(endpoint->lcl_qp_low,
        &frag->sr_desc,

@@ -785,7 +802,7 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)

    /* Allocate Protection Domain */
    struct ibv_context *ctx;
-
+   openib_btl->poll_cq = false;

    ctx = openib_btl->ib_dev_context;
    openib_btl->ib_pd = ibv_alloc_pd(ctx);

@@ -821,5 +838,7 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)

    /* TODO: EVAPI_set_qsync_event_handler? */

    return OMPI_SUCCESS;
}

@@ -155,14 +155,10 @@ struct mca_btl_openib_module_t {
    mca_mpool_base_module_t* ib_pool; /**< ib memory pool */

-   uint32_t rr_posted_high; /**< number of high priority rr posted to the nic */
-   uint32_t rr_posted_low;  /**< number of low priority rr posted to the nic */

    /**< an array to allow posting of rr in one swoop */
    size_t ib_inline_max; /**< max size of inline send*/
+   bool poll_cq;
@@ -31,6 +31,7 @@
#include "btl_openib_frag.h"
#include "btl_openib_endpoint.h"
#include "mca/btl/base/base.h"
+#include "mca/btl/base/btl_base_error.h"

#include "datatype/convertor.h"

@@ -253,12 +254,15 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
    mca_btl_openib_module_t * openib_btl;
    mca_btl_base_selected_module_t* ib_selected;
    opal_list_item_t* item;
+   struct dlist *dev_list;
+   struct ibv_device* ib_dev;

    /* initialization */
    *num_btl_modules = 0;
    num_devs = 0;

-   struct dlist *dev_list;
-   struct ibv_device* ib_dev;

    /* Determine the number of hca's available on the host */
    dev_list = ibv_get_devices();

@@ -268,7 +272,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
        num_devs++;

    if(0 == num_devs) {
-       opal_output(0, "No hca's found on this host! \n");
+       BTL_ERROR("No hca's found on this host!");
        return NULL;
    }

@@ -297,51 +301,18 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,

    for(i = 0; i < num_devs; i++){
        struct ibv_device_attr ib_dev_attr;
        struct ibv_context* ib_dev_context;
-       struct ibv_pd *my_pd;
-       struct ibv_mr *mr;
-       void* my_addr;
-       uint32_t my_size;
-       uint32_t my_indx;
-       uint32_t my_mult;
-       my_mult = 4096;

        ib_dev = ib_devs[i];

        ib_dev_context = ibv_open_device(ib_dev);
        if(!ib_dev_context) {
-           opal_output(0, "%s: error obtaining device context for %s errno says %s\n", __func__, ibv_get_device_name(ib_dev), strerror(errno));
+           BTL_ERROR(" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno));
            return NULL;
        }

-       my_pd = ibv_alloc_pd(ib_dev_context);
-       for(my_indx = 1; my_indx <= 8192; my_indx++){
-           my_size = my_mult * my_indx;
-           my_addr = memalign(4096, my_size);
-
-           memset(my_addr, 0, my_size);
-           mr = ibv_reg_mr(
-               my_pd,
-               my_addr,
-               my_size,
-               IBV_ACCESS_REMOTE_WRITE
-               /* IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE */
-               );
-
-           if(NULL == mr){
-               opal_output(0, "%s: error on mr test! can't register %lu bytes, errno says %s \n", __func__, my_size, strerror(errno));
-               break;
-           }
-           else {
-               opal_output(0, "%s: successfully registerted %lu bytes", __func__, my_size);
-               ibv_dereg_mr(mr);
-           }
-       }

        if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
-           opal_output(0, "%s: error obtaining device attributes for %s errno says %s\n", __func__, ibv_get_device_name(ib_dev), strerror(errno));
+           BTL_ERROR("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno));
            return NULL;
        }

@@ -352,8 +323,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
            struct ibv_port_attr* ib_port_attr;
            ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
            if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){
-               opal_output(0, "%s: error getting port attributes for device %s port number %d errno says %s",
-                   __func__, ibv_get_device_name(ib_dev), j, strerror(errno));
+               BTL_ERROR("error getting port attributes for device %s port number %d errno says %s",
+                   ibv_get_device_name(ib_dev), j, strerror(errno));
                return NULL;
            }

@@ -438,7 +409,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
            &mpool_resources);

        if(NULL == openib_btl->ib_pool) {
-           opal_output(0, "%s: error creating vapi memory pool! aborting ib btl initialization", __func__);
+           BTL_ERROR("error creating vapi memory pool! aborting ib btl initialization");
            return NULL;
        }

@@ -531,68 +502,80 @@ int mca_btl_openib_component_progress()
    uint32_t i, ne;
    int count = 0;
    mca_btl_openib_frag_t* frag;
+   mca_btl_openib_endpoint_t* endpoint;
    /* Poll for completions */
    for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {

        struct ibv_wc wc;
        memset(&wc, 0, sizeof(struct ibv_wc));

        mca_btl_openib_module_t* openib_btl = &mca_btl_openib_component.openib_btls[i];

+       /* we have two completion queues, one for "high" priority and one for "low".
+        * we will check the high priority and process them until there are none left.
+        * note that low priority messages are only processed one per progress call.
+        */
        do{
            ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc );
            if(ne < 0 ){
-               opal_output(0, "%s: error polling CQ with %d errno says %s\n", __func__, ne, strerror(errno));
+               BTL_ERROR("error polling CQ with %d errno says %s\n", ne, strerror(errno));
                return OMPI_ERROR;
            }
            else if(wc.status != IBV_WC_SUCCESS) {
-               opal_output(0, "%s: error polling CQ with status %d for wr_id %d\n",
-                   __func__,
+               BTL_ERROR("error polling CQ with status %d for wr_id %d\n",
                    wc.status, wc.wr_id);
                return OMPI_ERROR;
            }
            else if(1 == ne) {
-               /* Handle n/w completions */
+               DEBUG_OUT("completion queue event says opcode is %d\n", wc.opcode);
+
+               /* Handle work completions */
                switch(wc.opcode) {
                case IBV_WC_RECV_RDMA_WITH_IMM:
-                   opal_output(0, "Got an RDMA with Immediate data Not supported!\n");
+                   BTL_ERROR("Got an RDMA with Immediate data Not supported!");
                    return OMPI_ERROR;

+               case IBV_WC_RECV:
+                   /* Process a RECV */
+                   DEBUG_OUT("Got an recv on the completion queue");
+                   frag = (mca_btl_openib_frag_t*) wc.wr_id;
+                   endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
+                   frag->rc=OMPI_SUCCESS;
+                   frag->segment.seg_len =
+                       wc.byte_len-
+                       ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
+
+                   OPAL_THREAD_ADD32(&endpoint->rr_posted_high, -1);
+
+                   mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0);
+
+                   /* advance the segment address past the header and subtract from the length..*/
+                   openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
+                       frag->hdr->tag,
+                       &frag->base,
+                       openib_btl->ib_reg[frag->hdr->tag].cbdata);
+
+                   OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag);
+                   count++;
+                   break;

                case IBV_WC_RDMA_WRITE:
                case IBV_WC_SEND :
-                   if(wc.opcode & IBV_WC_RECV){
-                       /* process a recv completion (this should only occur for a send not an rdma) */
-                       DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
-                       frag = (mca_btl_openib_frag_t*) wc.wr_id;
-                       frag->rc=OMPI_SUCCESS;
-                       frag->segment.seg_len =
-                           wc.byte_len-
-                           ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
-
-                       /* advance the segment address past the header and subtract from the length..*/
-                       openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
-                           frag->hdr->tag,
-                           &frag->base,
-                           openib_btl->ib_reg[frag->hdr->tag].cbdata);
-
-                       OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag);
-                       OPAL_THREAD_ADD32(&openib_btl->rr_posted_high, -1);
-
-                       mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0);
-
-                       count++;
-                   }
-                   else {
-                       /* Process a completed send */
-                       frag = (mca_btl_openib_frag_t*) wc.wr_id;
-                       frag->rc = OMPI_SUCCESS;
-                       frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc);
-                       count++;
-                   }
-
-                   break;
+                   /* Process a completed send or rdma write*/
+                   frag = (mca_btl_openib_frag_t*) wc.wr_id;
+                   frag->rc = OMPI_SUCCESS;
+                   frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc);
+                   count++;
+                   break;

                default:
-                   opal_output(0, "Errorneous network completion");
+                   BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode);
                    break;
                }
            }

@@ -601,12 +584,11 @@ int mca_btl_openib_component_progress()

        ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc );
        if(ne < 0){
-           opal_output(0, "%s: error polling CQ with %d errno says %s\n", __func__, ne, strerror(errno));
+           BTL_ERROR("error polling CQ with %d errno says %s", ne, strerror(errno));
            return OMPI_ERROR;
        }
        else if(wc.status != IBV_WC_SUCCESS) {
-           opal_output(0, "%s: error polling CQ with status %d for wr_id %d\n",
-               __func__,
+           BTL_ERROR("error polling CQ with status %d for wr_id %d",
                wc.status, wc.wr_id);
            return OMPI_ERROR;
        }

@@ -614,46 +596,46 @@ int mca_btl_openib_component_progress()
            /* Handle n/w completions */
            switch(wc.opcode) {
            case IBV_WC_RECV_RDMA_WITH_IMM:
-               opal_output(0, "Got an RDMA with Immediate data Not supported!\n");
+               BTL_ERROR("Got an RDMA with Immediate data Not supported!");
                return OMPI_ERROR;

+           case IBV_WC_RECV:
+               /* process a recv completion (this should only occur for a send not an rdma) */
+               DEBUG_OUT( "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
+               frag = (mca_btl_openib_frag_t*) wc.wr_id;
+               endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
+               frag->rc=OMPI_SUCCESS;
+
+               /* advance the segment address past the header and subtract from the length..*/
+               frag->segment.seg_len =
+                   wc.byte_len-
+                   ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
+
+               OPAL_THREAD_ADD32(&endpoint->rr_posted_low, -1);
+
+               mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0);
+
+               openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
+                   frag->hdr->tag,
+                   &frag->base,
+                   openib_btl->ib_reg[frag->hdr->tag].cbdata);
+
+               OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_max), (opal_list_item_t*) frag);
+
+               count++;
+               break;

            case IBV_WC_RDMA_WRITE:
            case IBV_WC_SEND :
-               if(wc.opcode & IBV_WC_RECV){
-                   /* process a recv completion (this should only occur for a send not an rdma) */
-                   DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
-                   frag = (mca_btl_openib_frag_t*) wc.wr_id;
-                   frag->rc=OMPI_SUCCESS;
-                   frag->segment.seg_len =
-                       wc.byte_len-
-                       ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
-
-                   /* advance the segment address past the header and subtract from the length..*/
-                   openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
-                       frag->hdr->tag,
-                       &frag->base,
-                       openib_btl->ib_reg[frag->hdr->tag].cbdata);
-
-                   OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag);
-                   OPAL_THREAD_ADD32(&openib_btl->rr_posted_high, -1);
-
-                   mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0);
-
-                   count++;
-               }
-               else {
-                   /* Process a completed send */
-                   frag = (mca_btl_openib_frag_t*) wc.wr_id;
-                   frag->rc = OMPI_SUCCESS;
-                   frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc);
-                   count++;
-               }
-
+               /* Process a completed send */
+               frag = (mca_btl_openib_frag_t*) wc.wr_id;
+               frag->rc = OMPI_SUCCESS;
+               frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc);
+               count++;
                break;

            default:
-               opal_output(0, "Errorneous network completion");
+               BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode);
                break;
            }
        }
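The comment added in mca_btl_openib_component_progress() above describes the polling policy: drain the high-priority completion queue, then poll the low-priority queue at most once per progress call. A condensed, standalone sketch of that control flow against libibverbs; the two CQ arguments and the handle_wc() helper are placeholders for the BTL's own structures and opcode dispatch, not OMPI code:

#include <infiniband/verbs.h>

/* stand-in for the BTL's completion dispatch; the real code switches on wc->opcode */
int handle_wc(const struct ibv_wc *wc)
{
    return (wc->status == IBV_WC_SUCCESS) ? 1 : 0;
}

/* drain the high-priority CQ, then poll the low-priority CQ at most once */
int progress_one_btl(struct ibv_cq *cq_high, struct ibv_cq *cq_low)
{
    struct ibv_wc wc;
    int count = 0, ne;

    do {
        ne = ibv_poll_cq(cq_high, 1, &wc);
        if (ne < 0) {
            return -1;                 /* polling error */
        }
        if (ne == 1) {
            count += handle_wc(&wc);   /* process one high-priority completion */
        }
    } while (ne > 0);

    ne = ibv_poll_cq(cq_low, 1, &wc);  /* at most one low-priority completion */
    if (ne == 1) {
        count += handle_wc(&wc);
    }
    return count;
}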
@@ -74,14 +74,17 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
    }

    frag->sr_desc.opcode = IBV_WR_SEND;
    frag->sr_desc.send_flags = IBV_SEND_SIGNALED;

-   frag->sg_entry.length =
-       frag->segment.seg_len +
-       ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
+   frag->sg_entry.length = frag->segment.seg_len + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); /* sizeof(mca_btl_openib_header_t); */

-   if(frag->sg_entry.length <= openib_btl->ib_inline_max) {
-       /* frag->sr_desc.send_flags |= IBV_SEND_INLINE; */
-   }
+   /* TODO: should check if we can inline send, but can't find
+    * inline send defined in openib verbs api.
+    * if(frag->sg_entry.len <= openib_btl->ib_inline_max) {
+    */

    if(ibv_post_send(ib_qp,
        &frag->sr_desc,
        &bad_wr)) {
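The TODO above notes that an inline-send check could not be expressed against the verbs API available at the time. In later libibverbs releases the same idea is written with the IBV_SEND_INLINE work-request flag, gated on the max_inline_data value reported at QP creation (which this commit starts saving in openib_btl->ib_inline_max). A hedged sketch of that check, not the OMPI code:

#include <infiniband/verbs.h>

/* request a signaled send, and ask for inline data only when the payload fits
 * in the inline budget the QP advertised via qp_init_attr.cap.max_inline_data */
void set_send_flags(struct ibv_send_wr *wr, uint32_t length, uint32_t inline_max)
{
    wr->send_flags = IBV_SEND_SIGNALED;
    if (length <= inline_max) {
        wr->send_flags |= IBV_SEND_INLINE;
    }
}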
@@ -90,7 +93,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
    }
    mca_btl_openib_endpoint_post_rr(endpoint, 1);

-   return OMPI_ERROR;
+   return OMPI_SUCCESS;
}

@@ -114,6 +117,14 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
    OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
+   endpoint->lcl_qp_attr_high = (struct ibv_qp_attr *) malloc(sizeof(struct ibv_qp_attr));
+   endpoint->lcl_qp_attr_low = (struct ibv_qp_attr *) malloc(sizeof(struct ibv_qp_attr));
+   memset(endpoint->lcl_qp_attr_high, 0, sizeof(struct ibv_qp_attr));
+   memset(endpoint->lcl_qp_attr_low, 0, sizeof(struct ibv_qp_attr));
+   endpoint->rr_posted_high = 0;
+   endpoint->rr_posted_low = 0;

}

/*

@@ -190,9 +201,9 @@ static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* end

    DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
-       endpoint->lcl_qp_prop_high.qp_num,
-       endpoint->lcl_qp_prop_low.qp_num,
-       endpoint->endpoint_btl->port.lid);
+       endpoint->lcl_qp_high->qp_num,
+       endpoint->lcl_qp_low->qp_num,
+       endpoint->endpoint_btl->ib_port_attr->lid);

    if(rc < 0) {
        ORTE_ERROR_LOG(rc);

@@ -318,6 +329,7 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
            ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
        return rc;
    }
+   srand48(getpid() * time(NULL));
    endpoint->lcl_psn_high = lrand48() & 0xffffff;

    /* Create the Low Priority Queue Pair */

@@ -334,7 +346,7 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi

    DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
        endpoint->lcl_qp_high->qp_num,
-       endpoint->lcl_qp_low.qp_num,
+       endpoint->lcl_qp_low->qp_num,
        openib_btl->ib_port_attr->lid);

    /* Send connection info over to remote endpoint */

@@ -367,6 +379,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
            ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
        return rc;
    }
+   srand48(getpid() * time(NULL));
    endpoint->lcl_psn_high = lrand48() & 0xffffff;

    /* Create the Low Priority Queue Pair */

@@ -383,7 +396,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t

    DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
        endpoint->lcl_qp_high->qp_num,
-       endpoint->lcl_qp_low.qp_num,
+       endpoint->lcl_qp_low->qp_num,
        openib_btl->ib_port_attr->lid);

@@ -415,6 +428,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
static void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
{
    endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
+   endpoint->endpoint_btl->poll_cq = true;
    mca_btl_openib_progress_send_frags(endpoint);
}

@@ -491,12 +505,13 @@ static void mca_btl_openib_endpoint_recv(
            break;

        case MCA_BTL_IB_CONNECT_ACK:
+           DEBUG_OUT("Got a connect ack from %d\n", endpoint->vpid);
            mca_btl_openib_endpoint_connected(ib_endpoint);

            break;

        case MCA_BTL_IB_CONNECTED :

            break;
        default :
            opal_output(0, "Connected -> Connecting not possible.\n");

@@ -581,9 +596,9 @@ int mca_btl_openib_endpoint_send(

            DEBUG_OUT("Send to : %d, len : %d, frag : %p",
-               endpoint->endpoint_proc->proc_guid.vpid,
-               frag->ib_buf.desc.sg_entry.len,
-               frag);
+               endpoint->endpoint_proc->proc_guid.vpid,
+               frag->sg_entry.length,
+               frag);

            rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag);

@@ -686,23 +701,27 @@ int mca_btl_openib_endpoint_create_qp(
    )
{
    {
+       struct ibv_qp* my_qp;
        struct ibv_qp_init_attr qp_init_attr;

        memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));

        qp_init_attr.send_cq = cq;
        qp_init_attr.recv_cq = cq;
        qp_init_attr.cap.max_send_wr = mca_btl_openib_component.ib_wq_size;
        qp_init_attr.cap.max_recv_wr = mca_btl_openib_component.ib_wq_size;
        qp_init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
        qp_init_attr.cap.max_recv_sge = mca_btl_openib_component.ib_sg_list_size;
        qp_init_attr.qp_type = IBV_QPT_RC;

-       (*qp) = ibv_create_qp(pd, &qp_init_attr);
+       my_qp = ibv_create_qp(pd, &qp_init_attr);

-       if(NULL == (*qp)) {
+       if(NULL == my_qp) {
            opal_output(0, "%s: error creating qp errno says %s\n", __func__, strerror(errno));
            return OMPI_ERROR;
        }
+       (*qp) = my_qp;
+       openib_btl->ib_inline_max = qp_init_attr.cap.max_inline_data;

    }

@@ -711,7 +730,7 @@ int mca_btl_openib_endpoint_create_qp(
    qp_attr->qp_state = IBV_QPS_INIT;
    qp_attr->pkey_index = mca_btl_openib_component.ib_pkey_ix;
    qp_attr->port_num = openib_btl->port_num;
-   qp_attr->qp_access_flags = 0;
+   qp_attr->qp_access_flags = IBV_ACCESS_REMOTE_WRITE;

    if(ibv_modify_qp((*qp), qp_attr,
        IBV_QP_STATE |
@@ -1,3 +1,4 @@
+
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 * All rights reserved.

@@ -90,7 +91,7 @@ struct mca_btl_base_endpoint_t {
    /**< lock for concurrent access to endpoint state */

    opal_list_t pending_send_frags;
    /**< list of pending send frags for this endpoint */

    uint32_t rem_qp_num_high;
    uint32_t rem_qp_num_low;

@@ -115,7 +116,11 @@ struct mca_btl_base_endpoint_t {
    struct ibv_qp_attr* lcl_qp_attr_high;
    struct ibv_qp_attr* lcl_qp_attr_low;
    /* Local QP attributes (Low and High) */

+   uint32_t rr_posted_high; /**< number of high priority rr posted to the nic */
+   uint32_t rr_posted_low;  /**< number of low priority rr posted to the nic */

};

typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;

@@ -160,16 +165,15 @@ static inline int mca_btl_openib_endpoint_post_rr_sub(int cnt,
    }

    for(i=0; i< cnt; i++){
        if(ibv_post_recv(qp,
            &rr_desc_post[i],
            &bad_wr)) {
            opal_output(0, "%s: error posting receive errno says %s\n", __func__, strerror(errno));
            return OMPI_ERROR;
        }
    }
+   OPAL_THREAD_ADD32(rr_posted, cnt);
    return OMPI_SUCCESS;
}

@@ -179,12 +183,12 @@ static inline int mca_btl_openib_endpoint_post_rr( mca_btl_openib_endpoint_t * e
    int rc;
    OPAL_THREAD_LOCK(&openib_btl->ib_lock);

-   if(openib_btl->rr_posted_high <= mca_btl_openib_component.ib_rr_buf_min+additional && openib_btl->rr_posted_high < mca_btl_openib_component.ib_rr_buf_max){
+   if(endpoint->rr_posted_high <= mca_btl_openib_component.ib_rr_buf_min+additional && endpoint->rr_posted_high < mca_btl_openib_component.ib_rr_buf_max){

-       rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - openib_btl->rr_posted_high,
+       rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - endpoint->rr_posted_high,
            endpoint,
            &openib_btl->recv_free_eager,
-           &openib_btl->rr_posted_high,
+           &endpoint->rr_posted_high,
            endpoint->lcl_qp_high
            );
        if(rc != OMPI_SUCCESS){

@@ -192,12 +196,12 @@ static inline int mca_btl_openib_endpoint_post_rr( mca_btl_openib_endpoint_t * e
            return rc;
        }
    }
-   if(openib_btl->rr_posted_low <= mca_btl_openib_component.ib_rr_buf_min+additional && openib_btl->rr_posted_low < mca_btl_openib_component.ib_rr_buf_max){
+   if(endpoint->rr_posted_low <= mca_btl_openib_component.ib_rr_buf_min+additional && endpoint->rr_posted_low < mca_btl_openib_component.ib_rr_buf_max){

-       rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - openib_btl->rr_posted_low,
+       rc = mca_btl_openib_endpoint_post_rr_sub(mca_btl_openib_component.ib_rr_buf_max - endpoint->rr_posted_low,
            endpoint,
            &openib_btl->recv_free_max,
-           &openib_btl->rr_posted_low,
+           &endpoint->rr_posted_low,
            endpoint->lcl_qp_low
            );
        if(rc != OMPI_SUCCESS) {
@@ -45,11 +45,12 @@ static void mca_btl_openib_send_frag_common_constructor(mca_btl_openib_frag_t* f
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;

-   frag->sr_desc.wr_id = frag;
+   frag->sr_desc.wr_id = (uint64_t) frag;
    frag->sr_desc.sg_list = &frag->sg_entry;
    frag->sr_desc.num_sge = 1;
    frag->sr_desc.opcode = IBV_WR_SEND;
+   frag->sr_desc.send_flags = IBV_SEND_SIGNALED;
    frag->sr_desc.next = NULL;
}

static void mca_btl_openib_recv_frag_common_constructor(mca_btl_openib_frag_t* frag)

@@ -60,10 +61,11 @@ static void mca_btl_openib_recv_frag_common_constructor(mca_btl_openib_frag_t* f
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;

-   frag->rr_desc.wr_id = frag;
+   frag->rr_desc.wr_id = (uint64_t) frag;
    frag->rr_desc.sg_list = &frag->sg_entry;
    frag->rr_desc.num_sge = 1;
    frag->rr_desc.next = NULL;
}

static void mca_btl_openib_send_frag_eager_constructor(mca_btl_openib_frag_t* frag)
@@ -30,11 +30,11 @@ extern "C" {

static inline void * DOWN_ALIGN_ADDR(void * addr, uint32_t cnt) {
-   return (void*)((uintptr_t)(addr) & (~((uintptr_t)0) << (cnt)));
+   return (void*)((uintptr_t)((unsigned char*) addr) & (~((uintptr_t)0) << (cnt)));
}

static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) {
-   DOWN_ALIGN_ADDR(((addr) + ~(~((uintptr_t)0) << (cnt))), (cnt));
+   DOWN_ALIGN_ADDR((((unsigned char*) addr) + ~(~((uintptr_t)0) << (cnt))), (cnt));
    return addr;
}
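Note that ALIGN_ADDR above computes the rounded-up address but still returns addr unchanged, in both the old and the new version of the hunk. For reference, a self-contained illustration of power-of-two address alignment, independent of the OMPI macros:

#include <stdint.h>
#include <stdio.h>

/* round addr down / up to a 2^cnt byte boundary */
static void *down_align(void *addr, unsigned cnt)
{
    uintptr_t mask = (((uintptr_t)1) << cnt) - 1;
    return (void *)((uintptr_t)addr & ~mask);
}

static void *up_align(void *addr, unsigned cnt)
{
    uintptr_t mask = (((uintptr_t)1) << cnt) - 1;
    return (void *)(((uintptr_t)addr + mask) & ~mask);
}

int main(void)
{
    void *p = (void *)(uintptr_t)0x12345;
    /* for a 4 KB boundary (cnt = 12): down -> 0x12000, up -> 0x13000 */
    printf("%p -> down %p, up %p\n", p, down_align(p, 12), up_align(p, 12));
    return 0;
}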
@@ -70,13 +70,6 @@ static void mca_mpool_openib_registration_constructor( mca_mpool_openib_registra

static void mca_mpool_openib_registration_destructor( mca_mpool_openib_registration_t * registration )
{
-   mca_mpool_base_remove((void*) registration);
-   registration->base_reg.mpool->mpool_deregister(
-       registration->base_reg.mpool,
-       registration->base_reg.base,
-       0,
-       (mca_mpool_base_registration_t*) registration);
-
    registration->base_reg.base = NULL;
    registration->base_reg.bound = NULL;
    registration->base_reg.is_leave_pinned=false;

@@ -57,6 +57,7 @@ void* mca_mpool_openib_alloc(
        free(addr_malloc);
        return NULL;
    }
+   (*registration)->alloc_base = addr_malloc;
    return addr;
}

@@ -80,7 +81,7 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool,
        mpool_module->resources.ib_pd,
        addr,
        size,
-       IBV_ACCESS_REMOTE_WRITE
+       IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE
        /* IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE */
        );

@@ -106,7 +107,7 @@ int mca_mpool_openib_deregister(mca_mpool_base_module_t* mpool, void *addr, size
    mca_mpool_openib_module_t * mpool_openib = (mca_mpool_openib_module_t*) mpool;
    mca_mpool_openib_registration_t * openib_reg;
    openib_reg = (mca_mpool_openib_registration_t*) registration;
-   if(! ibv_dereg_mr(openib_reg->mr)){
+   if(ibv_dereg_mr(openib_reg->mr)){
        opal_output(0, "%s: error unpinning openib memory errno says %s\n", __func__, strerror(errno));
        return OMPI_ERROR;
    }