
Modified ib bmi to use inline send

Post receive requests during send 

This commit was SVN r5973.
This commit is contained in:
Galen Shipman 2005-06-07 20:19:44 +00:00
parent cfb1e2d608
Commit 66cc6df56d
16 changed files with 776 additions and 525 deletions
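
The two changes named in the commit message are easiest to see in the send path of the new bmi_ib_endpoint.c below. A minimal sketch of the inline-send decision follows; the identifiers (ib_inline_max, EVAPI_post_inline_sr, VAPI_post_sr, the frag fields) are taken from the diff, but the helper itself is only an illustration, not code from this commit:

/* Illustrative sketch: post a send work request, using the VAPI inline
 * path when the header plus payload fits within the queue pair's
 * reported inline limit (ib_inline_max, obtained via VAPI_query_qp). */
static inline int post_send_sketch(mca_bmi_ib_module_t* ib_bmi,
                                   mca_bmi_ib_endpoint_t* endpoint,
                                   mca_bmi_ib_send_frag_t* frag)
{
    frag->sg_entry.len = frag->segment.seg_len + sizeof(mca_bmi_ib_header_t);
    if (frag->sg_entry.len <= ib_bmi->ib_inline_max) {
        /* small message: data is copied into the work request itself */
        frag->ret = EVAPI_post_inline_sr(ib_bmi->nic, endpoint->lcl_qp_hndl,
                                         &frag->sr_desc);
    } else {
        /* large message: ordinary send request referencing registered memory */
        frag->ret = VAPI_post_sr(ib_bmi->nic, endpoint->lcl_qp_hndl,
                                 &frag->sr_desc);
    }
    return (VAPI_OK == frag->ret) ? OMPI_SUCCESS : OMPI_ERROR;
}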

View file

@@ -52,10 +52,10 @@ int mca_bmi_base_close(void)
/* Close all remaining opened components (may be one if this is a
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
if (0 != ompi_list_get_size(&mca_bmi_base_components_opened)) {
mca_base_components_close(mca_bmi_base_output,
&mca_bmi_base_components_opened, NULL);
mca_base_components_close(mca_bmi_base_output,
&mca_bmi_base_components_opened, NULL);
}
/* cleanup */

View file

@@ -27,7 +27,6 @@ libmca_bmi_ib_la_SOURCES = \
bmi_ib_endpoint.h \
bmi_ib_frag.c \
bmi_ib_frag.h \
bmi_ib_memory.c \
bmi_ib_priv.c \
bmi_ib_priv.h \
bmi_ib_proc.c \

View file

@@ -25,6 +25,11 @@
#include "bmi_ib.h"
#include "bmi_ib_frag.h"
#include "bmi_ib_addr.h"
#include "bmi_ib_proc.h"
#include "bmi_ib_endpoint.h"
#include "bmi_ib_priv.h"
mca_bmi_ib_module_t mca_bmi_ib_module = {
{
@@ -215,7 +220,7 @@ int mca_bmi_ib_send(
{
mca_bmi_ib_frag_t* frag = (mca_bmi_ib_frag_t*)descriptor;
frag->endpoint = endpoint;
frag->hdr->tag = tag;
frag->type = MCA_BMI_IB_FRAG_SEND;

View file

@@ -34,15 +34,11 @@
#include "mca/bmi/bmi.h"
#include "util/output.h"
#include "mca/mpool/mpool.h"
#include "bmi_ib_vapi.h"
/* InfiniBand VAPI includes */
#include "mca/bmi/bmi.h"
#include "bmi_ib_vapi.h"
#include "bmi_ib_addr.h"
#include "bmi_ib_proc.h"
#include "bmi_ib_endpoint.h"
#include "bmi_ib_priv.h"
#include "bmi_ib_frag.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@@ -87,8 +83,12 @@ struct mca_bmi_ib_component_t {
char* ib_mpool_name;
/**< name of ib memory pool */
int ib_rr_buf_max;
/**< the maximum number of posted rr */
int ib_rr_buf_min;
/**< the minimum number of posted rr */
}; typedef struct mca_bmi_ib_component_t mca_bmi_ib_component_t;
@@ -114,13 +114,16 @@ struct mca_bmi_ib_module_t {
EVAPI_async_handler_hndl_t async_handler;
/**< Async event handler used to detect weird/unknown events */
mca_bmi_ib_mem_registry_t mem_registry; /**< registry of memory regions */
ompi_free_list_t send_free; /**< free list of buffer descriptors */
ompi_free_list_t recv_free; /**< free list of buffer descriptors */
ompi_list_t repost; /**< list of buffers to repost */
mca_mpool_base_module_t* ib_pool;
/**< ib memory pool */
mca_mpool_base_module_t* ib_pool; /**< ib memory pool */
uint32_t rr_posted; /**< number of rr posted to the nic*/
VAPI_rr_desc_t* rr_desc_post;
/**< an array to allow posting of rr in one swoop */
size_t ib_inline_max; /**< max size of inline send*/
}; typedef struct mca_bmi_ib_module_t mca_bmi_ib_module_t;
extern mca_bmi_ib_module_t mca_bmi_ib_module;

View file

@@ -33,6 +33,7 @@
#include "bmi_ib_frag.h"
#include "bmi_ib_sendfrag.h"
#include "bmi_ib_recvfrag.h"
#include "bmi_ib_endpoint.h"
mca_bmi_ib_component_t mca_bmi_ib_component = {
{
@@ -119,7 +120,8 @@ int mca_bmi_ib_component_open(void)
mca_bmi_ib_param_register_int ("hints_log_size", 8);
mca_bmi_ib_component.ib_mpool_name =
mca_bmi_ib_param_register_string("mpool", "ib");
mca_bmi_ib_component.ib_rr_buf_max =
mca_bmi_ib_param_register_int("rr_buf_max", 16);
mca_bmi_ib_module.super.bmi_exclusivity =
mca_bmi_ib_param_register_int ("exclusivity", 0);
@@ -133,7 +135,9 @@ int mca_bmi_ib_component_open(void)
- sizeof(mca_bmi_ib_header_t)));
mca_bmi_ib_module.super.bmi_max_frag_size =
mca_bmi_ib_param_register_int ("max_frag_size", 2<<30);
return OMPI_SUCCESS;
}
@@ -255,6 +259,9 @@ mca_bmi_base_module_t** mca_bmi_ib_component_init(int *num_bmi_modules,
mca_bmi_ib_component.ib_free_list_max,
mca_bmi_ib_component.ib_free_list_inc, ib_bmi->ib_pool);
/* Initialize the rr_desc_post array for posting of rr*/
ib_bmi->rr_desc_post = (VAPI_rr_desc_t*) malloc((mca_bmi_ib_component.ib_rr_buf_max * sizeof(VAPI_rr_desc_t)));
/* This is now done by the memory pool passed to free_list_init.. Initialize the send descriptors */
/* if(mca_bmi_ib_send_frag_register(ib_bmi) != OMPI_SUCCESS) { */
/* free(hca_ids); */
@@ -374,28 +381,23 @@ int mca_bmi_ib_component_progress()
case IB_COMP_RECV :
ompi_output(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
D_PRINT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__);
frag = (mca_bmi_ib_frag_t*) comp_addr;
frag->segment.seg_len = comp.byte_len-sizeof(mca_bmi_ib_header_t);
/* advance the segment address past the header and subtract from the length..*/
ib_bmi->ib_reg[frag->hdr->tag].cbfunc(&ib_bmi->super, frag->hdr->tag, &frag->base, ib_bmi->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&ib_bmi->recv_free, (ompi_free_list_item_t*)comp_addr);
/* Process incoming receives */
/* mca_bmi_ib_process_recv(ib_bmi, comp_addr); */
/* /\* Re post recv buffers *\/ */
/* if(ompi_list_get_size(&ib_bmi->repost) <= 1) { */
/* ompi_list_append(&ib_bmi->repost, (ompi_list_item_t*)comp_addr); */
/* } else { */
/* ompi_list_item_t* item; */
/* while(NULL != (item = ompi_list_remove_first(&ib_bmi->repost))) { */
/* mca_bmi_ib_buffer_repost(ib_bmi->nic, item); */
/* } */
/* mca_bmi_ib_buffer_repost(ib_bmi->nic, comp_addr); */
/* } */
/* count++; */
if(OMPI_THREAD_ADD32(&ib_bmi->rr_posted, -1) <= mca_bmi_ib_component.ib_rr_buf_min)
mca_bmi_ib_endpoint_post_rr(mca_bmi_ib_component.ib_rr_buf_max - ib_bmi->rr_posted,
((mca_bmi_ib_recv_frag_t*)comp_addr)->endpoint);
count++;
break;
case IB_COMP_RDMA_W :
ompi_output(0, "%s:%d RDMA not implemented\n", __FILE__,__LINE__);

src/mca/bmi/ib/bmi_ib_endpoint.c (new file, 567 lines)
View file

@@ -0,0 +1,567 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "include/types.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/ns/base/base.h"
#include "mca/oob/base/base.h"
#include "mca/rml/rml.h"
#include "mca/errmgr/errmgr.h"
#include "dps/dps.h"
#include "bmi_ib.h"
#include "bmi_ib_endpoint.h"
#include "bmi_ib_addr.h"
#include "bmi_ib_proc.h"
#include "bmi_ib_priv.h"
#include "bmi_ib_frag.h"
#include "class/ompi_free_list.h"
static void mca_bmi_ib_endpoint_construct(mca_bmi_base_endpoint_t* endpoint);
static void mca_bmi_ib_endpoint_destruct(mca_bmi_base_endpoint_t* endpoint);
static inline int mca_bmi_ib_endpoint_post_send(mca_bmi_ib_module_t* ib_bmi, mca_bmi_ib_endpoint_t * endpoint, mca_bmi_ib_send_frag_t * frag)
{
frag->sr_desc.remote_qkey = 0;
frag->sr_desc.remote_qp = endpoint->rem_qp_num;
frag->sg_entry.len = frag->segment.seg_len + sizeof(mca_bmi_ib_header_t);
if(frag->sg_entry.len <= ib_bmi->ib_inline_max) {
frag->ret = EVAPI_post_inline_sr(ib_bmi->nic,
endpoint->lcl_qp_hndl,
&frag->sr_desc);
}else {
frag->ret = VAPI_post_sr(ib_bmi->nic,
endpoint->lcl_qp_hndl,
&frag->sr_desc);
}
if(VAPI_OK != frag->ret)
return OMPI_ERROR;
return OMPI_SUCCESS;
}
OBJ_CLASS_INSTANCE(mca_bmi_ib_endpoint_t,
ompi_list_item_t, mca_bmi_ib_endpoint_construct,
mca_bmi_ib_endpoint_destruct);
/*
* Initialize state of the endpoint instance.
*
*/
static void mca_bmi_ib_endpoint_construct(mca_bmi_base_endpoint_t* endpoint)
{
endpoint->endpoint_bmi = 0;
endpoint->endpoint_proc = 0;
endpoint->endpoint_tstamp = 0.0;
endpoint->endpoint_state = MCA_BMI_IB_CLOSED;
endpoint->endpoint_retries = 0;
OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, ompi_mutex_t);
OBJ_CONSTRUCT(&endpoint->pending_send_frags, ompi_list_t);
}
/*
* Destroy a endpoint
*
*/
static void mca_bmi_ib_endpoint_destruct(mca_bmi_base_endpoint_t* endpoint)
{
}
/*
* Send connection information to remote endpoint using OOB
*
*/
static void mca_bmi_ib_endpoint_send_cb(
int status,
orte_process_name_t* endpoint,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
}
static int mca_bmi_ib_endpoint_send_connect_req(mca_bmi_base_endpoint_t* endpoint)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
if(NULL == buffer) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the info in the send buffer */
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop.qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.pack(buffer, &endpoint->endpoint_bmi->port.lid, 1, ORTE_UINT32);
/* send to endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_bmi_ib_endpoint_send_cb, NULL);
D_PRINT("Sending QP num = %d, LID = %d",
endpoint->lcl_qp_prop.qp_num,
endpoint->endpoint_bmi->port.lid);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Send connect ACK to remote endpoint
*
*/
static int mca_bmi_ib_endpoint_send_connect_ack(mca_bmi_base_endpoint_t* endpoint)
{
orte_buffer_t* buffer = OBJ_NEW(orte_buffer_t);
int rc;
uint32_t zero = 0;
/* pack the info in the send buffer */
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_dps.pack(buffer, &zero, 1, ORTE_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send to endpoint */
rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, buffer, ORTE_RML_TAG_DYNAMIC-1, 0,
mca_bmi_ib_endpoint_send_cb, NULL);
if(rc < 0) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/*
* Set remote connection info
*
* XXX: Currently size is unutilized, this shall change
* as soon as we add more info to be exchanged at connection
* setup.
*
*/
static int mca_bmi_ib_endpoint_set_remote_info(mca_bmi_base_endpoint_t* endpoint, orte_buffer_t* buffer)
{
int rc;
size_t cnt = 1;
rc = orte_dps.unpack(buffer, &endpoint->rem_qp_num, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dps.unpack(buffer, &endpoint->rem_lid, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
D_PRINT("Received QP num = %d, LID = %d",
endpoint->rem_qp_num,
endpoint->rem_lid);
return ORTE_SUCCESS;
}
static int mca_bmi_ib_endpoint_init(mca_bmi_ib_endpoint_t *endpoint)
{
return OMPI_SUCCESS;
}
/*
* Start to connect to the endpoint. We send our Queue Pair
* information over the TCP OOB communication mechanism.
* On completion of our send, a send completion handler
* is called.
*
*/
static int mca_bmi_ib_endpoint_start_connect(mca_bmi_base_endpoint_t* endpoint)
{
mca_bmi_ib_module_t* ib_bmi = endpoint->endpoint_bmi;
int rc;
/* Create the Queue Pair */
if(OMPI_SUCCESS != (rc = mca_bmi_ib_create_qp(ib_bmi->nic,
ib_bmi->ptag,
ib_bmi->cq_hndl,
ib_bmi->cq_hndl,
&endpoint->lcl_qp_hndl,
&endpoint->lcl_qp_prop,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
D_PRINT("Initialized QP num = %d, LID = %d",
endpoint->lcl_qp_prop.qp_num,
ib_bmi->port.lid);
/* Send connection info over to remote endpoint */
endpoint->endpoint_state = MCA_BMI_IB_CONNECTING;
if(OMPI_SUCCESS != (rc = mca_bmi_ib_endpoint_send_connect_req(endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
* Reply to a `start - connect' message
*
*/
static int mca_bmi_ib_endpoint_reply_start_connect(mca_bmi_ib_endpoint_t *endpoint, orte_buffer_t* buffer)
{
mca_bmi_ib_module_t* ib_bmi = endpoint->endpoint_bmi;
int rc;
/* Create the Queue Pair */
if(OMPI_SUCCESS != (rc = mca_bmi_ib_create_qp(ib_bmi->nic,
ib_bmi->ptag,
ib_bmi->cq_hndl,
ib_bmi->cq_hndl,
&endpoint->lcl_qp_hndl,
&endpoint->lcl_qp_prop,
VAPI_TS_RC))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
D_PRINT("Initialized QP num = %d, LID = %d",
endpoint->lcl_qp_prop.qp_num,
ib_bmi->port.lid);
/* Set the remote side info */
mca_bmi_ib_endpoint_set_remote_info(endpoint, buffer);
/* Connect to endpoint */
rc = mca_bmi_ib_endpoint_connect(endpoint);
if(rc != OMPI_SUCCESS) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
/* Send connection info over to remote endpoint */
if(OMPI_SUCCESS != (rc = mca_bmi_ib_endpoint_send_connect_req(endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
return rc;
}
return OMPI_SUCCESS;
}
/*
*
*/
static void mca_bmi_ib_endpoint_connected(mca_bmi_ib_endpoint_t *endpoint)
{
endpoint->endpoint_state = MCA_BMI_IB_CONNECTED;
mca_bmi_ib_progress_send_frags(endpoint);
}
/*
* Non blocking OOB recv callback.
* Read incoming QP and other info, and if this endpoint
* is trying to connect, reply with our QP info,
* otherwise try to modify QP's and establish
* reliable connection
*
*/
static void mca_bmi_ib_endpoint_recv(
int status,
orte_process_name_t* endpoint,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
mca_bmi_ib_proc_t *ib_proc;
mca_bmi_ib_endpoint_t *ib_endpoint;
int endpoint_state;
int rc;
for(ib_proc = (mca_bmi_ib_proc_t*)
ompi_list_get_first(&mca_bmi_ib_component.ib_procs);
ib_proc != (mca_bmi_ib_proc_t*)
ompi_list_get_end(&mca_bmi_ib_component.ib_procs);
ib_proc = (mca_bmi_ib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_guid.vpid == endpoint->vpid) {
/* Try to get the endpoint instance of this proc */
/* Limitation: Right now, we have only 1 endpoint
* for every process. Need several changes, some
* in PML/BMI interface to set this right */
ib_endpoint = ib_proc->proc_endpoints[0];
endpoint_state = ib_endpoint->endpoint_state;
/* Update status */
switch(endpoint_state) {
case MCA_BMI_IB_CLOSED :
/* We had this connection closed before.
* The endpoint is trying to connect. Move the
* status of this connection to CONNECTING,
* and then reply with our QP information */
if(OMPI_SUCCESS != (rc = mca_bmi_ib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Setup state as connected */
ib_endpoint->endpoint_state = MCA_BMI_IB_CONNECT_ACK;
break;
case MCA_BMI_IB_CONNECTING :
mca_bmi_ib_endpoint_set_remote_info(ib_endpoint, buffer);
if(OMPI_SUCCESS != (rc = mca_bmi_ib_endpoint_connect(ib_endpoint))) {
ompi_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), __FILE__,__LINE__,rc);
break;
}
/* Setup state as connected */
mca_bmi_ib_endpoint_connected(ib_endpoint);
/* Send him an ack */
mca_bmi_ib_endpoint_send_connect_ack(ib_endpoint);
break;
case MCA_BMI_IB_CONNECT_ACK:
mca_bmi_ib_endpoint_connected(ib_endpoint);
break;
case MCA_BMI_IB_CONNECTED :
break;
default :
ompi_output(0, "Connected -> Connecting not possible.\n");
}
break;
}
}
/* Okay, now that we are done receiving,
* re-post the buffer */
mca_bmi_ib_post_recv();
}
void mca_bmi_ib_post_recv()
{
D_PRINT("");
orte_rml.recv_buffer_nb(
ORTE_RML_NAME_ANY,
ORTE_RML_TAG_DYNAMIC-1,
0,
mca_bmi_ib_endpoint_recv,
NULL);
}
/*
* Attempt to send a fragment using a given endpoint. If the endpoint is not
* connected, queue the fragment and start the connection as required.
*/
int mca_bmi_ib_endpoint_send(
mca_bmi_base_endpoint_t* endpoint,
mca_bmi_ib_frag_t* frag
)
{
int rc;
mca_bmi_ib_module_t *ib_bmi;
OMPI_THREAD_LOCK(&endpoint->endpoint_send_lock);
switch(endpoint->endpoint_state) {
case MCA_BMI_IB_CONNECTING:
D_PRINT("Queing because state is connecting");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_BMI_IB_CONNECT_ACK:
D_PRINT("Queuing because waiting for ack");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = OMPI_SUCCESS;
break;
case MCA_BMI_IB_CLOSED:
D_PRINT("Connection to endpoint closed ... connecting ...");
ompi_list_append(&endpoint->pending_send_frags,
(ompi_list_item_t *)frag);
rc = mca_bmi_ib_endpoint_start_connect(endpoint);
break;
case MCA_BMI_IB_FAILED:
rc = OMPI_ERR_UNREACH;
break;
case MCA_BMI_IB_CONNECTED:
{
ib_bmi = endpoint->endpoint_bmi;
A_PRINT("Send to : %d, len : %d, frag : %p",
endpoint->endpoint_proc->proc_guid.vpid,
frag->ib_buf.desc.sg_entry.len,
frag);
rc = mca_bmi_ib_endpoint_post_send(ib_bmi, endpoint, frag);
if(ib_bmi->rr_posted <= mca_bmi_ib_component.ib_rr_buf_min+1)
mca_bmi_ib_endpoint_post_rr(mca_bmi_ib_component.ib_rr_buf_max - ib_bmi->rr_posted,
endpoint);
/* rc = mca_bmi_ib_post_send(endpoint->endpoint_bmi, endpoint, */
/* &frag->ib_buf, (void*) frag); */
/* while(NULL != (item = ompi_list_remove_first(&ib_bmi->repost))) { */
/* mca_bmi_ib_buffer_repost(ib_bmi->nic, item); */
/* } */
/* break; */
break;
}
default:
rc = OMPI_ERR_UNREACH;
}
OMPI_THREAD_UNLOCK(&endpoint->endpoint_send_lock);
return rc;
}
void mca_bmi_ib_progress_send_frags(mca_bmi_ib_endpoint_t* endpoint)
{
ompi_list_item_t *frag_item;
mca_bmi_ib_send_frag_t *frag;
mca_bmi_ib_module_t* ib_bmi;
/*Check if endpoint is connected */
if(endpoint->endpoint_state != MCA_BMI_IB_CONNECTED) {
return;
}
/* While there are frags in the list,
* process them */
while(!ompi_list_is_empty(&(endpoint->pending_send_frags))) {
frag_item = ompi_list_remove_first(&(endpoint->pending_send_frags));
frag = (mca_bmi_ib_send_frag_t *) frag_item;
ib_bmi = endpoint->endpoint_bmi;
/* We need to post this one */
if(OMPI_SUCCESS != mca_bmi_ib_endpoint_post_send(ib_bmi, endpoint, frag))
ompi_output(0, "error in mca_bmi_ib_endpoint_send");
}
}
/*
* Complete connection to endpoint.
*/
int mca_bmi_ib_endpoint_connect(
mca_bmi_ib_endpoint_t *endpoint)
{
int rc;
mca_bmi_ib_module_t *ib_bmi = endpoint->endpoint_bmi;
/* Connection establishment RC */
rc = mca_bmi_ib_qp_init(ib_bmi->nic,
endpoint->lcl_qp_hndl,
endpoint->rem_qp_num,
endpoint->rem_lid);
if(rc != OMPI_SUCCESS) {
return rc;
}
rc = mca_bmi_ib_qp_query(ib_bmi,
endpoint->lcl_qp_hndl,
endpoint->rem_qp_num);
if(rc != OMPI_SUCCESS) {
return rc;
}
mca_bmi_ib_endpoint_post_rr(mca_bmi_ib_component.ib_rr_buf_max, endpoint);
return OMPI_SUCCESS;
}

View file

@@ -25,10 +25,11 @@
#include "mca/bmi/bmi.h"
#include "bmi_ib_frag.h"
#include "bmi_ib_priv.h"
#include "bmi_ib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MAX_POST_RR (16)
OBJ_CLASS_DECLARATION(mca_bmi_ib_endpoint_t);
/**
@@ -106,13 +107,45 @@ struct mca_bmi_base_endpoint_t {
typedef struct mca_bmi_base_endpoint_t mca_bmi_base_endpoint_t;
typedef mca_bmi_base_endpoint_t mca_bmi_ib_endpoint_t;
int mca_bmi_ib_endpoint_send(struct mca_bmi_base_endpoint_t* endpoint, struct mca_bmi_ib_frag_t* frag);
int mca_bmi_ib_endpoint_send(mca_bmi_base_endpoint_t* endpoint, struct mca_bmi_ib_frag_t* frag);
int mca_bmi_ib_endpoint_connect(mca_bmi_base_endpoint_t*);
void mca_bmi_ib_post_recv(void);
void mca_bmi_ib_progress_send_frags(mca_bmi_ib_endpoint_t*);
static inline int mca_bmi_ib_endpoint_post_rr(int cnt, mca_bmi_ib_endpoint_t *endpoint)
{
int i, rc;
ompi_list_item_t* item;
mca_bmi_ib_recv_frag_t* frag;
mca_bmi_ib_module_t *ib_bmi = endpoint->endpoint_bmi;
VAPI_rr_desc_t* rr_desc_post = ib_bmi->rr_desc_post;
/* prepare frags and post receive requests */
for(i = 0; i < cnt; i++) {
OMPI_FREE_LIST_WAIT(&ib_bmi->recv_free, item, rc);
frag = (mca_bmi_ib_recv_frag_t*) item;
frag->endpoint = endpoint;
frag->sg_entry.len = frag->size;
rr_desc_post[i] = frag->rr_desc;
}
frag->ret = EVAPI_post_rr_list(ib_bmi->nic,
endpoint->lcl_qp_hndl,
cnt,
rr_desc_post);
if(VAPI_OK != frag->ret) {
MCA_BMI_IB_VAPI_RET(frag->ret, "EVAPI_post_rr_list");
return OMPI_ERROR;
}
OMPI_THREAD_ADD32(&ib_bmi->rr_posted, cnt);
return OMPI_SUCCESS;
}
#define DUMP_ENDPOINT(endpoint_ptr) { \
ompi_output(0, "[%s:%d] ", __FILE__, __LINE__); \
ompi_output(0, "Dumping endpoint %d state", \

src/mca/bmi/ib/bmi_ib_frag.c (new file, 78 lines)
View file

@@ -0,0 +1,78 @@
#include "bmi_ib_frag.h"
#include "mca/common/vapi/vapi_mem_reg.h"
static void mca_bmi_ib_send_frag_constructor(mca_bmi_ib_frag_t* frag)
{
mca_common_vapi_memhandle_t* mem_hndl = frag->base.super.user_data;
frag->size = MCA_BMI_IB_FIRST_FRAG_SIZE;
frag->hdr = (mca_bmi_ib_header_t*) (frag+1); /* initialize the bmi header to point to start at end of frag */
frag->segment.seg_addr.pval = frag->hdr+1; /* init the segment address to start after the bmi header */
frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->base.des_src->seg_key.key64 = (uint64_t) mem_hndl->l_key;
/* TODO - initialize the firstfrag size */
frag->sr_desc.comp_type = VAPI_SIGNALED;
frag->sr_desc.opcode = VAPI_SEND;
frag->sr_desc.remote_qkey = 0;
frag->sr_desc.sg_lst_len = 1;
frag->sr_desc.sg_lst_p = &frag->sg_entry;
frag->sg_entry.lkey = mem_hndl->l_key;
frag->sr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
}
static void mca_bmi_ib_recv_frag_constructor(mca_bmi_ib_frag_t* frag)
{
mca_common_vapi_memhandle_t* mem_hndl = frag->base.super.user_data;
frag->size = MCA_BMI_IB_FIRST_FRAG_SIZE;
frag->hdr = (mca_bmi_ib_header_t*) (frag+1); /* initialize the bmi header to point to start at end of frag */
frag->segment.seg_addr.pval = frag->hdr+1; /* init the segment address to start after the bmi header */
frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->base.des_src->seg_key.key64 = (uint64_t) mem_hndl->l_key;
/* TODO - initialize the first frag size */
frag->rr_desc.comp_type = VAPI_SIGNALED;
frag->rr_desc.opcode = VAPI_RECEIVE;
frag->rr_desc.sg_lst_len = 1;
frag->rr_desc.sg_lst_p = &frag->sg_entry;
frag->sg_entry.lkey = mem_hndl->l_key;
frag->rr_desc.id = (VAPI_virt_addr_t) (MT_virt_addr_t) frag;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
}
OBJ_CLASS_INSTANCE(
mca_bmi_ib_frag_t,
mca_bmi_base_descriptor_t,
NULL,
NULL);
OBJ_CLASS_INSTANCE(
mca_bmi_ib_send_frag_t,
mca_bmi_base_descriptor_t,
mca_bmi_ib_send_frag_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_bmi_ib_recv_frag_t,
mca_bmi_base_descriptor_t,
mca_bmi_ib_recv_frag_constructor,
NULL);

View file

@@ -1,311 +0,0 @@
/* Standard system includes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Open MPI includes */
/* Other IB BMI includes */
#include "bmi_ib.h"
#include "bmi_ib_memory.h"
#include "bmi_ib_priv.h"
static void mca_bmi_ib_mem_registry_construct(ompi_object_t *object);
static void mca_bmi_ib_mem_registry_destruct(ompi_object_t *object);
static void mca_bmi_ib_mem_registry_info_construct(ompi_object_t *object);
static void mca_bmi_ib_mem_registry_info_destruct(ompi_object_t *object);
static int mca_bmi_ib_mem_registry_info_compare(void *key1, void *key2);
static int mca_bmi_ib_mem_registry_real_deregister(
mca_bmi_ib_mem_registry_t *registry,
mca_bmi_ib_mem_registry_info_t *info);
OBJ_CLASS_INSTANCE(mca_bmi_ib_mem_registry_info_t, ompi_list_item_t,
mca_bmi_ib_mem_registry_info_construct, mca_bmi_ib_mem_registry_info_destruct);
OBJ_CLASS_INSTANCE(mca_bmi_ib_mem_registry_t, ompi_rb_tree_t, mca_bmi_ib_mem_registry_construct,
mca_bmi_ib_mem_registry_destruct);
static void mca_bmi_ib_mem_registry_construct(ompi_object_t *object)
{
mca_bmi_ib_mem_registry_t *registry = (mca_bmi_ib_mem_registry_t *)object;
int i;
ompi_rb_tree_init(&(registry->rb_tree), mca_bmi_ib_mem_registry_info_compare);
OBJ_CONSTRUCT(&(registry->info_free_list), ompi_free_list_t);
ompi_free_list_init(&registry->info_free_list, sizeof(mca_bmi_ib_mem_registry_info_t),
OBJ_CLASS(mca_bmi_ib_mem_registry_info_t), 32, -1, 32, NULL);
registry->hints_log_size = mca_bmi_ib_component.ib_mem_registry_hints_log_size;
/* sanity check -- enforce lower bound for hash calculation */
if (registry->hints_log_size < 1) {
registry->hints_log_size = 1;
}
registry->hints = (ompi_ptr_t *)malloc((1 << registry->hints_log_size) *
sizeof(ompi_ptr_t));
registry->hints_log_size = mca_bmi_ib_component.ib_mem_registry_hints_log_size;
registry->hints_size = (registry->hints) ? (1 << registry->hints_log_size) : 0;
for (i = 0; i < registry->hints_size; i++) {
registry->hints[i].pval = (void *)NULL;
}
registry->ib_bmi = NULL;
registry->evictable = NULL;
return;
}
static void mca_bmi_ib_mem_registry_destruct(ompi_object_t *object)
{
/* memory regions that are being tracked are not deregistered here */
mca_bmi_ib_mem_registry_t *registry = (mca_bmi_ib_mem_registry_t *)object;
OBJ_DESTRUCT(&(registry->info_free_list));
if (registry->hints_size != 0) {
free(registry->hints);
registry->hints = (ompi_ptr_t *)NULL;
registry->hints_size = 0;
}
return;
}
static void mca_bmi_ib_mem_registry_info_construct(ompi_object_t *object)
{
mca_bmi_ib_mem_registry_info_t *info = (mca_bmi_ib_mem_registry_info_t *)object;
info->next = NULL;
info->ref_cnt = 0;
info->hndl = VAPI_INVAL_HNDL;
memset(&(info->request), 0, sizeof(VAPI_mr_t));
memset(&(info->reply), 0, sizeof(VAPI_mr_t));
return;
}
static void mca_bmi_ib_mem_registry_info_destruct(ompi_object_t *object)
{
return;
}
static int mca_bmi_ib_mem_registry_info_compare(void *request, void *treenode)
{
int result;
VAPI_mr_t *mr1 = (VAPI_mr_t *)request;
VAPI_mr_t *mr2 = (VAPI_mr_t *)treenode;
uint64_t start1 = mr1->start;
uint64_t start2 = mr2->start;
uint64_t end1 = start1 + mr1->size;
uint64_t end2 = start2 + mr2->size;
if (end1 < start2) {
/* non-overlapping mr1 < mr2 */
result = -1;
}
else if (start1 > end2) {
/* non-overlapping mr1 > mr2 */
result = 1;
}
else if ((end1 <= end2) && (start1 >= start2)) {
/* completely overlapping mr1 and mr2 (mr2 may be bigger) */
if ((mr1->acl & mr2->acl) == mr1->acl) {
/* minimum access permissions met */
result = 0;
}
else {
/* oops -- access permissions not good enough */
result = 1;
}
}
else if (start1 < start2) {
/* partially overlapping mr1 < mr2 */
result = -1;
}
else {
/* partially overlapping mr1 > mr2 */
result = 1;
}
return result;
}
void mca_bmi_ib_mem_registry_clean_evictables(
mca_bmi_ib_mem_registry_t *registry,
mca_bmi_ib_mem_registry_info_t *info)
{
mca_bmi_ib_mem_registry_info_t *tmp = registry->evictable;
mca_bmi_ib_mem_registry_info_t *prev = NULL;
while (NULL != tmp) {
if (tmp == info) {
if (NULL == prev) {
/* no more entries left -- no evictable list */
registry->evictable = NULL;
}
else {
/* remove this entry from the evictable list */
prev->next = tmp->next;
}
/* clear this entry's evictable link */
tmp->next = NULL;
break;
}
prev = tmp;
tmp = tmp->next;
}
return;
}
mca_bmi_ib_mem_registry_info_t *mca_bmi_ib_mem_registry_register(
mca_bmi_ib_mem_registry_t *registry, VAPI_mr_t *mr)
{
mca_bmi_ib_mem_registry_info_t *info = mca_bmi_ib_mem_registry_find(registry, mr);
mca_bmi_ib_mem_registry_info_t *next_to_evict;
ompi_list_item_t *item;
VAPI_ret_t vapi_result;
int rc;
if (info == (mca_bmi_ib_mem_registry_info_t *)NULL) {
/* create new entry and register memory region */
item = (ompi_list_item_t *)info;
OMPI_FREE_LIST_GET(&(registry->info_free_list), item, rc);
info = (mca_bmi_ib_mem_registry_info_t *)item;
if (OMPI_SUCCESS != rc) {
/* error - return null pointer */
return info;
}
memcpy(&(info->request),mr,sizeof(VAPI_mr_t));
info->ref_cnt = 1;
do {
vapi_result = VAPI_register_mr(registry->ib_bmi->nic, mr,
&(info->hndl), &(info->reply));
if (VAPI_OK != vapi_result) {
if (VAPI_EAGAIN == vapi_result) {
/* evict an unused memory region, if at all possible */
if (NULL != registry->evictable) {
next_to_evict = registry->evictable->next;
mca_bmi_ib_mem_registry_real_deregister(registry, registry->evictable);
registry->evictable = next_to_evict;
}
}
else {
/* fatal error */
item = (ompi_list_item_t *)info;
OMPI_FREE_LIST_RETURN(&(registry->info_free_list), item);
info = NULL;
return info;
}
}
} while ((VAPI_OK != vapi_result) && (NULL != info));
/* insert a reference to this information into the red/black tree */
rc = ompi_rb_tree_insert(&(registry->rb_tree), &(info->reply), info);
/* aargh! what do we do if the tree insert fails... */
mca_bmi_ib_mem_registry_insert_hint(registry, &(info->reply), info);
}
else {
if (0 == info->ref_cnt) {
/* make sure we're not on the evictable list */
mca_bmi_ib_mem_registry_clean_evictables(registry, info);
}
(info->ref_cnt)++;
}
return info;
}
mca_bmi_ib_mem_registry_info_t *mca_bmi_ib_register_mem_with_registry(
mca_bmi_ib_module_t *ib_module,
void *addr, size_t len)
{
mca_bmi_ib_mem_registry_info_t *info;
VAPI_mr_t mr;
mr.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
mr.l_key = 0;
mr.r_key = 0;
mr.pd_hndl = ib_module->ptag;
mr.size = len;
mr.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr;
mr.type = VAPI_MR;
info = mca_bmi_ib_mem_registry_register(&(ib_module->mem_registry),&mr);
return info;
}
int mca_bmi_ib_deregister_mem_with_registry(
mca_bmi_ib_module_t *ib_module,
void *addr, size_t len)
{
VAPI_mr_t mr;
int rc;
mr.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
mr.l_key = 0;
mr.r_key = 0;
mr.pd_hndl = ib_module->ptag;
mr.size = len;
mr.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr;
mr.type = VAPI_MR;
rc = mca_bmi_ib_mem_registry_deregister(&(ib_module->mem_registry),&mr);
return rc;
}
static int mca_bmi_ib_mem_registry_real_deregister(
mca_bmi_ib_mem_registry_t *registry,
mca_bmi_ib_mem_registry_info_t *info)
{
ompi_list_item_t *item;
VAPI_ret_t vapi_result;
int i;
/* clear hints array of references to this info object */
for (i = 0; i < registry->hints_size; i++) {
if (registry->hints[i].pval == info) {
registry->hints[i].pval = (void *)NULL;
}
}
/* delete the info object from the red/black tree */
ompi_rb_tree_delete(&(registry->rb_tree), &(info->reply));
/* do the real deregistration */
vapi_result = VAPI_deregister_mr(registry->ib_bmi->nic, info->hndl);
/* return the info object to the free list */
item = (ompi_list_item_t *)info;
OMPI_FREE_LIST_RETURN(&(registry->info_free_list), item);
/* return an error if we could not successfully deregister memory region */
if (VAPI_OK != vapi_result) {
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_bmi_ib_mem_registry_deregister(
mca_bmi_ib_mem_registry_t *registry, VAPI_mr_t *mr)
{
mca_bmi_ib_mem_registry_info_t *info = mca_bmi_ib_mem_registry_find(registry, mr);
if (info != NULL) {
if (info->ref_cnt > 0) {
(info->ref_cnt)--;
if (0 == info->ref_cnt) {
info->next = registry->evictable;
registry->evictable = info;
}
}
}
else {
return OMPI_ERR_NOT_FOUND;
}
return OMPI_SUCCESS;
}
int mca_bmi_ib_mem_registry_init(
mca_bmi_ib_mem_registry_t *registry,
struct mca_bmi_ib_module_t *ib_bmi)
{
registry->ib_bmi = ib_bmi;
return OMPI_SUCCESS;
}

View file

@@ -1,157 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_IB_MEMORY_H
#define MCA_PTL_IB_MEMORY_H
/* Standard system includes */
#include <sys/types.h>
/* Open MPI includes */
#include "include/types.h"
#include "include/constants.h"
#include "class/ompi_object.h"
#include "class/ompi_list.h"
#include "class/ompi_rb_tree.h"
#include "class/ompi_free_list.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* vapi.h is not a C++ safe header file */
#include <vapi.h>
#include <vapi_common.h>
struct mca_bmi_ib_module_t;
typedef struct mca_bmi_ib_mem_registry_info_t mca_bmi_ib_mem_registry_info_t;
struct mca_bmi_ib_mem_registry_info_t {
ompi_list_item_t super;
mca_bmi_ib_mem_registry_info_t *next;
int ref_cnt;
VAPI_mr_hndl_t hndl;
VAPI_mr_t request;
VAPI_mr_t reply;
};
OBJ_CLASS_DECLARATION(mca_bmi_ib_mem_registry_info_t);
typedef struct mca_bmi_ib_mem_registry_t mca_bmi_ib_mem_registry_t;
struct mca_bmi_ib_mem_registry_t {
ompi_rb_tree_t rb_tree;
ompi_free_list_t info_free_list;
ompi_ptr_t *hints;
mca_bmi_ib_mem_registry_info_t *evictable;
struct mca_bmi_ib_module_t *ib_bmi;
int hints_log_size;
int hints_size;
};
OBJ_CLASS_DECLARATION(mca_bmi_ib_mem_registry_t);
static inline void mca_bmi_ib_mem_registry_insert_hint(
mca_bmi_ib_mem_registry_t *registry, VAPI_mr_t *key,
mca_bmi_ib_mem_registry_info_t *info)
{
uint64_t hints_hash = 0, addrll;
if (registry->hints_size) {
addrll = (uint64_t)(key->start);
/* calculate hash index for hints array - hash is (hints_log_size - 1) bits of key
* from first non-zero least significant bit
*/
hints_hash = addrll & (-addrll);
hints_hash = (((hints_hash << registry->hints_log_size) - hints_hash) & addrll) /
hints_hash;
registry->hints[hints_hash].pval = info;
}
return;
}
/* find information on a registered memory region for a given address,
* region size, and access permissions
*
*/
static inline mca_bmi_ib_mem_registry_info_t *mca_bmi_ib_mem_registry_find(
mca_bmi_ib_mem_registry_t *registry, VAPI_mr_t *key)
{
mca_bmi_ib_mem_registry_info_t *info = (mca_bmi_ib_mem_registry_info_t *)NULL;
uint64_t hints_hash = 0, addrll;
if (registry->hints_size) {
addrll = (uint64_t)(key->start);
/* calculate hash index for hints array - hash is (hints_log_size - 1) bits of key
* from first non-zero least significant bit
*/
hints_hash = addrll & (-addrll);
hints_hash = (((hints_hash << registry->hints_log_size) - hints_hash) & addrll) /
hints_hash;
if ((info = registry->hints[hints_hash].pval) != (void *)NULL) {
if ((info->reply.start <= key->start) &&
((info->reply.start + info->reply.size) >= (key->start + key->size)) &&
((info->reply.acl & key->acl) == key->acl)) {
return info;
}
}
}
/* search the red/black tree */
info = ompi_rb_tree_find(&(registry->rb_tree), key);
/* store a pointer to this info in the hints array for later lookups */
if ((info != NULL) && registry->hints_size) {
registry->hints[hints_hash].pval = info;
}
return info;
}
/* prototypes */
mca_bmi_ib_mem_registry_info_t *mca_bmi_ib_mem_registry_register(
mca_bmi_ib_mem_registry_t *registry,
VAPI_mr_t *mr);
mca_bmi_ib_mem_registry_info_t *mca_bmi_ib_register_mem_with_registry(
struct mca_bmi_ib_module_t *ib_bmi,
void *addr, size_t len);
int mca_bmi_ib_deregister_mem_with_registry(
struct mca_bmi_ib_module_t *ib_bmi,
void *addr, size_t len);
int mca_bmi_ib_mem_registry_deregister(
mca_bmi_ib_mem_registry_t *registry,
VAPI_mr_t *mr);
int mca_bmi_ib_mem_registry_init(
mca_bmi_ib_mem_registry_t* registry,
struct mca_bmi_ib_module_t *ib_bmi);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

View file

@@ -20,7 +20,8 @@
#include "bmi_ib_vapi.h"
#include "bmi_ib_priv.h"
#include "bmi_ib.h"
#include "bmi_ib_memory.h"
#include "bmi_ib_frag.h"
#include "bmi_ib_endpoint.h"
/*
* Asynchronous event handler to detect unforseen
@@ -200,7 +201,7 @@ int mca_bmi_ib_create_qp(VAPI_hca_hndl_t nic,
ret = VAPI_create_qp(nic, &qp_init_attr,
qp_hndl, qp_prop);
if(VAPI_OK != ret) {
MCA_BMI_IB_VAPI_RET(ret, "VAPI_create_qp");
return OMPI_ERROR;
@@ -251,6 +252,26 @@ int mca_bmi_ib_module_init(mca_bmi_ib_module_t *ib_bmi)
}
int mca_bmi_ib_qp_query(mca_bmi_ib_module_t* ib_bmi, VAPI_qp_hndl_t qp_hndl, VAPI_qp_num_t qp_num)
{
VAPI_hca_hndl_t nic = ib_bmi->nic;
VAPI_qp_attr_t qp_attr;
VAPI_qp_attr_mask_t qp_attr_mask;
VAPI_qp_init_attr_t qp_init_attr;
VAPI_ret_t ret;
ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );
if (ret != VAPI_OK) {
ompi_output(0, "error querying the queue pair");
return OMPI_ERROR;
}
ib_bmi->ib_inline_max = qp_init_attr.cap.max_inline_data_sq;
return OMPI_SUCCESS;
}
int mca_bmi_ib_qp_init(VAPI_hca_hndl_t nic,
VAPI_qp_hndl_t qp_hndl,
VAPI_qp_num_t remote_qp,
@@ -341,6 +362,9 @@ int mca_bmi_ib_qp_init(VAPI_hca_hndl_t nic,
}
D_PRINT("Modified to RTS..Qp %d", qp_hndl);
return OMPI_SUCCESS;
}
@@ -409,20 +433,21 @@ int mca_bmi_ib_qp_init(VAPI_hca_hndl_t nic,
/* } */
/* void mca_bmi_ib_buffer_repost(VAPI_hca_hndl_t nic, void* addr) */
/* { */
/* VAPI_ret_t ret; */
/* ib_buffer_t *ib_buf = (ib_buffer_t*)addr; */
void mca_bmi_ib_buffer_repost(VAPI_hca_hndl_t nic, void* addr)
{
mca_bmi_ib_recv_frag_t * frag = (mca_bmi_ib_recv_frag_t*)addr;
/* IB_PREPARE_RECV_DESC(ib_buf); */
frag->sg_entry.len = frag->size;
/* ret = VAPI_post_rr(nic, ib_buf->qp_hndl, &(ib_buf->desc.rr)); */
/* if(VAPI_OK != ret) { */
/* MCA_BMI_IB_VAPI_RET(ret, "VAPI_post_rr"); */
/* ompi_output(0, "Error in buffer reposting"); */
/* } */
/* } */
frag->ret = VAPI_post_rr(nic, frag->endpoint->lcl_qp_hndl, &(frag->rr_desc));
if(VAPI_OK != frag->ret) {
MCA_BMI_IB_VAPI_RET(frag->ret, "VAPI_post_rr");
ompi_output(0, "Error in buffer reposting");
}
}
/* void mca_bmi_ib_prepare_ack(mca_bmi_ib_module_t *ib_bmi, */
/* void* addr_to_reg, int len_to_reg, */

View file

@@ -22,7 +22,7 @@
#include <stdint.h>
#include "class/ompi_free_list.h"
#include "bmi_ib_vapi.h"
#include "bmi_ib_memory.h"
#include "bmi_ib.h"
#define NUM_IB_SEND_BUF (1)
#define NUM_IB_RECV_BUF (4)
@@ -183,9 +183,13 @@ int mca_bmi_ib_register_mem(
/* struct mca_bmi_base_endpoint_t *peer, */
/* ib_buffer_t *ib_buf, void*); */
/* void mca_bmi_ib_buffer_repost( */
/* VAPI_hca_hndl_t nic, */
/* void* addr); */
void mca_bmi_ib_buffer_repost(
VAPI_hca_hndl_t nic,
void* addr);
int mca_bmi_ib_qp_query(mca_bmi_ib_module_t* ib_bmi, VAPI_qp_hndl_t qp_hndl, VAPI_qp_num_t qp_num);
/* void mca_bmi_ib_prepare_ack( */
/* struct mca_bmi_ib_module_t *ib_module, */

View file

@@ -18,6 +18,7 @@
#include "ompi_config.h"
#include "bmi_ib.h"
#include "bmi_ib_frag.h"
#include "bmi_ib_recvfrag.h"

View file

@@ -21,6 +21,6 @@
#include "mca/bmi/bmi.h"
void mca_bmi_ib_process_recv(mca_bmi_ib_module_t*, mca_bmi_ib_recv_frag_t*);
void mca_bmi_ib_process_recv(struct mca_bmi_ib_module_t*, struct mca_bmi_ib_recv_frag_t*);
#endif

View file

@@ -19,7 +19,9 @@
#include "ompi_config.h"
#include "include/types.h"
#include "bmi_ib_sendfrag.h"
#include "bmi_ib_frag.h"
#include "mca/bmi/bmi.h"
#include "bmi_ib_endpoint.h"
void mca_bmi_ib_sendfrag_complete( mca_bmi_ib_module_t * ib_bmi, mca_bmi_ib_send_frag_t* frag)
{

View file

@@ -60,7 +60,7 @@
}
/* Debug Print */
#if 1
#if 0
#define D_PRINT(fmt, args...) { \
ompi_output(0, "[%s:%d:%s] " fmt, __FILE__, __LINE__, __func__, \
##args); \