1
1
openmpi/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
Galen Shipman 73e9ef46fc use int32_t not size_t (ORTE interface change)..
This commit was SVN r11323.
2006-08-22 17:13:10 +00:00

257 lines
9.5 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_IB_ENDPOINT_H
#define MCA_BTL_IB_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "btl_mvapi_frag.h"
#include "btl_mvapi.h"
#include "btl_mvapi_eager_rdma.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* Class declaration for the endpoint type.  OBJ_CLASS_DECLARATION is a macro
 * defined outside this file; the type it names is typedef'd further down in
 * this header. */
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
/* Forward declaration: the prototypes in this header only need a pointer to
 * a fragment, not its full definition. */
struct mca_btl_mvapi_frag_t;
/**
 * Port information record; it carries a single 32-bit subnet value.
 */
typedef struct mca_btl_mvapi_port_info_t {
    uint32_t subnet;
} mca_btl_mvapi_port_info_t;
/**
 * Connection state of an IB endpoint.
 */
typedef enum {
    /* This BTL instance has started the process of connecting */
    MCA_BTL_IB_CONNECTING = 0,

    /* Waiting for an ack from the endpoint */
    MCA_BTL_IB_CONNECT_ACK = 1,

    /* Waiting for the final connection ACK from the endpoint */
    MCA_BTL_IB_WAITING_ACK = 2,

    /* Connected: both sender and receiver have buffers associated
     * with this connection */
    MCA_BTL_IB_CONNECTED = 3,

    /* Connection is closed; no resources are associated with it */
    MCA_BTL_IB_CLOSED = 4,

    /* The maximum number of retries has been used up; failure is
     * reported to the upper layer on send */
    MCA_BTL_IB_FAILED = 5
} mca_btl_mvapi_endpoint_state_t;
/**
 * Addressing information for the remote side of a connection.
 */
typedef struct mca_btl_mvapi_rem_info_t {
    /** QP number of the remote side's high priority queue pair */
    VAPI_qp_num_t rem_qp_num_hp;

    /** QP number of the remote side's low priority queue pair */
    VAPI_qp_num_t rem_qp_num_lp;

    /** Local identifier of the remote process */
    IB_lid_t rem_lid;

    /** Subnet of the remote process */
    uint32_t rem_subnet;
} mca_btl_mvapi_rem_info_t;
/**
 * Represents the connection to one endpoint (peer) process.
 *
 * One mca_btl_base_endpoint_t exists per (process, BTL) pair and is created
 * at startup; the connection itself is only established later, dynamically,
 * when it is first needed.
 */
typedef struct mca_btl_base_endpoint_t {
    opal_list_item_t            super;

    /** BTL instance that created this connection */
    struct mca_btl_mvapi_module_t* endpoint_btl;

    /** proc structure corresponding to this endpoint */
    struct mca_btl_mvapi_proc_t* endpoint_proc;

    /** current state of the connection */
    mca_btl_mvapi_endpoint_state_t endpoint_state;

    /** number of connection retries attempted */
    size_t                      endpoint_retries;

    /** timestamp of when the first connection was attempted */
    double                      endpoint_tstamp;

    /** lock for concurrent access to the endpoint state */
    opal_mutex_t                endpoint_lock;

    /** send fragments pending on this endpoint */
    opal_list_t                 pending_send_frags;

    /** pending high priority fragments */
    opal_list_t                 pending_frags_hp;

    /** pending low priority fragments */
    opal_list_t                 pending_frags_lp;

    /* remote side addressing (QP numbers, LID, subnet) */
    mca_btl_mvapi_rem_info_t    rem_info;

    /* local QP handle, high priority */
    VAPI_qp_hndl_t              lcl_qp_hndl_hp;

    /* local QP handle, low priority */
    VAPI_qp_hndl_t              lcl_qp_hndl_lp;

    /* local QP properties, high priority */
    VAPI_qp_prop_t              lcl_qp_prop_hp;

    /* local QP properties, low priority */
    VAPI_qp_prop_t              lcl_qp_prop_lp;

    /** number of high priority send tokens */
    int32_t                     sd_tokens_hp;

    /** number of low priority send tokens */
    int32_t                     sd_tokens_lp;

    /** number of available get tokens */
    int32_t                     get_tokens;

    /** number of high priority descriptors posted to the nic */
    int32_t                     rd_posted_hp;

    /** number of low priority descriptors posted to the nic */
    int32_t                     rd_posted_lp;

    /** number of high priority credits to return to the peer */
    int32_t                     rd_credits_hp;

    /** number of low priority credits to return to the peer */
    int32_t                     rd_credits_lp;

    /** number of send wqe entries being used to return credits
     *  (high priority, going by the _hp suffix) */
    int32_t                     sd_credits_hp;

    /** number of send wqe entries being used to return credits
     *  (low priority, going by the _lp suffix) */
    int32_t                     sd_credits_lp;

    /** number of available high priority send wqe entries */
    int32_t                     sd_wqe_hp;

    /** number of available low priority send wqe entries */
    int32_t                     sd_wqe_lp;

    /* NOTE(review): undocumented in the original; presumably the subnet this
     * endpoint is reachable on -- confirm against the code that sets it */
    uint32_t                    subnet;

    /** number of eager messages received */
    uint32_t                    eager_recv_count;

    /** info about the remote RDMA buffer */
    mca_btl_mvapi_eager_rdma_remote_t eager_rdma_remote;

    /** info about the local RDMA buffer */
    mca_btl_mvapi_eager_rdma_local_t  eager_rdma_local;

    /** index into the RDMA buffers pointer array */
    int32_t                     eager_rdma_index;
} mca_btl_base_endpoint_t;

/* The mvapi BTL refers to the same structure under its own name. */
typedef mca_btl_base_endpoint_t  mca_btl_mvapi_endpoint_t;
/*
 * Endpoint operations.  Only the prototypes live in this header; the
 * descriptions below are inferred from the names and signatures and should be
 * confirmed against the implementation file.
 */

/* Send fragment `frag` to `endpoint`; returns an int status. */
int mca_btl_mvapi_endpoint_send(mca_btl_base_endpoint_t* endpoint,
                                struct mca_btl_mvapi_frag_t* frag);

/* Connection setup for `endpoint`; returns an int status. */
int mca_btl_mvapi_endpoint_connect(mca_btl_base_endpoint_t* endpoint);

/* Credit return to the peer, high priority variant. */
void mca_btl_mvapi_endpoint_send_credits_hp(mca_btl_base_endpoint_t* endpoint);

/* Credit return to the peer, low priority variant. */
void mca_btl_mvapi_endpoint_send_credits_lp(mca_btl_base_endpoint_t* endpoint);

/* Takes no arguments; presumably posts receive buffers -- TODO confirm. */
void mca_btl_mvapi_post_recv(void);

/* Eager RDMA connection setup for `endpoint`. */
void mca_btl_mvapi_endpoint_connect_eager_rdma(mca_btl_mvapi_endpoint_t* endpoint);
/**
 * Top up the high priority receive queue of an endpoint.
 *
 * While holding the ib_lock of the endpoint's BTL module, checks whether the
 * number of posted high priority receive descriptors (rd_posted_hp) is at or
 * below mca_btl_mvapi_component.rd_low + additional and still below the
 * module's rd_num.  If so, the shortfall (rd_num - rd_posted_hp) is handed to
 * MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB together with the recv_free_eager list,
 * the _hp counters, the module's nic and the high priority QP handle.
 *
 * @param endpoint    pointer to the endpoint; evaluated exactly once, before
 *                    any of the macro's own locals are declared
 * @param additional  slack added to the rd_low threshold; evaluated once,
 *                    inside the locked region
 *
 * The expansion is a brace-enclosed block, so it works with or without a
 * trailing semicolon but must not be used as the unbraced body of an `if`
 * that is followed by `else`.  It declares the locals `rr_endpoint` and
 * `mvapi_btl`; `additional` must not refer to caller variables of those names.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, \
                                            additional) \
{ \
    do { \
        /* capture the argument first so it is evaluated once and cannot */ \
        /* pick up the macro's own mvapi_btl */ \
        mca_btl_mvapi_endpoint_t *rr_endpoint = (endpoint); \
        mca_btl_mvapi_module_t *mvapi_btl = rr_endpoint->endpoint_btl; \
        OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
        if(rr_endpoint->rd_posted_hp <= \
               mca_btl_mvapi_component.rd_low + (additional) && \
           rr_endpoint->rd_posted_hp < mvapi_btl->rd_num) { \
            MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
                                               rr_endpoint->rd_posted_hp, \
                                               rr_endpoint, \
                                               &mvapi_btl->recv_free_eager, \
                                               rr_endpoint->rd_posted_hp, \
                                               rr_endpoint->rd_credits_hp, \
                                               mvapi_btl->nic, \
                                               rr_endpoint->lcl_qp_hndl_hp); \
        } \
        OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
    } while(0); \
}
/**
 * Top up the low priority receive queue of an endpoint.
 *
 * While holding the ib_lock of the endpoint's BTL module, checks whether the
 * number of posted low priority receive descriptors (rd_posted_lp) is at or
 * below mca_btl_mvapi_component.rd_low + additional and still below the
 * module's rd_num.  If so, the shortfall (rd_num - rd_posted_lp) is handed to
 * MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB together with the recv_free_max list,
 * the _lp counters, the module's nic and the low priority QP handle.
 *
 * @param endpoint    pointer to the endpoint; evaluated exactly once, before
 *                    any of the macro's own locals are declared
 * @param additional  slack added to the rd_low threshold; evaluated once,
 *                    inside the locked region
 *
 * The expansion is a brace-enclosed block, so it works with or without a
 * trailing semicolon but must not be used as the unbraced body of an `if`
 * that is followed by `else`.  It declares the locals `rr_endpoint` and
 * `mvapi_btl`; `additional` must not refer to caller variables of those names.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, \
                                           additional) \
{ \
    do { \
        /* capture the argument first so it is evaluated once and cannot */ \
        /* pick up the macro's own mvapi_btl */ \
        mca_btl_mvapi_endpoint_t *rr_endpoint = (endpoint); \
        mca_btl_mvapi_module_t *mvapi_btl = rr_endpoint->endpoint_btl; \
        OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
        if(rr_endpoint->rd_posted_lp <= \
               mca_btl_mvapi_component.rd_low + (additional) && \
           rr_endpoint->rd_posted_lp < mvapi_btl->rd_num) { \
            MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
                                               rr_endpoint->rd_posted_lp, \
                                               rr_endpoint, \
                                               &mvapi_btl->recv_free_max, \
                                               rr_endpoint->rd_posted_lp, \
                                               rr_endpoint->rd_credits_lp, \
                                               mvapi_btl->nic, \
                                               rr_endpoint->lcl_qp_hndl_lp); \
        } \
        OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
    } while(0); \
}
/**
 * Post `cnt` receive descriptors for an endpoint on one queue pair.
 *
 * For each descriptor a fragment is taken from `frag_list` with
 * OMPI_FREE_LIST_WAIT, bound to the endpoint, given a scatter/gather length
 * of frag->size plus the distance between frag->hdr and the segment address,
 * and its rr_desc is copied into the module's rr_desc_post array.  The whole
 * array is then posted with a single EVAPI_post_rr_list call.  On success
 * both counters are increased by the number posted (OPAL_THREAD_ADD32); on
 * failure an error is logged via BTL_ERROR and the counters stay untouched.
 *
 * @param cnt          number of descriptors to post; evaluated once.  A
 *                     value <= 0 makes the macro a no-op.
 * @param my_endpoint  pointer to the endpoint; evaluated once, before the
 *                     loop, so it cannot pick up the macro's own `frag`
 * @param frag_list    free list the receive fragments are taken from
 * @param rd_posted    lvalue: counter of posted descriptors
 * @param rd_credits   lvalue: counter of credits to return to the peer
 * @param nic          HCA handle passed to EVAPI_post_rr_list
 * @param qp           QP handle passed to EVAPI_post_rr_list
 *
 * The expansion is a brace-enclosed block declaring the locals i, rc,
 * num_post, sub_endpoint, mvapi_btl, desc_post (plus item and frag inside
 * the loop); the remaining arguments must not rely on caller variables with
 * those names.
 *
 * NOTE(review): nothing here checks that cnt fits into rr_desc_post -- the
 * array's capacity is not visible from this header.
 * NOTE(review): when posting fails, the fragments already taken from
 * frag_list are not handed back by this macro.
 */
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(cnt, \
                                           my_endpoint, \
                                           frag_list, \
                                           rd_posted, \
                                           rd_credits, \
                                           nic, \
                                           qp ) \
{ \
    do { \
        int32_t i; \
        int rc; \
        int32_t num_post = (cnt); \
        mca_btl_mvapi_endpoint_t *sub_endpoint = (my_endpoint); \
        mca_btl_mvapi_module_t *mvapi_btl = sub_endpoint->endpoint_btl; \
        VAPI_rr_desc_t* desc_post = mvapi_btl->rr_desc_post; \
        /* nothing to do: do not hand an empty or negative count to the HCA */ \
        if(num_post <= 0) { \
            break; \
        } \
        for(i = 0; i < num_post; i++) { \
            ompi_free_list_item_t* item; \
            mca_btl_mvapi_frag_t* frag = NULL; \
            OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
            frag = (mca_btl_mvapi_frag_t*) item; \
            frag->endpoint = sub_endpoint; \
            frag->sg_entry.len = frag->size + \
                ((unsigned char*) frag->segment.seg_addr.pval - \
                 (unsigned char*) frag->hdr); \
            desc_post[i] = frag->rr_desc; \
        } \
        rc = EVAPI_post_rr_list( nic, \
                                 qp, \
                                 num_post, \
                                 desc_post); \
        if(VAPI_OK != rc) { \
            BTL_ERROR(("error posting receive descriptors: %s",\
                       VAPI_strerror(rc))); \
        } else { \
            OPAL_THREAD_ADD32(&(rd_posted), num_post); \
            OPAL_THREAD_ADD32(&(rd_credits), num_post); \
        } \
    } while(0); \
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif