2005-07-01 01:28:35 +04:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-07-01 01:28:35 +04:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef MCA_BTL_IB_ENDPOINT_H
|
|
|
|
#define MCA_BTL_IB_ENDPOINT_H
|
|
|
|
|
2005-07-03 20:22:16 +04:00
|
|
|
#include "opal/class/opal_list.h"
|
2005-07-04 03:09:55 +04:00
|
|
|
#include "opal/event/event.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/btl/btl.h"
|
2005-07-01 01:28:35 +04:00
|
|
|
#include "btl_openib_frag.h"
|
|
|
|
#include "btl_openib.h"
|
2006-03-26 12:30:50 +04:00
|
|
|
#include "btl_openib_eager_rdma.h"
|
2005-07-15 19:13:19 +04:00
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
2006-03-26 12:30:50 +04:00
|
|
|
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
2005-07-15 19:13:19 +04:00
|
|
|
|
2005-07-01 01:28:35 +04:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
/* Class declaration for the endpoint object type (expands the project's
 * OBJ_CLASS_DECLARATION macro; presumably declares the class descriptor). */
OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);

/* Forward declaration of the fragment type, which the send prototype in this
 * header refers to by pointer. */
struct mca_btl_openib_frag_t;
|
|
|
|
|
|
|
|
/**
 * Per-port information record. It currently carries a single value: the
 * 16-bit subnet the port belongs to.
 */
typedef struct mca_btl_openib_port_info_t {
    uint16_t subnet;    /* subnet of the port */
} mca_btl_openib_port_info_t;
|
|
|
|
|
|
|
|
|
2005-07-01 01:28:35 +04:00
|
|
|
/**
 * Connection state of an IB endpoint.
 *
 * The numeric values are the ones the compiler would assign implicitly; they
 * are spelled out so the ordering is visible at a glance.
 */
typedef enum {
    /* This BTL instance has started the connection process. */
    MCA_BTL_IB_CONNECTING  = 0,

    /* Waiting for an ack from the endpoint. */
    MCA_BTL_IB_CONNECT_ACK = 1,

    /* Waiting for the final connection ACK from the endpoint. */
    MCA_BTL_IB_WAITING_ACK = 2,

    /* Connected: both sender and receiver have buffers associated with
     * this connection. */
    MCA_BTL_IB_CONNECTED   = 3,

    /* Connection is closed; no resources are associated with it. */
    MCA_BTL_IB_CLOSED      = 4,

    /* The maximum number of retries has been used up; failure on send is
     * reported to the upper layer. */
    MCA_BTL_IB_FAILED      = 5
} mca_btl_openib_endpoint_state_t;
|
|
|
|
|
2005-10-01 02:58:09 +04:00
|
|
|
/**
 * Addressing information describing the remote side of a connection.
 * Fields suffixed _hp / _lp refer to the high- and low-priority queue pair
 * respectively.
 */
typedef struct mca_btl_openib_rem_info_t {
    /* Remote QP numbers (high and low priority). */
    uint32_t rem_qp_num_hp;
    uint32_t rem_qp_num_lp;

    /* Local identifier (LID) of the remote process. */
    uint16_t rem_lid;

    /* Sequence numbers (PSN) of the remote process (high and low priority). */
    uint32_t rem_psn_hp;
    uint32_t rem_psn_lp;

    /* Subnet of the remote process. */
    uint16_t rem_subnet;
} mca_btl_openib_rem_info_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
2005-07-01 01:28:35 +04:00
|
|
|
/**
|
|
|
|
* An abstraction that represents a connection to a endpoint process.
|
|
|
|
* An instance of mca_btl_base_endpoint_t is associated w/ each process
|
|
|
|
* and BTL pair at startup. However, connections to the endpoint
|
|
|
|
* are established dynamically on an as-needed basis:
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct mca_btl_base_endpoint_t {
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_item_t super;
|
2005-07-01 01:28:35 +04:00
|
|
|
|
|
|
|
struct mca_btl_openib_module_t* endpoint_btl;
|
|
|
|
/**< BTL instance that created this connection */
|
|
|
|
|
|
|
|
struct mca_btl_openib_proc_t* endpoint_proc;
|
|
|
|
/**< proc structure corresponding to endpoint */
|
|
|
|
|
|
|
|
mca_btl_openib_endpoint_state_t endpoint_state;
|
|
|
|
/**< current state of the connection */
|
|
|
|
|
|
|
|
size_t endpoint_retries;
|
|
|
|
/**< number of connection retries attempted */
|
|
|
|
|
|
|
|
double endpoint_tstamp;
|
|
|
|
/**< timestamp of when the first connection was attempted */
|
|
|
|
|
2005-10-21 06:21:45 +04:00
|
|
|
opal_mutex_t endpoint_lock;
|
2005-07-01 01:28:35 +04:00
|
|
|
/**< lock for concurrent access to endpoint state */
|
2005-10-21 06:21:45 +04:00
|
|
|
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_t pending_send_frags;
|
2005-07-20 01:04:22 +04:00
|
|
|
/**< list of pending send frags for this endpotint */
|
2005-07-12 17:38:54 +04:00
|
|
|
|
2005-10-21 06:21:45 +04:00
|
|
|
opal_list_t pending_frags_hp;
|
|
|
|
/**< list of pending high priority frags */
|
|
|
|
|
|
|
|
opal_list_t pending_frags_lp;
|
|
|
|
/**< list of pending low priority frags */
|
|
|
|
|
|
|
|
|
|
|
|
|
2005-10-01 02:58:09 +04:00
|
|
|
mca_btl_openib_rem_info_t rem_info;
|
2005-07-12 17:38:54 +04:00
|
|
|
|
2005-11-10 23:15:02 +03:00
|
|
|
uint32_t lcl_psn_hp;
|
|
|
|
uint32_t lcl_psn_lp;
|
2005-07-12 17:38:54 +04:00
|
|
|
/* Local processes port sequence number (Low and High) */
|
|
|
|
|
2005-11-10 23:15:02 +03:00
|
|
|
struct ibv_qp* lcl_qp_hp;
|
|
|
|
struct ibv_qp* lcl_qp_lp;
|
2005-07-12 17:38:54 +04:00
|
|
|
/* Local QP (Low and High) */
|
|
|
|
|
2005-11-10 23:15:02 +03:00
|
|
|
struct ibv_qp_attr* lcl_qp_attr_hp;
|
|
|
|
struct ibv_qp_attr* lcl_qp_attr_lp;
|
2005-07-12 17:38:54 +04:00
|
|
|
/* Local QP attributes (Low and High) */
|
2005-11-10 23:15:02 +03:00
|
|
|
|
|
|
|
int32_t sd_tokens_hp; /**< number of high priority send tokens */
|
|
|
|
int32_t sd_tokens_lp; /**< number of low priority send tokens */
|
|
|
|
int32_t get_tokens; /**< number of available get tokens */
|
|
|
|
|
|
|
|
int32_t rd_posted_hp; /**< number of high priority descriptors posted to the nic*/
|
|
|
|
int32_t rd_posted_lp; /**< number of low priority descriptors posted to the nic*/
|
|
|
|
int32_t rd_credits_hp; /**< number of high priority credits to return to peer */
|
|
|
|
int32_t rd_credits_lp; /**< number of low priority credits to return to peer */
|
2006-01-13 02:42:44 +03:00
|
|
|
int32_t sd_credits_hp; /**< number of send wqe entries being used to return credits */
|
|
|
|
int32_t sd_credits_lp; /**< number of send wqe entries being used to return credits */
|
|
|
|
int32_t sd_wqe_hp; /**< number of available send wqe entries */
|
|
|
|
int32_t sd_wqe_lp; /**< number of available send wqe entries */
|
|
|
|
|
2005-10-01 02:58:09 +04:00
|
|
|
uint16_t subnet; /**< subnet of this endpoint*/
|
2006-03-26 12:30:50 +04:00
|
|
|
|
|
|
|
uint32_t eager_recv_count; /**< number of eager received */
|
|
|
|
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;
|
|
|
|
/**< info about remote RDMA buffer */
|
|
|
|
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
|
|
|
|
/**< info about local RDMA buffer */
|
|
|
|
size_t eager_rdma_index; /**< index into RDMA buffers pointer array */
|
2005-07-01 01:28:35 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
|
|
|
typedef mca_btl_base_endpoint_t mca_btl_openib_endpoint_t;
|
|
|
|
|
|
|
|
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint, struct mca_btl_openib_frag_t* frag);
|
|
|
|
int mca_btl_openib_endpoint_connect(mca_btl_base_endpoint_t*);
|
|
|
|
void mca_btl_openib_post_recv(void);
|
2006-01-13 02:42:44 +03:00
|
|
|
void mca_btl_openib_endpoint_send_credits_hp(mca_btl_base_endpoint_t*);
|
|
|
|
void mca_btl_openib_endpoint_send_credits_lp(mca_btl_base_endpoint_t*);
|
2006-03-26 12:30:50 +04:00
|
|
|
void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
|
2005-07-12 17:38:54 +04:00
|
|
|
|
2005-10-03 20:35:12 +04:00
|
|
|
/*
 * Internal worker shared by the POST_RR macros below: take (cnt) fragments
 * from the free list (frag_list) and post each of them as a receive on (qp)
 * on behalf of (my_endpoint).
 *
 *   cnt          number of receives to post (evaluated once)
 *   my_endpoint  endpoint stored into every posted fragment
 *   frag_list    pointer to the free list the fragments are taken from
 *   rd_posted    int32_t lvalue, increased by cnt when every post succeeded
 *   rd_credits   int32_t lvalue, increased by cnt when every post succeeded
 *   qp           queue pair the receives are posted to
 *   failed       int lvalue; set to 0 on success and to 1 as soon as
 *                ibv_post_recv() reports an error
 *
 * This macro never returns from the enclosing function, which lets callers
 * release locks before propagating the error.
 *
 * NOTE(review): when a post fails, the fragment just taken from the free
 * list is not handed back and receives posted earlier in the same call are
 * not added to rd_posted / rd_credits.  This mirrors the long-standing
 * behavior; confirm that callers treat the failure as fatal for the endpoint.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_TRY(cnt,                               \
                                            my_endpoint,                       \
                                            frag_list,                         \
                                            rd_posted,                         \
                                            rd_credits,                        \
                                            qp,                                \
                                            failed)                            \
do {                                                                           \
    int32_t post_rr_try_num_ = (cnt);                                          \
    int32_t post_rr_try_i_;                                                    \
    int post_rr_try_rc_;                                                       \
    struct ibv_recv_wr *post_rr_try_bad_wr_;                                   \
    (failed) = 0;                                                              \
    for (post_rr_try_i_ = 0;                                                   \
         post_rr_try_i_ < post_rr_try_num_;                                    \
         post_rr_try_i_++) {                                                   \
        ompi_free_list_item_t *post_rr_try_item_;                              \
        mca_btl_openib_frag_t *post_rr_try_frag_;                              \
        OMPI_FREE_LIST_WAIT((frag_list), post_rr_try_item_, post_rr_try_rc_);  \
        (void) post_rr_try_rc_;                                                \
        post_rr_try_frag_ = (mca_btl_openib_frag_t *) post_rr_try_item_;       \
        post_rr_try_frag_->endpoint = (my_endpoint);                           \
        post_rr_try_frag_->sg_entry.length = post_rr_try_frag_->size +         \
            ((unsigned char *) post_rr_try_frag_->segment.seg_addr.pval -      \
             (unsigned char *) post_rr_try_frag_->hdr);                        \
        if (ibv_post_recv((qp),                                                \
                          &post_rr_try_frag_->wr_desc.rd_desc,                 \
                          &post_rr_try_bad_wr_)) {                             \
            BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
            (failed) = 1;                                                      \
            break;                                                             \
        }                                                                      \
    }                                                                          \
    if (!(failed)) {                                                           \
        OPAL_THREAD_ADD32(&(rd_posted), post_rr_try_num_);                     \
        OPAL_THREAD_ADD32(&(rd_credits), post_rr_try_num_);                    \
    }                                                                          \
} while (0)

/*
 * Post (cnt) receives from (frag_list) to (qp) and account for them in
 * (rd_posted) and (rd_credits).
 *
 * On a failed ibv_post_recv() the error is logged and the ENCLOSING FUNCTION
 * returns OMPI_ERROR, so this macro may only be used inside functions that
 * return an int status and must not be invoked while holding a lock that
 * the error path would have to release.
 *
 * Expands to a single statement; terminate the invocation with ';'.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB(cnt,                               \
                                            my_endpoint,                       \
                                            frag_list,                         \
                                            rd_posted,                         \
                                            rd_credits,                        \
                                            qp)                                \
do {                                                                           \
    int post_rr_sub_failed_ = 0;                                               \
    MCA_BTL_OPENIB_ENDPOINT_POST_RR_TRY(cnt,                                   \
                                        my_endpoint,                           \
                                        frag_list,                             \
                                        rd_posted,                             \
                                        rd_credits,                            \
                                        qp,                                    \
                                        post_rr_sub_failed_);                  \
    if (post_rr_sub_failed_) {                                                 \
        return OMPI_ERROR;                                                     \
    }                                                                          \
} while (0)

/*
 * Replenish the high priority receive descriptors of (endpoint).
 *
 * Under the BTL's ib_lock: if rd_posted_hp has dropped to
 * mca_btl_openib_component.rd_low + (additional) or below, and is still
 * below the BTL's rd_num, post (rd_num - rd_posted_hp) receives taken from
 * recv_free_eager on the high priority QP.
 *
 * (endpoint) is evaluated exactly once.  If posting fails, ib_lock is
 * released first and then the ENCLOSING FUNCTION returns OMPI_ERROR.
 *
 * Expands to a single statement; terminate the invocation with ';'.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, additional)             \
do {                                                                           \
    mca_btl_base_endpoint_t *post_rr_ep_ = (endpoint);                         \
    mca_btl_openib_module_t *post_rr_btl_ = post_rr_ep_->endpoint_btl;         \
    int post_rr_failed_ = 0;                                                   \
    OPAL_THREAD_LOCK(&post_rr_btl_->ib_lock);                                  \
    if (post_rr_ep_->rd_posted_hp <=                                           \
            mca_btl_openib_component.rd_low + (additional) &&                  \
        post_rr_ep_->rd_posted_hp < post_rr_btl_->rd_num) {                    \
        MCA_BTL_OPENIB_ENDPOINT_POST_RR_TRY(                                   \
            post_rr_btl_->rd_num - post_rr_ep_->rd_posted_hp,                  \
            post_rr_ep_,                                                       \
            &post_rr_btl_->recv_free_eager,                                    \
            post_rr_ep_->rd_posted_hp,                                         \
            post_rr_ep_->rd_credits_hp,                                        \
            post_rr_ep_->lcl_qp_hp,                                            \
            post_rr_failed_);                                                  \
    }                                                                          \
    OPAL_THREAD_UNLOCK(&post_rr_btl_->ib_lock);                                \
    if (post_rr_failed_) {                                                     \
        return OMPI_ERROR;                                                     \
    }                                                                          \
} while (0)

/*
 * Replenish the low priority receive descriptors of (endpoint).
 *
 * Same logic as MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH, applied to
 * rd_posted_lp / rd_credits_lp, the recv_free_max free list and the low
 * priority QP.
 *
 * (endpoint) is evaluated exactly once.  If posting fails, ib_lock is
 * released first and then the ENCLOSING FUNCTION returns OMPI_ERROR.
 *
 * Expands to a single statement; terminate the invocation with ';'.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, additional)              \
do {                                                                           \
    mca_btl_base_endpoint_t *post_rr_ep_ = (endpoint);                         \
    mca_btl_openib_module_t *post_rr_btl_ = post_rr_ep_->endpoint_btl;         \
    int post_rr_failed_ = 0;                                                   \
    OPAL_THREAD_LOCK(&post_rr_btl_->ib_lock);                                  \
    if (post_rr_ep_->rd_posted_lp <=                                           \
            mca_btl_openib_component.rd_low + (additional) &&                  \
        post_rr_ep_->rd_posted_lp < post_rr_btl_->rd_num) {                    \
        MCA_BTL_OPENIB_ENDPOINT_POST_RR_TRY(                                   \
            post_rr_btl_->rd_num - post_rr_ep_->rd_posted_lp,                  \
            post_rr_ep_,                                                       \
            &post_rr_btl_->recv_free_max,                                      \
            post_rr_ep_->rd_posted_lp,                                         \
            post_rr_ep_->rd_credits_lp,                                        \
            post_rr_ep_->lcl_qp_lp,                                            \
            post_rr_failed_);                                                  \
    }                                                                          \
    OPAL_THREAD_UNLOCK(&post_rr_btl_->ib_lock);                                \
    if (post_rr_failed_) {                                                     \
        return OMPI_ERROR;                                                     \
    }                                                                          \
} while (0)
|
2005-07-01 01:28:35 +04:00
|
|
|
|
2005-10-21 06:21:45 +04:00
|
|
|
/*
 * Queue a fragment on a pending list and give back one token.
 *
 *   frag       pointer to the fragment; it is cast to opal_list_item_t *
 *   frag_list  opal_list_t lvalue the fragment is appended to
 *   tokens     int32_t lvalue that is incremented by one
 *   lock       lock lvalue held while the list is modified
 *
 * The append happens with (lock) held; the token counter is incremented via
 * OPAL_THREAD_ADD32 after the lock has been released.
 *
 * Every argument is parenthesized so that expressions such as "frags + 1"
 * are cast as a whole.  Expands to a single statement; terminate the
 * invocation with ';'.
 */
#define BTL_OPENIB_INSERT_PENDING(frag, frag_list, tokens, lock)               \
do {                                                                           \
    OPAL_THREAD_LOCK(&(lock));                                                 \
    opal_list_append(&(frag_list), (opal_list_item_t *) (frag));               \
    OPAL_THREAD_UNLOCK(&(lock));                                               \
    OPAL_THREAD_ADD32(&(tokens), 1);                                           \
} while (0)
|
2005-10-21 06:21:45 +04:00
|
|
|
|
2005-07-01 01:28:35 +04:00
|
|
|
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|