2005-06-30 21:28:35 +00:00
|
|
|
/*
|
2005-11-05 19:57:48 +00:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2006-08-24 16:38:08 +00:00
|
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-06-30 21:28:35 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef MCA_BTL_IB_ENDPOINT_H
|
|
|
|
#define MCA_BTL_IB_ENDPOINT_H
|
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
#include "opal/class/opal_list.h"
|
2005-07-03 23:09:55 +00:00
|
|
|
#include "opal/event/event.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/btl/btl.h"
|
2005-06-30 21:28:35 +00:00
|
|
|
#include "btl_openib_frag.h"
|
|
|
|
#include "btl_openib.h"
|
2006-03-26 08:30:50 +00:00
|
|
|
#include "btl_openib_eager_rdma.h"
|
2005-07-15 15:13:19 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
2005-07-15 15:13:19 +00:00
|
|
|
|
2005-06-30 21:28:35 +00:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
2005-09-30 22:58:09 +00:00
|
|
|
|
|
|
|
struct mca_btl_openib_frag_t;
|
|
|
|
|
2005-06-30 21:28:35 +00:00
|
|
|
/**
|
|
|
|
* State of IB endpoint connection.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef enum {
    /* This BTL instance has started the process of connecting. */
    MCA_BTL_IB_CONNECTING,

    /* Waiting for an ACK from the endpoint. */
    MCA_BTL_IB_CONNECT_ACK,

    /* Waiting for the final connection ACK from the endpoint. */
    MCA_BTL_IB_WAITING_ACK,

    /* Connected: both sender and receiver have buffers associated
     * with this connection. */
    MCA_BTL_IB_CONNECTED,

    /* Connection is closed; no resources are associated with it. */
    MCA_BTL_IB_CLOSED,

    /* The maximum number of retries has been used; failure on send
     * is reported to the upper layer. */
    MCA_BTL_IB_FAILED
} mca_btl_openib_endpoint_state_t;
|
|
|
|
|
2005-09-30 22:58:09 +00:00
|
|
|
struct mca_btl_openib_rem_info_t {
    /* Remote QP numbers (high and low priority) */
    uint32_t rem_qp_num_hp;
    uint32_t rem_qp_num_lp;

    /* Local identifier (LID) of the remote process */
    uint16_t rem_lid;

    /* Remote process's port sequence numbers (high and low priority) */
    uint32_t rem_psn_hp;
    uint32_t rem_psn_lp;

    /* Subnet id of the remote process */
    uint64_t rem_subnet_id;

    /* MTU of the remote process */
    uint32_t rem_mtu;

    /* Index of the remote endpoint in the endpoint array */
    uint32_t rem_index;
};
typedef struct mca_btl_openib_rem_info_t mca_btl_openib_rem_info_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
2005-06-30 21:28:35 +00:00
|
|
|
/**
|
|
|
|
* An abstraction that represents a connection to a endpoint process.
|
|
|
|
* An instance of mca_btl_base_endpoint_t is associated w/ each process
|
|
|
|
* and BTL pair at startup. However, connections to the endpoint
|
|
|
|
* are established dynamically on an as-needed basis:
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct mca_btl_base_endpoint_t {
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_item_t super;
|
2005-06-30 21:28:35 +00:00
|
|
|
|
|
|
|
struct mca_btl_openib_module_t* endpoint_btl;
|
|
|
|
/**< BTL instance that created this connection */
|
|
|
|
|
|
|
|
struct mca_btl_openib_proc_t* endpoint_proc;
|
|
|
|
/**< proc structure corresponding to endpoint */
|
|
|
|
|
|
|
|
mca_btl_openib_endpoint_state_t endpoint_state;
|
|
|
|
/**< current state of the connection */
|
|
|
|
|
|
|
|
size_t endpoint_retries;
|
|
|
|
/**< number of connection retries attempted */
|
|
|
|
|
|
|
|
double endpoint_tstamp;
|
|
|
|
/**< timestamp of when the first connection was attempted */
|
|
|
|
|
2005-10-21 02:21:45 +00:00
|
|
|
opal_mutex_t endpoint_lock;
|
2005-06-30 21:28:35 +00:00
|
|
|
/**< lock for concurrent access to endpoint state */
|
2005-10-21 02:21:45 +00:00
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_t pending_send_frags;
|
2005-07-19 21:04:22 +00:00
|
|
|
/**< list of pending send frags for this endpotint */
|
2005-07-12 13:38:54 +00:00
|
|
|
|
2006-09-05 16:00:18 +00:00
|
|
|
opal_list_t pending_frags[2]; /**< list of pending frags */
|
2006-09-28 11:41:45 +00:00
|
|
|
opal_list_t pending_get_frags; /**< list of pending rget ops */
|
|
|
|
opal_list_t pending_put_frags; /**< list of pending rput ops */
|
2005-10-21 02:21:45 +00:00
|
|
|
|
2005-09-30 22:58:09 +00:00
|
|
|
mca_btl_openib_rem_info_t rem_info;
|
2005-07-12 13:38:54 +00:00
|
|
|
|
2005-11-10 20:15:02 +00:00
|
|
|
uint32_t lcl_psn_hp;
|
|
|
|
uint32_t lcl_psn_lp;
|
2005-07-12 13:38:54 +00:00
|
|
|
/* Local processes port sequence number (Low and High) */
|
|
|
|
|
2006-09-07 13:05:41 +00:00
|
|
|
struct ibv_qp* lcl_qp[2]; /* Local QP (Low and High) */
|
2005-07-12 13:38:54 +00:00
|
|
|
|
2005-11-10 20:15:02 +00:00
|
|
|
struct ibv_qp_attr* lcl_qp_attr_hp;
|
|
|
|
struct ibv_qp_attr* lcl_qp_attr_lp;
|
2005-07-12 13:38:54 +00:00
|
|
|
/* Local QP attributes (Low and High) */
|
2005-11-10 20:15:02 +00:00
|
|
|
|
2006-09-05 16:00:18 +00:00
|
|
|
int32_t sd_tokens[2]; /**< number of send tokens */
|
2005-11-10 20:15:02 +00:00
|
|
|
int32_t get_tokens; /**< number of available get tokens */
|
|
|
|
|
2006-09-07 13:05:41 +00:00
|
|
|
int32_t rd_posted[2]; /**< number of descriptors posted to the nic*/
|
|
|
|
int32_t rd_credits[2]; /**< number of credits to return to peer */
|
|
|
|
int32_t sd_credits[2]; /**< number of send wqe entries being used to return credits */
|
2006-09-05 16:00:18 +00:00
|
|
|
int32_t sd_wqe[2]; /**< number of available send wqe entries */
|
2006-01-12 23:42:44 +00:00
|
|
|
|
2007-01-12 22:42:20 +00:00
|
|
|
uint64_t subnet_id; /**< subnet id of this endpoint*/
|
2006-03-26 08:30:50 +00:00
|
|
|
|
2006-09-05 09:16:22 +00:00
|
|
|
int32_t eager_recv_count; /**< number of eager received */
|
2006-03-26 08:30:50 +00:00
|
|
|
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;
|
|
|
|
/**< info about remote RDMA buffer */
|
|
|
|
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
|
|
|
|
/**< info about local RDMA buffer */
|
2006-09-05 16:04:04 +00:00
|
|
|
uint32_t index; /**< index of the endpoint in endpoints array */
|
2006-09-12 09:17:59 +00:00
|
|
|
struct mca_btl_openib_frag_t *credit_frag[2]; /**< frags for sending explicit high priority credits */
|
2007-01-12 23:14:45 +00:00
|
|
|
bool nbo; /**< does the endpoint require network byte ordering? */
|
|
|
|
bool use_eager_rdma; /**< use eager rdma for this peer? */
|
2005-06-30 21:28:35 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* The openib endpoint type is an alias of the generic BTL endpoint type. */
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_openib_endpoint_t;

OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);

/* Endpoint operations declared here and defined outside this header. */
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t *endpoint,
                                 struct mca_btl_openib_frag_t *frag);
int mca_btl_openib_endpoint_connect(mca_btl_base_endpoint_t *endpoint);
void mca_btl_openib_post_recv(void);
void mca_btl_openib_endpoint_send_credits(mca_btl_base_endpoint_t *endpoint,
                                          const int);
void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t *endpoint);
|
2005-06-30 21:28:35 +00:00
|
|
|
|
2006-09-07 13:05:41 +00:00
|
|
|
/**
 * Replenish the receive descriptors posted on one of the endpoint's QPs.
 *
 * While holding the BTL's ib_lock: if the number of receives posted for
 * @a prio is at or below mca_btl_openib_component.rd_low + @a additional
 * and below the BTL's rd_num, take (rd_num - rd_posted[prio]) fragments
 * from the matching receive free list and post each one with
 * ibv_post_recv().  On success rd_posted[prio] and rd_credits[prio] are
 * both increased by the number of receives posted.
 *
 * @param endpoint    endpoint whose QP is replenished
 * @param additional  added to rd_low when computing the repost threshold
 * @param prio        index of the QP; BTL_OPENIB_HP_QP selects the
 *                    recv_free_eager list, any other value recv_free_max
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR if ibv_post_recv() fails.  The
 *         ib_lock is released on every return path.
 */
static inline int btl_openib_endpoint_post_rr(mca_btl_base_endpoint_t *endpoint,
                                              const int additional, const int prio)
{
    mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
    int ret = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&openib_btl->ib_lock);
    if(endpoint->rd_posted[prio] <=
       mca_btl_openib_component.rd_low + additional &&
       endpoint->rd_posted[prio] < openib_btl->rd_num) {
        int rc;
        int32_t i, num_post = openib_btl->rd_num - endpoint->rd_posted[prio];
        struct ibv_recv_wr *bad_wr;
        ompi_free_list_t *free_list;

        if(BTL_OPENIB_HP_QP == prio)
            free_list = &openib_btl->recv_free_eager;
        else
            free_list = &openib_btl->recv_free_max;

        for(i = 0; i < num_post; i++) {
            ompi_free_list_item_t *item;
            mca_btl_openib_frag_t *frag;

            OMPI_FREE_LIST_WAIT(free_list, item, rc);
            frag = (mca_btl_openib_frag_t*)item;
            frag->endpoint = endpoint;
            if(ibv_post_recv(endpoint->lcl_qp[prio], &frag->wr_desc.rd_desc,
                             &bad_wr)) {
                BTL_ERROR(("error posting receive errno says %s\n",
                           strerror(errno)));
                /* Leave through the common exit so ib_lock is released;
                 * returning directly here would leave it held.
                 * NOTE(review): the fragment just taken from the free list
                 * is not returned, and receives posted earlier in this loop
                 * are not counted in rd_posted -- confirm whether callers
                 * treat this error as fatal. */
                ret = OMPI_ERROR;
                goto unlock;
            }
        }
        OPAL_THREAD_ADD32(&endpoint->rd_posted[prio], num_post);
        OPAL_THREAD_ADD32(&endpoint->rd_credits[prio], num_post);
    }

unlock:
    OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
    return ret;
}
|
|
|
|
|
2006-09-12 09:17:59 +00:00
|
|
|
static inline int btl_openib_check_send_credits(
|
|
|
|
mca_btl_openib_endpoint_t *endpoint, const int prio)
|
|
|
|
{
|
|
|
|
if(!mca_btl_openib_component.use_srq &&
|
|
|
|
endpoint->rd_credits[prio] >= mca_btl_openib_component.rd_win)
|
|
|
|
return OPAL_THREAD_ADD32(&endpoint->sd_credits[prio], 1) == 1;
|
|
|
|
|
|
|
|
if(BTL_OPENIB_LP_QP == prio) /* nothing more for low prio QP */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* for high prio check eager RDMA credits */
|
|
|
|
if(endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win)
|
|
|
|
return OPAL_THREAD_ADD32(&endpoint->sd_credits[prio], 1) == 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-06-30 21:28:35 +00:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|