1
1
openmpi/orte/mca/oob/ud/oob_ud_qp.c

298 строки
9.2 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "oob_ud_qp.h"
#include "oob_ud.h"
/* Class hooks for mca_oob_ud_qp_t; both are defined further down in this file. */
static void mca_oob_ud_qp_constructor (mca_oob_ud_qp_t *qp);
static void mca_oob_ud_qp_destructor (mca_oob_ud_qp_t *qp);
/* Instantiate the mca_oob_ud_qp_t class: opal_free_list_item_t is named as the
 * parent class and the two hooks above as constructor and destructor. */
OBJ_CLASS_INSTANCE(mca_oob_ud_qp_t, opal_free_list_item_t,
mca_oob_ud_qp_constructor,
mca_oob_ud_qp_destructor);
/* Forward declaration: polls the send completion queue of qp until
 * num_completions work completions have been collected. It is called from
 * mca_oob_ud_qp_post_send, which appears after the definition. */
static inline int mca_oob_ud_qp_process_send_completions (mca_oob_ud_qp_t *qp,
int num_completions);
/*
 * Drain a completion queue by polling it one work completion at a time and
 * discarding every entry.
 *
 * Only completion queues that have no completion channel attached
 * (cq->channel == NULL) are drained; a queue with a channel is left untouched
 * (presumably its completions are consumed by whoever services the channel --
 * confirm against the event handling code).
 *
 * Polling stops when the queue is empty (ibv_poll_cq returns 0) or when
 * ibv_poll_cq reports an error (negative return), so a persistent error can
 * not turn this into an endless loop.
 *
 * The expansion is a single statement without a trailing semicolon: invoke it
 * as MCA_OOB_UD_CLEAR_CQ(cq); -- this also makes it safe inside if/else.
 * Note that cq is evaluated more than once.
 */
#define MCA_OOB_UD_CLEAR_CQ(cq)                             \
    do {                                                    \
        if (NULL == (cq)->channel) {                        \
            struct ibv_wc wc;                               \
            while (ibv_poll_cq ((cq), 1, &wc) > 0) {        \
            }                                               \
        }                                                   \
    } while (0)
/*
 * Create the completion queue(s) and the UD queue pair for qp on port.
 *
 * qp           - queue pair object to fill in (ib_recv_cq, ib_send_cq, ib_qp, port)
 * port         - port whose device context / protection domain are used; it is
 *                also handed to ibv_create_cq as the CQ context pointer
 * recv_channel - completion channel for the receive CQ (may be NULL)
 * send_channel - completion channel for the send CQ (may be NULL); only used
 *                when onecq is false
 * onecq        - when true the receive CQ doubles as the send CQ
 *
 * Returns ORTE_SUCCESS on success. On any failure ORTE_ERROR is returned and
 * every completion queue created by this call is destroyed again, leaving
 * ib_recv_cq and ib_send_cq NULL, so that a later retry on the same object
 * does not leak them. qp->port is only set on success.
 */
int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port,
                        struct ibv_comp_channel *recv_channel,
                        struct ibv_comp_channel *send_channel, bool onecq)
{
    struct ibv_qp_init_attr init_attr;

    OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:qp_init creating UD QP on port %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), port->port_num));

    /* create a UD queue pair */
    memset(&init_attr, 0, sizeof(init_attr));

    init_attr.qp_type = IBV_QPT_UD;

    qp->ib_recv_cq = ibv_create_cq (port->device->ib_context, 16384,
                                    port, recv_channel, 0);
    if (NULL == qp->ib_recv_cq) {
        OPAL_OUTPUT_VERBOSE((1, mca_oob_base_output,
                             "%s oob:ud:qp_init could not create recv completion queue. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    if (false == onecq) {
        qp->ib_send_cq = ibv_create_cq (port->device->ib_context, 16384,
                                        port, send_channel, 0);
        if (NULL == qp->ib_send_cq) {
            OPAL_OUTPUT_VERBOSE((1, mca_oob_base_output,
                                 "%s oob:ud:qp_init could not create send completion queue. errno = %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
            goto release_cqs;
        }
    } else {
        qp->ib_send_cq = qp->ib_recv_cq;
    }

    init_attr.send_cq = qp->ib_send_cq;
    init_attr.recv_cq = qp->ib_recv_cq;

    init_attr.cap.max_send_sge    = 32;
    init_attr.cap.max_recv_sge    = 32; /* GRH, data */
    init_attr.cap.max_inline_data = 0;  /* don't use inline data for now */

    /* NTH: fix these */
    init_attr.cap.max_recv_wr     = 4096;
    init_attr.cap.max_send_wr     = 4096;

    qp->ib_qp = ibv_create_qp (port->device->ib_pd, &init_attr);
    if (NULL == qp->ib_qp) {
        /* log first so the cleanup below cannot disturb errno */
        OPAL_OUTPUT_VERBOSE((1, mca_oob_base_output,
                             "%s oob:ud:qp_init could not create queue pair. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        goto release_cqs;
    }
    /* end: create the UD queue pair */

    qp->port = port;

    return ORTE_SUCCESS;

release_cqs:
    /* undo the CQ creation; in one-CQ mode both pointers name the same CQ */
    if (NULL != qp->ib_send_cq && qp->ib_send_cq != qp->ib_recv_cq) {
        (void) ibv_destroy_cq (qp->ib_send_cq);
    }
    if (NULL != qp->ib_recv_cq) {
        (void) ibv_destroy_cq (qp->ib_recv_cq);
    }
    qp->ib_send_cq = NULL;
    qp->ib_recv_cq = NULL;

    return ORTE_ERROR;
}
/*
 * Take a queue pair out of service: switch it to the ERR state, drain its
 * completion queues (MCA_OOB_UD_CLEAR_CQ only touches queues that have no
 * completion channel), then switch it to the RESET state.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERROR as soon as one of the two state changes
 * is rejected by ibv_modify_qp (the second step is then not attempted).
 */
int mca_oob_ud_qp_to_reset (mca_oob_ud_qp_t *qp)
{
    struct ibv_qp_attr qp_attr;
    int status;

    /* step 1: QP -> ERR */
    memset (&qp_attr, 0, sizeof (qp_attr));
    qp_attr.qp_state = IBV_QPS_ERR;

    status = ibv_modify_qp (qp->ib_qp, &qp_attr, IBV_QP_STATE);
    if (0 != status) {
        opal_output(0, "%s oob:ud:qp_to_reset error modifying qp to ERR. errno = %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    /* step 2: throw away leftover completions; queues that have a completion
     * channel are left to the poll thread/event */
    MCA_OOB_UD_CLEAR_CQ(qp->ib_send_cq);
    MCA_OOB_UD_CLEAR_CQ(qp->ib_recv_cq);

    /* step 3: QP -> RESET */
    memset (&qp_attr, 0, sizeof (qp_attr));
    qp_attr.qp_state = IBV_QPS_RESET;

    status = ibv_modify_qp (qp->ib_qp, &qp_attr, IBV_QP_STATE);
    if (0 != status) {
        opal_output(0, "%s oob:ud:qp_to_reset error modifying qp to RESET. errno = %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/*
 * Walk a queue pair through INIT -> RTR -> RTS so it can send and receive.
 *
 * The INIT transition binds the QP to qp->port->port_num with pkey index 0
 * and qkey 0; the RTS transition sets the send queue PSN to 0.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERROR at the first transition that
 * ibv_modify_qp rejects (later transitions are then not attempted).
 */
int mca_oob_ud_qp_to_rts (mca_oob_ud_qp_t *qp)
{
    struct mca_oob_ud_port_t *port = qp->port;
    int attr_mask;
    struct ibv_qp_attr attr;

    /* move the QP into the INIT state */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state   = IBV_QPS_INIT;
    attr.pkey_index = 0; /* NTH: might need to modify the pkey index later */
    attr.port_num   = port->port_num;
    attr.qkey       = 0;

    attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY;

    if (0 != ibv_modify_qp(qp->ib_qp, &attr, attr_mask)) {
        opal_output(0, "%s oob:ud:qp_to_rts error modifying qp to INIT. errno = %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    /* Move QP to RTR; only the state is applied (mask is IBV_QP_STATE) */
    attr.qp_state = IBV_QPS_RTR;

    if (0 != ibv_modify_qp(qp->ib_qp, &attr, IBV_QP_STATE)) {
        opal_output(0, "%s oob:ud:qp_to_rts error modifying qp to RTR. errno = %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    /* Move QP to RTS */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn   = 0;

    attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN;

    if (0 != ibv_modify_qp(qp->ib_qp, &attr, attr_mask)) {
        opal_output(0, "%s oob:ud:qp_to_rts error modifying qp to RTS. errno = %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/*
 * Purge all work requests on a qp by cycling it through
 * mca_oob_ud_qp_to_reset and then mca_oob_ud_qp_to_rts.
 *
 * Returns the status of the reset step if that fails (the QP is then not
 * brought back up), otherwise the status of the RTS step.
 */
int mca_oob_ud_qp_purge (mca_oob_ud_qp_t *qp)
{
    int status = mca_oob_ud_qp_to_reset (qp);

    if (ORTE_SUCCESS == status) {
        status = mca_oob_ud_qp_to_rts (qp);
    }

    return status;
}
/*
 * Class constructor: zero every byte of the object that follows the first
 * sizeof(qp->super) bytes, leaving that leading region untouched.
 */
static void mca_oob_ud_qp_constructor (mca_oob_ud_qp_t *qp)
{
    const size_t skip   = sizeof (qp->super);
    char        *fields = (char *) qp + skip;

    memset (fields, 0, sizeof (*qp) - skip);
}
/*
 * Class destructor: release the verbs resources held by qp.
 *
 * If a queue pair exists it is first moved to reset (result ignored) and then
 * destroyed; a failed destroy is only logged. Afterwards the send CQ is
 * destroyed, followed by the receive CQ unless it is the same object as the
 * send CQ. CQ destroy results are ignored. The pointers in qp are not cleared.
 */
static void mca_oob_ud_qp_destructor (mca_oob_ud_qp_t *qp)
{
    if (NULL != qp->ib_qp) {
        int destroy_rc;

        /* clear qp and move to reset; best effort */
        (void) mca_oob_ud_qp_to_reset (qp);

        destroy_rc = ibv_destroy_qp (qp->ib_qp);
        if (0 != destroy_rc) {
            opal_output (0, "IBV_DESTROY_QP FAILED! rc = %d, errno = %d", destroy_rc, errno);
        }
    }

    if (NULL != qp->ib_send_cq) {
        (void) ibv_destroy_cq (qp->ib_send_cq);
    }

    /* nothing more to do when there is no receive CQ or it is shared */
    if (NULL == qp->ib_recv_cq || qp->ib_recv_cq == qp->ib_send_cq) {
        return;
    }

    (void) ibv_destroy_cq (qp->ib_recv_cq);
}
/*
 * Poll the send completion queue of qp, one entry per call, until
 * num_completions work completions have been collected. The loop keeps
 * polling while the queue is empty, so this blocks until enough completions
 * have arrived.
 *
 * Returns ORTE_ERROR immediately if ibv_poll_cq fails. Otherwise returns
 * ORTE_SUCCESS when every collected completion had status IBV_WC_SUCCESS and
 * ORTE_ERROR when at least one did not (all completions are still collected).
 */
static inline int mca_oob_ud_qp_process_send_completions (mca_oob_ud_qp_t *qp,
                                                          int num_completions)
{
    struct ibv_wc wc[1];
    int result = ORTE_SUCCESS;
    int collected = 0;

    OPAL_OUTPUT_VERBOSE((20, mca_oob_base_output, "%s oob:ud:qp_process_send_completions polling "
                         "for %d completions", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         num_completions));

    while (collected < num_completions) {
        int idx;
        int polled = ibv_poll_cq (qp->ib_send_cq, 1, wc);

        if (polled < 0) {
            opal_output (0, "%s oob:ud:qp_process_send_completions error polling for completions. "
                         "errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
            return ORTE_ERROR;
        }

        /* remember a failed completion but keep collecting the rest */
        for (idx = 0 ; idx < polled ; ++idx) {
            if (IBV_WC_SUCCESS == wc[idx].status) {
                continue;
            }
            opal_output (0, "wc status = %d", wc[idx].status);
            result = ORTE_ERROR;
        }

        collected += polled;
    }

    return result;
}
/*
 * Post the send work request chain wr on qp and then wait for
 * num_completions send completions.
 *
 * Returns ORTE_ERROR if ibv_post_send rejects the request; otherwise the
 * result of mca_oob_ud_qp_process_send_completions.
 */
int mca_oob_ud_qp_post_send (mca_oob_ud_qp_t *qp, struct ibv_send_wr *wr,
                             int num_completions) {
    struct ibv_send_wr *failed_wr;

    if (0 == ibv_post_send (qp->ib_qp, wr, &failed_wr)) {
        return mca_oob_ud_qp_process_send_completions (qp, num_completions);
    }

    opal_output (0, "%s oob:ud:qp_post_send ibv_post_send failed. errno = %d",
                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
    return ORTE_ERROR;
}
/*
 * Post the receive work request chain wr on qp.
 *
 * Returns ORTE_SUCCESS when ibv_post_recv accepts the request and ORTE_ERROR
 * (after logging) when it does not.
 */
int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
    struct ibv_recv_wr *bad_wr;
    int rc;

    rc = ibv_post_recv (qp->ib_qp, wr, &bad_wr);
    if (0 != rc) {
        /* name the call that actually failed (was reported as ibv_post_send) */
        opal_output (0, "%s oob:ud:qp_post_recv ibv_post_recv failed. errno = %d",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/*
 * Take a data queue pair from port->data_qps and hand it back through qp_ptr.
 *
 * An item whose ib_qp is still NULL is initialised on the spot with a single
 * shared completion queue and no completion channels, then moved to RTS.
 * An item that already has an ib_qp is returned as it is.
 *
 * Returns the free-list error if no item could be obtained (*qp_ptr is not
 * written in that case), otherwise the status of the initialisation steps.
 *
 * NOTE(review): when init or the RTS transition fails, *qp_ptr still points
 * at the item and the item is not handed back to the free list -- confirm
 * that callers deal with this.
 */
int mca_oob_ud_qp_data_aquire (struct mca_oob_ud_port_t *port, mca_oob_ud_qp_t **qp_ptr) {
    opal_free_list_item_t *item;
    int rc;

    OPAL_FREE_LIST_GET(&port->data_qps, item, rc);
    if (OPAL_SUCCESS != rc) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:qp_data_aquire error "
                             "allocating new data qp. error = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc));
        return rc;
    }

    *qp_ptr = (mca_oob_ud_qp_t *) item;

    /* already set up: nothing left to do */
    if (NULL != (*qp_ptr)->ib_qp) {
        return rc;
    }

    rc = mca_oob_ud_qp_init (*qp_ptr, port, NULL, NULL, true);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    return mca_oob_ud_qp_to_rts (*qp_ptr);
}
/*
 * Give a data queue pair back to its port's data_qps free list.
 *
 * The QP is purged first (mca_oob_ud_qp_purge). If the purge fails its status
 * is returned and the QP is NOT put back on the free list; otherwise the QP
 * is returned to the list and ORTE_SUCCESS is reported.
 */
int mca_oob_ud_qp_data_release (mca_oob_ud_qp_t *qp) {
    int purge_status = mca_oob_ud_qp_purge (qp);

    if (ORTE_SUCCESS == purge_status) {
        OPAL_FREE_LIST_RETURN(&qp->port->data_qps, qp);
        return ORTE_SUCCESS;
    }

    return purge_status;
}