1
1
openmpi/orte/mca/oob/ud/oob_ud_qp.c
Igor Ivanov 34d861dfe9 orte/oob: Fix issue #1301
Signed-off-by: Igor Ivanov <Igor.Ivanov@itseez.com>
2016-01-20 12:08:00 +02:00

322 строки
11 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "oob_ud_component.h"
#include "oob_ud_qp.h"
#include "oob_ud.h"
#include "orte/util/show_help.h"
/* Class hooks for mca_oob_ud_qp_t; both are defined further down in this file. */
static void mca_oob_ud_qp_constructor (mca_oob_ud_qp_t *qp);
static void mca_oob_ud_qp_destructor (mca_oob_ud_qp_t *qp);
/* Instantiate the class descriptor for mca_oob_ud_qp_t. The arguments name
 * opal_free_list_item_t as the parent class and the two hooks declared above
 * as constructor and destructor. */
OBJ_CLASS_INSTANCE(mca_oob_ud_qp_t, opal_free_list_item_t,
mca_oob_ud_qp_constructor,
mca_oob_ud_qp_destructor);
/* Polls the send CQ of qp until num_completions work completions have been
 * collected; called from mca_oob_ud_qp_post_send() after posting. */
static inline int mca_oob_ud_qp_process_send_completions (mca_oob_ud_qp_t *qp,
int num_completions);
/*
 * Discard every work completion currently queued on a completion queue.
 *
 * Only CQs that have no completion channel attached are drained; a CQ with a
 * channel is left untouched.  Polling stops as soon as ibv_poll_cq() reports
 * no more entries (0) or an error (negative), so a failing CQ cannot make the
 * caller spin forever.
 *
 * The expansion is a single statement without a trailing semicolon: write
 * MCA_OOB_UD_CLEAR_CQ(cq); at the call site.  The argument is evaluated more
 * than once, so it must not have side effects.
 */
#define MCA_OOB_UD_CLEAR_CQ(cq)                                 \
    do {                                                        \
        if (NULL == (cq)->channel) {                            \
            struct ibv_wc wc;                                   \
            while (ibv_poll_cq ((cq), 1, &wc) > 0) {            \
                /* completion intentionally discarded */        \
            }                                                   \
        }                                                       \
    } while (0)
/**
 * Create the verbs resources (completion queue(s) and UD queue pair) for qp.
 *
 * @param qp            queue pair object to fill in; ib_recv_cq, ib_send_cq,
 *                      ib_qp and port are written
 * @param port          port the QP is created on; port->device supplies the
 *                      verbs context, the protection domain and the limits
 * @param recv_channel  completion channel handed to the receive CQ (may be
 *                      NULL)
 * @param send_channel  completion channel handed to the send CQ; ignored when
 *                      onecq is true
 * @param onecq         when true the send side shares the receive CQ instead
 *                      of getting a CQ of its own
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR (after showing a help message) when a
 *         CQ or the QP cannot be created.  On failure any CQ that was already
 *         created stays stored in qp; the class destructor releases it.
 */
int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port,
                        struct ibv_comp_channel *recv_channel,
                        struct ibv_comp_channel *send_channel, bool onecq)
{
    struct ibv_qp_init_attr init_attr;
    /* never ask for more CQ entries than the device supports */
    int max_cqe = min(port->device->attr.max_cqe, 16384);

    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                        "%s oob:ud:qp_init creating UD QP on port %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), port->port_num);

    /* create a UD queue pair */
    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.qp_type = IBV_QPT_UD;

    qp->ib_recv_cq = ibv_create_cq (port->device->ib_context, max_cqe,
                                    port, recv_channel, 0);
    if (NULL == qp->ib_recv_cq) {
        orte_show_help("help-oob-ud.txt", "create-cq-failed", true,
                       orte_process_info.nodename, max_cqe, strerror(errno));
        return ORTE_ERROR;
    }

    if (false == onecq) {
        qp->ib_send_cq = ibv_create_cq (port->device->ib_context, max_cqe,
                                        port, send_channel, 0);
        if (NULL == qp->ib_send_cq) {
            orte_show_help("help-oob-ud.txt", "create-cq-failed", true,
                           orte_process_info.nodename, max_cqe, strerror(errno));
            return ORTE_ERROR;
        }
    } else {
        /* single-CQ mode: send completions land on the receive CQ */
        qp->ib_send_cq = qp->ib_recv_cq;
    }

    init_attr.send_cq = qp->ib_send_cq;
    init_attr.recv_cq = qp->ib_recv_cq;

    /* The QP is created on port->device, so its limits (not those of whatever
     * device happens to be first in the component's list) bound the work
     * queue sizes. */
    opal_output_verbose(80, orte_oob_base_framework.framework_output,
                        "%s oob:ud:qp_init create queue pair for device: device->attr.max_sge = %d, device->attr.max_qp_wr = %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), port->device->attr.max_sge,
                        port->device->attr.max_qp_wr);

    init_attr.cap.max_send_sge    = mca_oob_ud_component.ud_qp_max_send_sge;
    init_attr.cap.max_recv_sge    = mca_oob_ud_component.ud_qp_max_recv_sge; /* GRH, data */
    init_attr.cap.max_inline_data = mca_oob_ud_component.ud_qp_max_inline_data;
    init_attr.cap.max_recv_wr     = min(mca_oob_ud_component.ud_qp_max_recv_wr,
                                        port->device->attr.max_qp_wr);
    init_attr.cap.max_send_wr     = min(mca_oob_ud_component.ud_qp_max_send_wr,
                                        port->device->attr.max_qp_wr);

    qp->ib_qp = ibv_create_qp (port->device->ib_pd, &init_attr);
    if (NULL == qp->ib_qp) {
        orte_show_help("help-oob-ud.txt", "create-qp-failed", true,
                       orte_process_info.nodename, init_attr.cap.max_send_sge, init_attr.cap.max_recv_sge,
                       init_attr.cap.max_send_wr, init_attr.cap.max_recv_wr, init_attr.cap.max_inline_data,
                       strerror(errno));
        return ORTE_ERROR;
    }
    /* end: create the UD queue pair */

    qp->port = port;

    return ORTE_SUCCESS;
}
/**
 * Move a queue pair to the RESET state, passing through the ERR state.
 *
 * Between the two transitions, completion queues that have no completion
 * channel are drained here; CQs with a channel are skipped by the
 * MCA_OOB_UD_CLEAR_CQ macro (their completions are left to the poll
 * thread/event handler).
 *
 * @param qp  queue pair whose ib_qp is modified
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR (after showing a help message) when
 *         either state change is rejected.
 */
int mca_oob_ud_qp_to_reset (mca_oob_ud_qp_t *qp)
{
    struct ibv_qp_attr attr;
    int rc;

    /* move the QP into the ERR state */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state = IBV_QPS_ERR;

    /* ibv_modify_qp() reports the failure reason through its return value */
    rc = ibv_modify_qp(qp->ib_qp, &attr, IBV_QP_STATE);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "modify-qp-failed", true,
                       orte_process_info.nodename, IBV_QP_STATE, strerror(rc));
        return ORTE_ERROR;
    }

    /* poll thread/event will clear failed work requests */
    MCA_OOB_UD_CLEAR_CQ(qp->ib_send_cq);
    MCA_OOB_UD_CLEAR_CQ(qp->ib_recv_cq);

    /* move the QP into the RESET state */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state = IBV_QPS_RESET;

    rc = ibv_modify_qp(qp->ib_qp, &attr, IBV_QP_STATE);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "modify-qp-failed", true,
                       orte_process_info.nodename, IBV_QP_STATE, strerror(rc));
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/**
 * Walk a queue pair through INIT and RTR to the RTS (ready-to-send) state.
 *
 * @param qp  queue pair to modify; qp->port supplies the port number
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR (after showing a help message that
 *         names the attribute mask of the failing transition).
 */
int mca_oob_ud_qp_to_rts (mca_oob_ud_qp_t *qp)
{
    struct mca_oob_ud_port_t *port = qp->port;
    struct ibv_qp_attr attr;
    int attr_mask;
    int rc;

    /* move the QP into the INIT state */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state   = IBV_QPS_INIT;
    attr.pkey_index = 0; /* NTH: might need to modify the pkey index later */
    attr.port_num   = port->port_num;
    attr.qkey       = 0;
    attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY;

    /* ibv_modify_qp() reports the failure reason through its return value */
    rc = ibv_modify_qp(qp->ib_qp, &attr, attr_mask);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "modify-qp-failed", true,
                       orte_process_info.nodename, attr_mask, strerror(rc));
        return ORTE_ERROR;
    }

    /* Move QP to RTR; only the state field is applied, and the mask is
     * updated so a failure message names the mask actually used */
    attr.qp_state = IBV_QPS_RTR;
    attr_mask = IBV_QP_STATE;

    rc = ibv_modify_qp(qp->ib_qp, &attr, attr_mask);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "modify-qp-failed", true,
                       orte_process_info.nodename, attr_mask, strerror(rc));
        return ORTE_ERROR;
    }

    /* Setup attributes */
    memset(&attr, 0, sizeof(attr));
    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn   = 0;
    attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN;

    rc = ibv_modify_qp(qp->ib_qp, &attr, attr_mask);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "modify-qp-failed", true,
                       orte_process_info.nodename, attr_mask, strerror(rc));
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/*
 * Purge all work requests on a qp by cycling it through RESET and back to
 * RTS.  Returns the reset error if the reset fails (the RTS transition is
 * then not attempted), otherwise the result of the RTS transition.
 */
int mca_oob_ud_qp_purge (mca_oob_ud_qp_t *qp)
{
    int reset_status = mca_oob_ud_qp_to_reset (qp);

    return (ORTE_SUCCESS == reset_status) ? mca_oob_ud_qp_to_rts (qp)
                                          : reset_status;
}
/*
 * Class constructor: zero every byte of *qp that lies beyond the first
 * sizeof(qp->super) bytes, leaving that leading region untouched
 * (presumably the embedded parent object -- this relies on super being the
 * first member).
 */
static void mca_oob_ud_qp_constructor (mca_oob_ud_qp_t *qp)
{
    const size_t parent_bytes = sizeof (qp->super);
    char *own_fields = (char *) qp + parent_bytes;

    memset (own_fields, 0, sizeof (*qp) - parent_bytes);
}
/*
 * Class destructor: release the verbs objects owned by qp.
 *
 * The QP (if any) is reset and destroyed first, then the completion queues.
 * When the send and receive sides share one CQ it is destroyed only once.
 * Failures are reported (QP) or ignored (reset, CQs); nothing is returned.
 */
static void mca_oob_ud_qp_destructor (mca_oob_ud_qp_t *qp)
{
    int rc;

    if (NULL != qp->ib_qp) {
        /* clear qp and move to reset */
        (void) mca_oob_ud_qp_to_reset (qp);

        /* destroy qp; ibv_destroy_qp() reports the failure reason through
         * its return value */
        rc = ibv_destroy_qp (qp->ib_qp);
        if (0 != rc) {
            orte_show_help("help-oob-ud.txt", "destroy-qp-failed", true,
                           orte_process_info.nodename, strerror(rc));
        }
    }

    if (NULL != qp->ib_send_cq) {
        (void) ibv_destroy_cq (qp->ib_send_cq);
    }

    /* skip the receive CQ when it is the same object as the send CQ */
    if (NULL != qp->ib_recv_cq && qp->ib_recv_cq != qp->ib_send_cq) {
        (void) ibv_destroy_cq (qp->ib_recv_cq);
    }
}
/*
 * Poll the send CQ of qp, one entry at a time, until num_completions work
 * completions have been collected.
 *
 * Returns ORTE_ERROR immediately if polling itself fails.  A completion with
 * a non-success status is reported and makes the final result ORTE_ERROR,
 * but polling continues until the requested count is reached.  Otherwise
 * returns ORTE_SUCCESS.
 */
static inline int mca_oob_ud_qp_process_send_completions (mca_oob_ud_qp_t *qp,
                                                          int num_completions)
{
    struct ibv_wc completion[1];
    int result = ORTE_SUCCESS;
    int reaped = 0;

    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                        "%s oob:ud:qp_process_send_completions polling for %d completions",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        num_completions);

    while (reaped < num_completions) {
        int polled = ibv_poll_cq (qp->ib_send_cq, 1, completion);
        int idx;

        if (polled < 0) {
            orte_show_help("help-oob-ud.txt", "poll-cq-failed", true,
                           orte_process_info.nodename, 1, strerror(errno));
            return ORTE_ERROR;
        }

        for (idx = 0 ; idx < polled ; idx++) {
            if (IBV_WC_SUCCESS == completion[idx].status) {
                continue;
            }

            orte_show_help("help-oob-ud.txt", "poll-cq-failed-wc", true,
                           orte_process_info.nodename, 1, idx, completion[idx].status);
            result = ORTE_ERROR;
        }

        reaped += polled;
    }

    return result;
}
/**
 * Post a chain of send work requests on qp and wait for their completions.
 *
 * @param qp               queue pair to post on
 * @param wr               first send work request of the chain
 * @param num_completions  number of send completions to poll for afterwards
 *
 * @return ORTE_ERROR if the post fails (a help message is shown), otherwise
 *         the result of polling the send CQ for num_completions completions.
 */
int mca_oob_ud_qp_post_send (mca_oob_ud_qp_t *qp, struct ibv_send_wr *wr,
                             int num_completions) {
    struct ibv_send_wr *bad_wr;
    int rc;

    /* ibv_post_send() returns the error code itself; errno is not a
     * reliable source for the failure reason here */
    rc = ibv_post_send (qp->ib_qp, wr, &bad_wr);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "post-send-failed", true,
                       orte_process_info.nodename, strerror(rc));
        return ORTE_ERROR;
    }

    return mca_oob_ud_qp_process_send_completions (qp, num_completions);
}
/**
 * Post a chain of receive work requests on qp.
 *
 * @param qp  queue pair to post on
 * @param wr  first receive work request of the chain
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR (after showing a help message) when
 *         the post is rejected.
 */
int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
    struct ibv_recv_wr *bad_wr;
    int rc;

    /* ibv_post_recv() returns the error code itself; errno is not a
     * reliable source for the failure reason here */
    rc = ibv_post_recv (qp->ib_qp, wr, &bad_wr);
    if (0 != rc) {
        orte_show_help("help-oob-ud.txt", "post-recv-failed", true,
                       orte_process_info.nodename, strerror(rc));
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
/**
 * Take a data queue pair from the port's free list, creating its verbs
 * resources on first use.
 *
 * @param port    port whose data_qps free list is used
 * @param qp_ptr  receives the queue pair; written as soon as an item has
 *                been obtained, before initialization is attempted
 *
 * @return ORTE_SUCCESS when a ready queue pair is stored in *qp_ptr,
 *         ORTE_ERR_TEMP_OUT_OF_RESOURCE when the free list yields no item,
 *         or the error from mca_oob_ud_qp_init() / mca_oob_ud_qp_to_rts().
 *
 * NOTE(review): when init or the RTS transition fails the item is not handed
 * back to the free list here -- confirm callers deal with that.
 */
int mca_oob_ud_qp_data_aquire (struct mca_oob_ud_port_t *port, mca_oob_ud_qp_t **qp_ptr) {
    opal_free_list_item_t *item;
    int rc;

    item = opal_free_list_get_st (&port->data_qps);
    if (NULL == item) {
        /* set the code first so the log line reports the real error */
        rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s oob:ud:qp_data_aquire error allocating new data qp. error = %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc);
        return rc;
    }

    *qp_ptr = (mca_oob_ud_qp_t *) item;

    if (NULL != (*qp_ptr)->ib_qp) {
        /* item already carries a queue pair from an earlier use */
        return ORTE_SUCCESS;
    }

    /* first use of this item: single shared CQ, no completion channels */
    rc = mca_oob_ud_qp_init (*qp_ptr, port, NULL, NULL, true);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    return mca_oob_ud_qp_to_rts (*qp_ptr);
}
/**
 * Purge a data queue pair and hand it back to its port's free list.
 *
 * @param qp  queue pair to release; qp->port identifies the free list
 *
 * @return ORTE_SUCCESS once the queue pair is back on the free list, or the
 *         purge error, in which case the queue pair is NOT returned to the
 *         list.
 */
int mca_oob_ud_qp_data_release (mca_oob_ud_qp_t *qp) {
    const int purge_status = mca_oob_ud_qp_purge (qp);

    if (ORTE_SUCCESS == purge_status) {
        opal_free_list_return_st (&qp->port->data_qps, &qp->super);
    }

    return purge_status;
}