openmpi/ompi/mca/bcol/iboffload/bcol_iboffload_task.h
Ralph Castain 552c9ca5a0 George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT:    Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down into OPAL

All the components required for inter-process communication are currently deeply integrated into the OMPI layer. Several groups/institutions have expressed interest in a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purposes. UTK, with support from Sandia, developed a version of Open MPI where the entire communication infrastructure has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with a few exceptions (mainly BTLs that I have no way of compiling/testing). Thus, the completion of this RFC is tied to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.

This commit was SVN r32317.
2014-07-26 00:47:28 +00:00


/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
 * Copyright (c) 2013      The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#ifndef MCA_BCOL_IBOFFLOAD_TASK_H
#define MCA_BCOL_IBOFFLOAD_TASK_H
#include "ompi_config.h"
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include <infiniband/mqe.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_endpoint.h"
#include "bcol_iboffload_collfrag.h"
#define SENDWR(task) ((task)->element.post.send_wr)
BEGIN_C_DECLS
/* the mca_bcol_ibv_mwr_task_t name was replaced with mca_bcol_iboffload_task_t */
struct mca_bcol_iboffload_task_t {
    ompi_free_list_item_t super;

    /* pointer to the memory descriptor associated with the task */
    mca_bcol_iboffload_frag_t *frag;

    /* pointer to the bcol descriptor; we need it for send tasks only,
     * because we complete them in an asynchronous manner */
    mca_bcol_iboffload_collfrag_t *collfrag;

    /* task to be posted */
    struct mqe_task element;

    /* array of ibv_sge structs; in the CALC case,
     * for example, it has two entries */
    struct ibv_sge *sg_entries;

    /* sg_entries array length */
    int sg_entries_num;

    /* Each task is a member of some free list; if this pointer is NULL,
     * we assume the task belongs to the common task list (tasks_free) */
    ompi_free_list_t *task_list;

    /* Pointer to the next task */
    struct mca_bcol_iboffload_task_t *next_task;

    /* pasha - this is a crappy workaround for the driver interface;
     * the send_wr and recv_wr should be part of mqe_task, not pointers! */
    union {
        struct ibv_m_send_wr send_wr;
        struct ibv_recv_wr recv_wr;
    } wr;

    /* Used if we decide to post the task to a different QP */
    struct mqe_qp_entry task_mqe_qp_entry;

    /* Pointer to the endpoint for this task */
    mca_bcol_iboffload_endpoint_t *endpoint;
};
typedef struct mca_bcol_iboffload_task_t mca_bcol_iboffload_task_t;

OBJ_CLASS_DECLARATION(mca_bcol_iboffload_task_t);
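/*
 * Task lifecycle, as a hedged sketch rather than a formal contract:
 * a task is taken from a free list by one of the get_*_task() helpers
 * below, its mqe_task element is linked to other tasks through
 * next_task, the resulting chain is posted to the offload engine, and
 * once it completes release_frags_on_task() returns the fragments and
 * the task goes back to task_list (or to the common tasks_free list
 * when task_list is NULL).
 */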
/* calc_tasks_free free list init function */
void
mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx);
/* iovec_tasks_free free list init function */
void
mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx);
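/* Both initializers above match the ompi_free_list_t per-item init
 * callback signature: the list invokes them once per freshly allocated
 * item, passing the list-specific context through ctx. */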
static inline __opal_attribute_always_inline__ void
mca_bcol_iboffload_return_frag_tolist(
                mca_bcol_iboffload_frag_t *frag,
                ompi_free_list_t *list)
{
    if (NULL != frag) {
        mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

        assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != frag->type);

        if (MCA_BCOL_IBOFFLOAD_DUMMY_OWNER != frag->type &&
                0 == frag->ref_counter) {
            if (MCA_BCOL_IBOFFLOAD_BCOL_OWNER == frag->type) {
                OMPI_FREE_LIST_RETURN_MT((&(list[frag->qp_index])),
                                         (ompi_free_list_item_t*) frag);
            } else if (MCA_BCOL_IBOFFLOAD_ML_OWNER == frag->type) {
                OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)),
                                         (ompi_free_list_item_t*) frag);
            }
        }
    }
}
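/*
 * A short usage sketch ('task' and 'iboffload' are hypothetical
 * locals): completion code hands each fragment back through this
 * helper, which routes BCOL-owned fragments to the per-QP lists passed
 * in 'list' and ML-owned ones to the component's ml_frags_free list:
 *
 *     mca_bcol_iboffload_return_frag_tolist(task->frag,
 *                                           iboffload->device->frags_free);
 *
 * Dummy-owned fragments and fragments still holding references are
 * deliberately left untouched.
 */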
static inline __opal_attribute_always_inline__ void
mca_bcol_iboffload_return_recv_frags_toendpoint(
                mca_bcol_iboffload_frag_t *frags,
                mca_bcol_iboffload_endpoint_t *ep,
                int qp_index)
{
    mca_bcol_iboffload_frag_t *recv_frag = frags;
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    while (NULL != recv_frag) {
        assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != recv_frag->type);

        if (MCA_BCOL_IBOFFLOAD_ML_OWNER != recv_frag->type) {
            opal_list_prepend(&ep->qps[qp_index].preposted_frags,
                              (opal_list_item_t *) recv_frag);
        } else {
            OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)),
                                     (ompi_free_list_item_t*) recv_frag);
        }

        recv_frag = recv_frag->next;
    }
}
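/* Note that non-ML-owned fragments are pushed back onto the endpoint's
 * per-QP preposted_frags list for reuse as receive buffers, while
 * ML-owned ones return to the component-wide free list; this mirrors
 * the ownership split in mca_bcol_iboffload_return_frag_tolist() above. */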
/* Wait task allocation and initialization */
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_wait_task(mca_bcol_iboffload_module_t *iboffload,
                                 uint32_t source, int num_waits,
                                 mca_bcol_iboffload_frag_t *frags,
                                 int qp_index, struct ibv_qp *qp)
{
    ompi_free_list_item_t *item;
    mca_bcol_iboffload_task_t *task;

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
    mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source];

    /* non-blocking allocation of a task from the free list */
    OMPI_FREE_LIST_GET_MT(&cm->tasks_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        mca_bcol_iboffload_return_recv_frags_toendpoint(frags, endpoint, qp_index);
        return NULL;
    }

    task = (mca_bcol_iboffload_task_t *) item;

    /* set pointer to the corresponding recv fragments */
    IBOFFLOAD_SET_FRAGS_ON_TASK(frags, task);

    task->next_task = NULL;
    task->endpoint = endpoint;

    /* set opcode */
    task->element.opcode = MQE_WR_CQE_WAIT;
    task->element.flags = 0; /* any flag would do; the driver ignores it */

    /* set task id */
    task->element.wr_id = (uint64_t) (uintptr_t) task;

    /* set CQ */
    task->element.wait.cq = endpoint->qp_config.init_attr[qp_index].recv_cq;

    /* set the number of completions to wait for */
    task->element.wait.count = num_waits;

    /* set pointer to QP */
    if (NULL == qp) { /* NULL means use the MQ's QP */
        task->element.wait.mqe_qp = NULL;
    } else {          /* post the wait to the SQ of this QP */
        task->task_mqe_qp_entry.next = NULL;
        task->task_mqe_qp_entry.qp = qp;

        task->element.wait.mqe_qp = &task->task_mqe_qp_entry;
    }

    IBOFFLOAD_VERBOSE(10, ("Allocating task %p, cq: %p, num waits: %d, qp_index - %d, "
                           "destination %d for comm rank: %d.\n",
                           (void *) task, (void *) task->element.wait.cq,
                           task->element.wait.count, qp_index, source,
                           endpoint->iboffload_module->ibnet->super.group_list[endpoint->index]));

    return task;
}
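/*
 * A hedged usage sketch (names other than the function itself are
 * hypothetical): a wait task is typically chained in front of a
 * dependent send, so the offload engine stalls until 'num_waits'
 * completions appear on the chosen receive CQ:
 *
 *     wait = mca_bcol_iboffload_get_wait_task(iboffload, peer, 1,
 *                                             preposted, qp_index, NULL);
 *     if (NULL != wait) {
 *         wait->next_task = send_task;    // wait first, then send
 *     }
 *
 * Passing qp == NULL posts the wait on the MQ's own QP, as handled above.
 */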
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_prepare_send_task(
                mca_bcol_iboffload_module_t *iboffload,
                mca_bcol_iboffload_endpoint_t *endpoint,
                int qp_index, ompi_free_list_t *task_list,
                mca_bcol_iboffload_collfrag_t *collfrag)
{
    ompi_free_list_item_t *item;
    mca_bcol_iboffload_task_t *task;

    IBOFFLOAD_VERBOSE(10, ("Destination rank - %d, QP index - %d, "
                           "for comm rank - %d\n", endpoint->index, qp_index,
                           endpoint->iboffload_module->ibnet->super.group_list[endpoint->index]));

    /* get an item from the free list */
    OMPI_FREE_LIST_GET_MT(task_list, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        return NULL;
    }

    task = (mca_bcol_iboffload_task_t*) item;
    task->endpoint = endpoint;

    ++(collfrag->n_sends);

    task->collfrag = collfrag;
    task->next_task = NULL;

    task->element.wr_id = (uint64_t) (uintptr_t) task;
    task->element.post.qp = endpoint->qps[qp_index].qp->lcl_qp;
    task->element.opcode = MQE_WR_SEND;

    /* define send work request */
    SENDWR(task) = &(task->wr.send_wr);
    SENDWR(task)->next = NULL;
    SENDWR(task)->wr_id = (uint64_t) (uintptr_t) collfrag;

    IBOFFLOAD_VERBOSE(10, ("coll_frag - %p.\n", collfrag));

    /* Always send IMM on sends! */
    task->element.flags = MQE_WR_FLAG_IMM_EXE;

    /* Always signal completion */
    SENDWR(task)->send_flags = IBV_SEND_SIGNALED;

    return task;
}
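/* All of the get_*_task() helpers below delegate the shared setup
 * (free-list allocation, collfrag send accounting, the MQE_WR_SEND
 * element and the signaled-completion flag) to the function above and
 * then fill in only the opcode- and SGE-specific fields of the send
 * work request. */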
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_send_task(
                mca_bcol_iboffload_module_t *iboffload,
                uint32_t destination, int qp_index,
                mca_bcol_iboffload_frag_t *frag,
                mca_bcol_iboffload_collfrag_t *collfrag,
                bool enable_inline)
{
    mca_bcol_iboffload_task_t *task;

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
    mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination];

    IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n",
                           qp_index));

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
                                                &cm->tasks_free,
                                                collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    /* no support for multiple frags */
    IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);

    /* We cannot send zero bytes, but we can do a zero-byte RDMA write with immediate */
    if (0 == frag->sg_entry.length) {
        SENDWR(task)->imm_data = 0;
        SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;

        SENDWR(task)->wr.rdma.rkey = endpoint->remote_zero_rdma_addr.rkey;
        SENDWR(task)->wr.rdma.remote_addr = endpoint->remote_zero_rdma_addr.addr;
    } else {
        SENDWR(task)->opcode = IBV_WR_SEND;
    }

    /* single sge */
    SENDWR(task)->num_sge = 1;
    SENDWR(task)->sg_list = &(frag->sg_entry);

    /* Use an inline send when possible */
    if (enable_inline &&
            frag->sg_entry.length < cm->max_inline_data) {
        IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length));
        SENDWR(task)->send_flags |= IBV_SEND_INLINE;
    }

    return task;
}
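/*
 * A minimal usage sketch ('peer', 'frag' and 'collfrag' stand for
 * caller-provided state): build a send task and bail out cleanly if the
 * free list is exhausted; the fragment has already been returned by the
 * helper in that case:
 *
 *     send = mca_bcol_iboffload_get_send_task(iboffload, peer, qp_index,
 *                                             frag, collfrag, true);
 *     if (OPAL_UNLIKELY(NULL == send)) {
 *         return OMPI_ERR_OUT_OF_RESOURCE;
 *     }
 *
 * The zero-length branch above exists because a 0-byte IBV_WR_SEND is
 * not usable here, while a 0-byte RDMA write with immediate data still
 * generates the completion the algorithm needs.
 */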
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_send_vec_task(
                mca_bcol_iboffload_module_t *iboffload,
                uint32_t destination, int qp_index,
                size_t nitems,
                struct iovec *buff_iovec,
                uint32_t lkey,
                mca_bcol_iboffload_frag_t *frag,
                mca_bcol_iboffload_collfrag_t *collfrag,
                bool enable_inline)
{
    mca_bcol_iboffload_task_t *task;
    int i;

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
    mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination];

    IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_vec_task qp_index %d\n",
                           qp_index));

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
                                                &iboffload->iovec_tasks_free,
                                                collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    /* no support for multiple frags */
    IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);

    /* unlike the single-frag path above, the vector path always uses a plain send */
    SENDWR(task)->opcode = IBV_WR_SEND;

    assert(task->sg_entries != NULL);

    for (i = 0; (size_t) i < nitems; ++i) {
        task->sg_entries[i].length = buff_iovec[i].iov_len;
        task->sg_entries[i].addr = (uint64_t) (uintptr_t) buff_iovec[i].iov_base;
        task->sg_entries[i].lkey = lkey;
    }

    /* multiple sge */
    SENDWR(task)->num_sge = nitems;
    SENDWR(task)->sg_list = (task->sg_entries);

    /* Use an inline send when possible */
    if (enable_inline &&
            frag->sg_entry.length < cm->max_inline_data) {
        IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length));
        SENDWR(task)->send_flags |= IBV_SEND_INLINE;
    }

    return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_vec_task(
                uint32_t destination, size_t offset, size_t nitems,
                mca_bcol_iboffload_frag_t *frag,
                mca_bcol_iboffload_module_t *iboffload,
                struct iovec *buff_iovec, uint32_t lkey,
                mca_bcol_iboffload_collfrag_t *collfrag)
{
    int i;
    mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req;

    mca_bcol_iboffload_task_t *task;
    mca_bcol_iboffload_endpoint_t *endpoint =
                                   iboffload->endpoints[destination];

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint,
                                                coll_request->qp_index,
                                                &iboffload->iovec_tasks_free,
                                                collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    /* no support for multiple frags */
    IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);

    SENDWR(task)->imm_data = 0;
    SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;

    SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
    SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t)
        ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);

    for (i = 0; (size_t) i < nitems; ++i) {
        task->sg_entries[i].length = buff_iovec[i].iov_len;
        task->sg_entries[i].addr = (uint64_t) (uintptr_t) buff_iovec[i].iov_base;
        task->sg_entries[i].lkey = lkey;
    }

    /* multiple sge */
    SENDWR(task)->num_sge = nitems;
    SENDWR(task)->sg_list = (task->sg_entries);

    IBOFFLOAD_VERBOSE(10, ("The remote offset %ld\n", offset));

    return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_task(
                uint32_t destination, size_t offset,
                mca_bcol_iboffload_frag_t *frag,
                mca_bcol_iboffload_module_t *iboffload,
                mca_bcol_iboffload_collfrag_t *collfrag)
{
    mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req;

    mca_bcol_iboffload_task_t *task;
    mca_bcol_iboffload_endpoint_t *endpoint =
                                   iboffload->endpoints[destination];
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint,
                                                coll_request->qp_index,
                                                &cm->tasks_free, collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    /* no support for multiple frags */
    IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);

    SENDWR(task)->imm_data = 0;
    SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;

    SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
    /* Pasha: I'm really not happy with the way we calculate remote addresses.
     * Why don't we use rbuf + offset? */
    SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t)
        ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);

    /* single sge */
    SENDWR(task)->num_sge = 1;
    SENDWR(task)->sg_list = &(frag->sg_entry);

    IBOFFLOAD_VERBOSE(10, ("The remote offset %ld\n", offset));

    return task;
}
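/* In both RDMA task variants the remote address is computed as the
 * peer's registered ML buffer (selected by ml_buffer_index) plus the
 * caller-supplied offset, so sender and receiver are expected to agree
 * on that offset ahead of time. */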
/* Pasha: hacked-up version of the calc operation */
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_calc_task(mca_bcol_iboffload_module_t *iboffload,
                                 uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag,
                                 struct ibv_sge *l_operand, struct ibv_sge *r_operand,
                                 mca_bcol_iboffload_collreq_t *coll_request,
                                 bool enable_inline)
/* Some specifications for this function:
 * 1) We assume that the lengths of the two operands (ibv_sge structs) are the same.
 * 2) We may reuse results (ibv_sge structs) from previous
 *    calc operations, so the frag pointer may be NULL.
 */
{
    mca_bcol_iboffload_task_t *task;
    mca_bcol_iboffload_endpoint_t *endpoint =
                                   iboffload->endpoints[destination];
    mca_bcol_iboffload_collfrag_t *collfrag =
                      (mca_bcol_iboffload_collfrag_t *)
                                opal_list_get_last(&coll_request->work_requests);

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
                                                &cm->calc_tasks_free, collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    if (NULL != frag) {
        IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
    } else {
        task->frag = NULL;
    }

    task->sg_entries[0] = *l_operand;
    task->sg_entries[1] = *r_operand;

    SENDWR(task)->num_sge = 2;
    SENDWR(task)->sg_list = task->sg_entries;
    SENDWR(task)->opcode = MCA_BCOL_IBOFFLOAD_SEND_CALC;

#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA
    SENDWR(task)->wr.calc_send.data_type = coll_request->actual_ib_dtype;
    SENDWR(task)->wr.calc_send.calc_op = coll_request->actual_ib_op;
#else
    SENDWR(task)->wr.calc.data_type = coll_request->actual_ib_dtype;
    SENDWR(task)->wr.calc.calc_op = coll_request->actual_ib_op;
#endif

    return task;
}
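/* Which member of the wr union carries the CALC parameters is driver
 * dependent: with OPAL_HAVE_IBOFFLOAD_CALC_RDMA they live in
 * wr.calc_send (and wr.calc_rdma for the RDMA flavor below), otherwise
 * in the plain wr.calc descriptor. */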
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_calc_task(mca_bcol_iboffload_module_t *iboffload,
                                      uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag,
                                      struct ibv_sge *l_operand, struct ibv_sge *r_operand,
                                      mca_bcol_iboffload_collreq_t *coll_request,
                                      size_t offset)
/* Some specifications for this function:
 * 1) We assume that the lengths of the two operands (ibv_sge structs) are the same.
 * 2) We may reuse results (ibv_sge structs) from previous
 *    calc operations, so the frag pointer may be NULL.
 */
{
    mca_bcol_iboffload_task_t *task;
    mca_bcol_iboffload_endpoint_t *endpoint =
                                   iboffload->endpoints[destination];
    mca_bcol_iboffload_collfrag_t *collfrag =
                      (mca_bcol_iboffload_collfrag_t *)
                                opal_list_get_last(&coll_request->work_requests);

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
                                                &cm->calc_tasks_free, collfrag);
    if (OPAL_UNLIKELY(NULL == task)) {
        mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
        return NULL;
    }

    if (NULL != frag) {
        IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
    } else {
        task->frag = NULL;
    }

    task->sg_entries[0] = *l_operand;
    /* Hack - we don't really use it:
    task->sg_entries[1] = *r_operand;
    */
    /* We use only a single entry:
    SENDWR(task)->num_sge = 2;
    */
    SENDWR(task)->num_sge = 1;
    SENDWR(task)->sg_list = task->sg_entries;

#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA
    SENDWR(task)->opcode = IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM;

    SENDWR(task)->wr.calc_rdma.data_type = coll_request->actual_ib_dtype;
    SENDWR(task)->wr.calc_rdma.calc_op = coll_request->actual_ib_op;

    SENDWR(task)->wr.calc_rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
    SENDWR(task)->wr.calc_rdma.remote_addr = (uint64_t) (uintptr_t)
        ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);
#else
    IBOFFLOAD_ERROR(("Fatal error: RDMA CALC was called, but the driver does not support this operation"));
    return NULL;
#endif

    return task;
}
static inline __opal_attribute_always_inline__
int release_frags_on_task(mca_bcol_iboffload_task_t *task,
                          ompi_free_list_t *list)
{
    int rc, qp_index;

    mca_bcol_iboffload_frag_t *temp_frag = task->frag;
    mca_bcol_iboffload_endpoint_t *endpoint = task->endpoint;
    mca_bcol_iboffload_component_t *cm =
                         &mca_bcol_iboffload_component;

    IBOFFLOAD_VERBOSE(10, ("\nCalling release_frags_on_task"));

    while (NULL != temp_frag) {
        qp_index = temp_frag->qp_index;

        --(temp_frag->ref_counter);

        /* Return credits */
        if (MQE_WR_CQE_WAIT == task->element.opcode) {
            ++(endpoint->qps[qp_index].rd_wqe);

            IBOFFLOAD_VERBOSE(10, ("Return rd_wqe %d pp_win %d",
                                   endpoint->qps[qp_index].rd_wqe,
                                   cm->qp_infos[qp_index].rd_pp_win));

            /* Call for recv prepost */
            if (endpoint->qps[qp_index].rd_wqe >=
                                 cm->qp_infos[qp_index].rd_pp_win) {
                IBOFFLOAD_VERBOSE(10, ("Prepost to endpoint->index - %d, qp_index - %d",
                                       endpoint->index, qp_index));
                rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index,
                                                     endpoint->qps[qp_index].rd_wqe);
                if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                    IBOFFLOAD_ERROR(("QP %d: failed to prepost.\n", qp_index));
                    return OMPI_ERROR;
                }
                /* What happens if we cannot prepost? */
            }
        } else if (MQE_WR_SEND == task->element.opcode) {
            ++(endpoint->qps[qp_index].sd_wqe);

            assert(endpoint->qps[qp_index].sd_wqe <= cm->qp_infos[qp_index].rd_num);

            IBOFFLOAD_VERBOSE(10, ("Return sd_wqe %d, qp_index - %d, endpoint - %p",
                                   endpoint->qps[qp_index].sd_wqe, qp_index, endpoint));
        } else {
            /* We should never get here */
            IBOFFLOAD_ERROR(("Unsupported operation"));
            return OMPI_ERROR;
        }

        mca_bcol_iboffload_return_frag_tolist(temp_frag, list);
        temp_frag = temp_frag->next;
    }

    return OMPI_SUCCESS;
}
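/*
 * A hedged sketch of the completion path ('first_task' and 'frags_free'
 * are hypothetical names for the head of a completed chain and the
 * matching per-QP fragment free lists): the progress code can walk the
 * tasks of a finished chain and release their fragments, which also
 * returns the rd_wqe/sd_wqe credits accounted above:
 *
 *     for (task = first_task; NULL != task; task = task->next_task) {
 *         if (OPAL_UNLIKELY(OMPI_SUCCESS !=
 *                           release_frags_on_task(task, frags_free))) {
 *             return OMPI_ERROR;
 *         }
 *     }
 */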
END_C_DECLS

#endif /* MCA_BCOL_IBOFFLOAD_TASK_H */