/*
 * Copyright (c) 2013 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef BTL_USNIC_SEND_H
#define BTL_USNIC_SEND_H

#include <stdint.h>

#include <infiniband/verbs.h>

#include "btl_usnic.h"
#include "btl_usnic_frag.h"
#include "btl_usnic_ack.h"
#if MSGDEBUG1
#include "btl_usnic_util.h"
#endif

/*
 * Check if conditions are right, and if so, put endpoint on
 * list of endpoints that have sends to be done.
 *
 * endpoint: the endpoint to examine.  When it qualifies, it is appended
 *           to endpoint->endpoint_module->endpoints_with_sends and its
 *           endpoint_ready_to_send flag is set; because that flag is
 *           part of the test, a later call will not append it again.
 *
 * With MSGDEBUG1 enabled, the outcome (and, on failure, the value of
 * each individual condition) is traced through opal_output().
 */
static inline void
ompi_btl_usnic_check_rts(
    ompi_btl_usnic_endpoint_t *endpoint)
{
    /*
     * If endpoint not already ready,
     * and has packets to send,
     * and it has send credits,
     * and its retransmission window is open,
     * make it ready
     */
    if (!endpoint->endpoint_ready_to_send &&
        !opal_list_is_empty(&endpoint->endpoint_frag_send_queue) &&
         endpoint->endpoint_send_credits > 0 &&
         WINDOW_OPEN(endpoint)) {
        opal_list_append(&endpoint->endpoint_module->endpoints_with_sends,
                &endpoint->super);
        endpoint->endpoint_ready_to_send = true;
#if MSGDEBUG1
        /* %p requires a void* argument */
        opal_output(0, "make endpoint %p RTS\n", (void *) endpoint);
#endif
    }
#if MSGDEBUG1
    /* debug-only branch: report why the endpoint was not made ready */
    else {
        opal_output(0, "rts:%d empty:%d cred:%d open%d\n",
                endpoint->endpoint_ready_to_send,
                opal_list_is_empty(&endpoint->endpoint_frag_send_queue),
                endpoint->endpoint_send_credits,
                WINDOW_OPEN(endpoint));
    }
#endif
}

/*
 * Common point for posting a segment to VERBS
 *
 * module:   module whose channel (selected by sseg->ss_channel) owns the
 *           QP the segment is posted on; its send counters are updated.
 * endpoint: destination; supplies the address handle and the remote QP
 *           number for the selected channel.
 * sseg:     segment to post; its embedded work request (ss_send_desc) is
 *           filled in with the destination and handed to ibv_post_send().
 *
 * A non-zero return from ibv_post_send() is treated as fatal and goes to
 * ompi_btl_usnic_util_abort().  On success one WQE is consumed from the
 * channel (sd_wqe is decremented).
 *
 * NOTE(review): sd_wqe is not checked here -- presumably callers only
 * post when the channel has a free WQE; confirm against callers.
 */
static inline void
ompi_btl_usnic_post_segment(
    ompi_btl_usnic_module_t *module,
    ompi_btl_usnic_endpoint_t *endpoint,
    ompi_btl_usnic_send_segment_t *sseg)
{
    struct ibv_send_wr *bad_wr;
    ompi_btl_usnic_channel_t *channel;
    struct ibv_send_wr *wr;
    int ret;

#if MSGDEBUG1
    /* ibv_sge.addr is a uint64_t and ibv_sge.length a uint32_t: convert
       the address through uintptr_t and print the length as unsigned */
    opal_output(0, "post_send: type=%d, addr=%p, len=%u\n",
                sseg->ss_base.us_type,
                (void *)(uintptr_t) sseg->ss_send_desc.sg_list->addr,
                sseg->ss_send_desc.sg_list->length);
    ompi_btl_usnic_dump_hex((void *)(uintptr_t)(sseg->ss_send_desc.sg_list->addr + sizeof(ompi_btl_usnic_btl_header_t)), 16);
#endif

    /* set target address */
    wr = &sseg->ss_send_desc;
    wr->wr.ud.ah = endpoint->endpoint_remote_ah;

    /* get channel and remote QPN */
    channel = &module->mod_channels[sseg->ss_channel];
    wr->wr.ud.remote_qpn =
        endpoint->endpoint_remote_addr.qp_num[sseg->ss_channel];

    /* Post segment to the NIC */
    ret = ibv_post_send(channel->qp, wr, &bad_wr);
    if (OPAL_UNLIKELY(0 != ret)) {
        ompi_btl_usnic_util_abort("ibv_post_send() failed",
                                  __FILE__, __LINE__, ret);
        /* Never returns */
    }

    /* track # of time non-ACKs are posted */
    if (sseg->ss_base.us_type != OMPI_BTL_USNIC_SEG_ACK) {
        ++sseg->ss_send_posted;
        ++sseg->ss_parent_frag->sf_seg_post_cnt;
    }

    /* consume a WQE */
    --channel->sd_wqe;

    /* Stats */
    ++module->num_total_sends;
    ++channel->num_channel_sends;
}
/*
 * Post a send to the verbs work queue
 *
 * module: module the segment is sent through (passed on to
 *         ompi_btl_usnic_post_segment(); its per-type send counters are
 *         updated here).
 * sseg:   segment to send.  The destination endpoint is taken from the
 *         segment's parent fragment.
 *
 * In order, this routine: stamps the segment header with the endpoint's
 * next sequence number (and advances it), copies the fragment's first
 * destination segment address into the header's put_addr, lets
 * ompi_btl_usnic_piggyback_ack() act on the segment, posts it, records
 * it in the endpoint's sent-segment array, marks an ACK as pending,
 * consumes one send credit, and checks the segment into the endpoint's
 * hotel.
 *
 * The sender window must be open on entry; this is only checked by
 * assert(), i.e. not at all when assertions are compiled out.
 */
static inline void
ompi_btl_usnic_endpoint_send_segment(
    ompi_btl_usnic_module_t *module,
    ompi_btl_usnic_send_segment_t *sseg)
{
    ompi_btl_usnic_send_frag_t *frag;
    ompi_btl_usnic_endpoint_t *endpoint;
    uint16_t sfi;

    frag = sseg->ss_parent_frag;
    endpoint = frag->sf_endpoint;

    /* Do we have room in the endpoint's sender window?

       Sender window:

                       |-------- WINDOW_SIZE ----------|
                      +---------------------------------+
                      |         next_seq_to_send        |
                      |     somewhere in this range     |
                     ^+---------------------------------+
                     |
                     +-- ack_seq_rcvd: one less than the window left edge

       Assuming that next_seq_to_send is > ack_seq_rcvd (verified
       by assert), then the good condition to send is:

            next_seq_to_send <= ack_seq_rcvd + WINDOW_SIZE

       And therefore the bad condition is

            next_seq_to_send > ack_seq_rcvd + WINDOW_SIZE
    */
    assert(endpoint->endpoint_next_seq_to_send >
           endpoint->endpoint_ack_seq_rcvd);
    assert(WINDOW_OPEN(endpoint));

    /* Assign sequence number and increment */
    sseg->ss_base.us_btl_header->seq = endpoint->endpoint_next_seq_to_send++;

    /* Fill in remote address to indicate PUT or not */
    sseg->ss_base.us_btl_header->put_addr =
        frag->sf_base.uf_dst_seg[0].seg_addr.pval;

    /* piggy-back an ACK if needed */
    ompi_btl_usnic_piggyback_ack(endpoint, sseg);

#if MSGDEBUG1
    {
        uint8_t mac[6];
        char mac_str1[128];
        char mac_str2[128];

        ompi_btl_usnic_sprintf_mac(mac_str1, module->local_addr.mac);
        ompi_btl_usnic_gid_to_mac(&endpoint->endpoint_remote_addr.gid, mac);
        ompi_btl_usnic_sprintf_mac(mac_str2, mac);

        /* NOTE(review): "%016lx" assumes the header's sender field is an
           unsigned long-sized integer -- confirm against its declaration.
           %p requires a void* argument, hence the cast on sseg. */
        opal_output(0, "--> Sending %s: seq: %" UDSEQ ", sender: 0x%016lx from device %s MAC %s, qp %u, seg %p, room %d, wc len %u, remote MAC %s, qp %u",
            (sseg->ss_parent_frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND)?
                "CHUNK" : "FRAG",
            sseg->ss_base.us_btl_header->seq,
            sseg->ss_base.us_btl_header->sender,
            endpoint->endpoint_module->device->name,
            mac_str1, module->local_addr.qp_num[sseg->ss_channel],
            (void *) sseg, sseg->ss_hotel_room,
            sseg->ss_base.us_sg_entry[0].length,
            mac_str2, endpoint->endpoint_remote_addr.qp_num[sseg->ss_channel]);
    }
#endif

    /* do the actual send */
    ompi_btl_usnic_post_segment(module, endpoint, sseg);

    /* Track this header by stashing in an array on the endpoint that
       is the same length as the sender's window (i.e., WINDOW_SIZE).
       To find a unique slot in this array, use (seq % WINDOW_SIZE).
     */
    sfi = WINDOW_SIZE_MOD(sseg->ss_base.us_btl_header->seq);
    endpoint->endpoint_sent_segs[sfi] = sseg;
    sseg->ss_ack_pending = true;

    /* bookkeeping */
    --endpoint->endpoint_send_credits;

    /* Stats */
    if (sseg->ss_parent_frag->sf_base.uf_type
            == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
        ++module->num_chunk_sends;
    } else {
        ++module->num_frag_sends;
    }

    /* If we have room in the sender's window, we also have room in
       endpoint hotel */
    opal_hotel_checkin_with_res(&endpoint->endpoint_hotel, sseg,
            &sseg->ss_hotel_room);
}

/*
 * This enqueues a fragment send into the system.  A send of a fragment
 * may result in the sending of multiple segments
 *
 * endpoint: destination endpoint; the fragment is appended to the tail
 *           of its endpoint_frag_send_queue.
 * frag:     fragment to enqueue.
 *
 * After queueing, the endpoint is re-evaluated for readiness with
 * ompi_btl_usnic_check_rts() and the owning module's sends are
 * progressed via ompi_btl_usnic_module_progress_sends().
 *
 * Returns OMPI_SUCCESS; no other value is returned from this routine.
 */
static inline int
ompi_btl_usnic_endpoint_enqueue_frag(
    ompi_btl_usnic_endpoint_t *endpoint,
    ompi_btl_usnic_send_frag_t *frag)
{
    ompi_btl_usnic_module_t *module;

    module = endpoint->endpoint_module;
#if MSGDEBUG1
    /* %p requires void* arguments.
       NOTE(review): the %d conversions assume seg_len and sf_size are
       int-sized -- confirm against their declarations. */
    opal_output(0, "enq_frag: frag=%p, endpoint=%p, type=%d, len=%d\n",
            (void *) frag, (void *) endpoint, frag->sf_base.uf_type,
            frag->sf_base.uf_base.des_src->seg_len);
    if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
        ompi_btl_usnic_large_send_frag_t *lfrag;
        lfrag = (ompi_btl_usnic_large_send_frag_t *)frag;
        opal_output(0, "   large size=%d\n", lfrag->lsf_base.sf_size);
    }
#endif

    /* add to tail of in-progress list */
    opal_list_append(&endpoint->endpoint_frag_send_queue,
            &frag->sf_base.uf_base.super.super);

    /* possibly make this endpoint ready to send again */
    ompi_btl_usnic_check_rts(endpoint);

    /* post sends now if space available */
    ompi_btl_usnic_module_progress_sends(module);

    return OMPI_SUCCESS;
}

/*
 * Out-of-line parts of the send path.  Only the prototypes appear in
 * this header; the per-function notes below are inferred from the names
 * and signatures and should be confirmed against the implementations.
 */

/* NOTE(review): presumably invoked once a send fragment is entirely
   finished -- confirm */
void ompi_btl_usnic_frag_complete(ompi_btl_usnic_send_frag_t *frag);

/* NOTE(review): presumably the send-completion handler for a segment
   belonging to a (non-chunked) fragment -- confirm */
void ompi_btl_usnic_frag_send_complete(ompi_btl_usnic_module_t *module,
                                    ompi_btl_usnic_send_segment_t *sseg);

/* NOTE(review): presumably the send-completion handler for a chunk
   segment of a large send -- confirm */
void ompi_btl_usnic_chunk_send_complete(ompi_btl_usnic_module_t *module,
                                    ompi_btl_usnic_send_segment_t *sseg);

/* NOTE(review): presumably the general (non-fast-path) send of a
   fragment to an endpoint with the given BTL tag; meaning of the int
   return is not visible here -- confirm */
int ompi_btl_usnic_send_slower( ompi_btl_usnic_module_t *module,
    ompi_btl_usnic_endpoint_t *endpoint,
    ompi_btl_usnic_send_frag_t *frag,
    mca_btl_base_tag_t tag);

#endif /* BTL_USNIC_SEND_H */