diff --git a/ompi/mca/btl/usnic/btl_usnic.h b/ompi/mca/btl/usnic/btl_usnic.h index 58ae77656f..84f463e5d6 100644 --- a/ompi/mca/btl/usnic/btl_usnic.h +++ b/ompi/mca/btl/usnic/btl_usnic.h @@ -190,8 +190,18 @@ typedef mca_btl_base_recv_reg_t ompi_btl_usnic_recv_reg_t; * Size for sequence numbers (just to ensure we use the same size * everywhere) */ -typedef uint64_t ompi_btl_usnic_seq_t; -#define UDSEQ PRIu64 +typedef uint16_t ompi_btl_usnic_seq_t; +#define UDSEQ PRIu16 + +/* sequence number comparison macros that allow for rollover. + * Relies on the fact that sequence numbers should be relatively close + * together as compared to (1<<15) + */ +#define SEQ_DIFF(A,B) ((int16_t)((A)-(B))) +#define SEQ_LT(A,B) (SEQ_DIFF(A,B) < 0) +#define SEQ_LE(A,B) (SEQ_DIFF(A,B) <= 0) +#define SEQ_GT(A,B) (SEQ_DIFF(A,B) > 0) +#define SEQ_GE(A,B) (SEQ_DIFF(A,B) >= 0) /** * Register the usnic BTL MCA params diff --git a/ompi/mca/btl/usnic/btl_usnic_ack.c b/ompi/mca/btl/usnic/btl_usnic_ack.c index 6131609193..6a0624ff4a 100644 --- a/ompi/mca/btl/usnic/btl_usnic_ack.c +++ b/ompi/mca/btl/usnic/btl_usnic_ack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,7 +71,7 @@ ompi_btl_usnic_handle_ack( module = endpoint->endpoint_module; /* ignore if this is an old ACK */ - if (ack_seq < endpoint->endpoint_ack_seq_rcvd) { + if (SEQ_LT(ack_seq, endpoint->endpoint_ack_seq_rcvd)) { #if MSGDEBUG1 opal_output(0, "Got OLD DUP ACK seq %"UDSEQ" < %"UDSEQ"\n", ack_seq, endpoint->endpoint_ack_seq_rcvd); @@ -89,7 +89,7 @@ ompi_btl_usnic_handle_ack( /* Does this ACK have a new sequence number that we haven't seen before? 
*/ - for (is = endpoint->endpoint_ack_seq_rcvd + 1; is <= ack_seq; ++is) { + for (is = endpoint->endpoint_ack_seq_rcvd + 1; SEQ_LE(is, ack_seq); ++is) { sseg = endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)]; #if MSGDEBUG1 @@ -99,7 +99,7 @@ ompi_btl_usnic_handle_ack( #endif assert(sseg != NULL); - assert(sseg->ss_base.us_btl_header->seq == is); + assert(sseg->ss_base.us_btl_header->pkt_seq == is); #if MSGDEBUG1 if (sseg->ss_hotel_room == -1) { opal_output(0, "=== ACKed frag in sent_frags array is not in hotel/enqueued, module %p, endpoint %p, seg %p, seq %" UDSEQ ", slot %lu", @@ -264,7 +264,7 @@ ompi_btl_usnic_ack_timeout( { opal_output(0, "Send timeout! seg %p, room %d, seq %" UDSEQ "\n", (void*)seg, seg->ss_hotel_room, - seg->ss_base.us_btl_header->seq); + seg->ss_base.us_btl_header->pkt_seq); } #endif diff --git a/ompi/mca/btl/usnic/btl_usnic_ack.h b/ompi/mca/btl/usnic/btl_usnic_ack.h index 5dbb06a73c..5af81748ed 100644 --- a/ompi/mca/btl/usnic/btl_usnic_ack.h +++ b/ompi/mca/btl/usnic/btl_usnic_ack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,12 +70,13 @@ ompi_btl_usnic_piggyback_ack( ompi_btl_usnic_remove_from_endpoints_needing_ack(endpoint); sseg->ss_base.us_btl_header->ack_seq = endpoint->endpoint_next_contig_seq_to_recv - 1; + sseg->ss_base.us_btl_header->ack_present = 1; #if MSGDEBUG1 opal_output(0, "Piggy-backing ACK for sequence %"UDSEQ"\n", sseg->ss_base.us_btl_header->ack_seq); #endif } else { - sseg->ss_base.us_btl_header->ack_seq = 0; + sseg->ss_base.us_btl_header->ack_present = 0; } } diff --git a/ompi/mca/btl/usnic/btl_usnic_endpoint.h b/ompi/mca/btl/usnic/btl_usnic_endpoint.h index 76d87d5ed6..8654744f94 100644 --- a/ompi/mca/btl/usnic/btl_usnic_endpoint.h +++ b/ompi/mca/btl/usnic/btl_usnic_endpoint.h @@ -11,7 +11,7 @@ * All rights reserved. 
* Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,8 +44,8 @@ struct ompi_btl_usnic_send_segment_t; */ #define WINDOW_SIZE 4096 #define WINDOW_SIZE_MOD(a) (((a) & (WINDOW_SIZE - 1))) -#define WINDOW_OPEN(E) ((E)->endpoint_next_seq_to_send < \ - ((E)->endpoint_ack_seq_rcvd + WINDOW_SIZE)) +#define WINDOW_OPEN(E) (SEQ_LT((E)->endpoint_next_seq_to_send, \ + ((E)->endpoint_ack_seq_rcvd + WINDOW_SIZE))) #define WINDOW_EMPTY(E) ((E)->endpoint_ack_seq_rcvd == \ ((E)->endpoint_next_seq_to_send-1)) diff --git a/ompi/mca/btl/usnic/btl_usnic_frag.c b/ompi/mca/btl/usnic/btl_usnic_frag.c index 297d8d26ef..62bb0e18f9 100644 --- a/ompi/mca/btl/usnic/btl_usnic_frag.c +++ b/ompi/mca/btl/usnic/btl_usnic_frag.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,6 +109,7 @@ ack_seg_constructor( /* ACK value embedded in BTL header */ bseg->us_btl_header->payload_type = OMPI_BTL_USNIC_PAYLOAD_TYPE_ACK; bseg->us_btl_header->payload_len = 0; + bseg->us_btl_header->ack_present = 1; bseg->us_sg_entry[0].length = sizeof(bseg->us_btl_header); } diff --git a/ompi/mca/btl/usnic/btl_usnic_frag.h b/ompi/mca/btl/usnic/btl_usnic_frag.h index 1a9a70250f..94d3a1d37e 100644 --- a/ompi/mca/btl/usnic/btl_usnic_frag.h +++ b/ompi/mca/btl/usnic/btl_usnic_frag.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -121,9 +121,8 @@ typedef struct { /* Hashed RTE process name of the sender */ uint64_t sender; - /* Sliding window sequence number (echoed back in an ACK). This - is 64 bits. */ - ompi_btl_usnic_seq_t seq; + /* Sliding window sequence number (echoed back in an ACK). */ + ompi_btl_usnic_seq_t pkt_seq; ompi_btl_usnic_seq_t ack_seq; /* for piggy-backing ACKs */ /* payload legnth (in bytes). We unfortunately have to include @@ -137,6 +136,9 @@ typedef struct { /* Type of BTL header (see enum, above) */ uint8_t payload_type; + /* true if there is piggy-backed ACK */ + uint8_t ack_present; + /* tag for upper layer */ mca_btl_base_tag_t tag; } ompi_btl_usnic_btl_header_t; diff --git a/ompi/mca/btl/usnic/btl_usnic_module.c b/ompi/mca/btl/usnic/btl_usnic_module.c index 7cc4379c3f..51404fad20 100644 --- a/ompi/mca/btl/usnic/btl_usnic_module.c +++ b/ompi/mca/btl/usnic/btl_usnic_module.c @@ -969,7 +969,7 @@ usnic_do_resends( endpoint = sseg->ss_parent_frag->sf_endpoint; /* clobber any stale piggy-backed ACK */ - sseg->ss_base.us_btl_header->ack_seq = 0; + sseg->ss_base.us_btl_header->ack_present = 0; /* Only post this segment if not already posted */ if (sseg->ss_send_posted == 0) { @@ -1816,10 +1816,8 @@ static ompi_btl_usnic_seq_t get_initial_seq_no(void) { ompi_btl_usnic_seq_t isn; - /* only utilize the bottom 62 bits to avoid hitting seq # overflow */ - isn = (((uint64_t)opal_rand(&ompi_btl_usnic_rand_buff) & ((1LL<<30)-1)) << 32) | - ((uint64_t)opal_rand(&ompi_btl_usnic_rand_buff) & ((1LL<<32)-1)); - isn += 2; /* guarantee > 1 */ + + isn = (ompi_btl_usnic_seq_t)opal_rand(&ompi_btl_usnic_rand_buff); return isn; } diff --git a/ompi/mca/btl/usnic/btl_usnic_recv.c b/ompi/mca/btl/usnic/btl_usnic_recv.c index 7c5b252265..5a585e530d 100644 --- a/ompi/mca/btl/usnic/btl_usnic_recv.c +++ b/ompi/mca/btl/usnic/btl_usnic_recv.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. 
All rights * reserved. - * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -93,7 +93,7 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module, opal_output(0, "=== Unknown sender; dropped: from MAC %s to MAC %s, seq %" UDSEQ, src_mac, dest_mac, - bseg->us_btl_header->seq); + bseg->us_btl_header->pkt_seq); #endif ++module->stats.num_unk_recvs; goto repost_no_endpoint; @@ -113,12 +113,12 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module, #if MSGDEBUG1 opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ ", len=%d\n", - (void*) endpoint, hdr->seq, hdr->payload_len); + (void*) endpoint, hdr->pkt_seq, hdr->payload_len); #if 0 opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n", (void*) endpoint, - seg->rs_base.us_btl_header->seq, + seg->rs_base.us_btl_header->pkt_seq, src_mac, dest_mac, window_index, endpoint->endpoint_next_contig_seq_to_recv, @@ -182,7 +182,7 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module, opal_output(0, "<-- Received CHUNK fid %d ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n", seg->rs_base.us_btl_chunk_header->ch_frag_id, (void*) endpoint, - seg->rs_base.us_btl_chunk_header->ch_hdr.seq, + seg->rs_base.us_btl_chunk_header->ch_hdr.pkt_seq, src_mac, dest_mac, window_index, endpoint->endpoint_next_contig_seq_to_recv, diff --git a/ompi/mca/btl/usnic/btl_usnic_recv.h b/ompi/mca/btl/usnic/btl_usnic_recv.h index c591b718e2..ca02dc3138 100644 --- a/ompi/mca/btl/usnic/btl_usnic_recv.h +++ b/ompi/mca/btl/usnic/btl_usnic_recv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -116,11 +116,12 @@ ompi_btl_usnic_check_rx_seq( { uint32_t i; ompi_btl_usnic_seq_t seq; + int delta; /* * Handle piggy-backed ACK if present */ - if (seg->rs_base.us_btl_header->ack_seq != 0) { + if (seg->rs_base.us_btl_header->ack_present) { #if MSGDEBUG1 opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq); #endif @@ -151,12 +152,12 @@ ompi_btl_usnic_check_rx_seq( or seq >= next_contig_seg_to_recv + WINDOW_SIZE */ - seq = seg->rs_base.us_btl_header->seq; - if (seq < endpoint->endpoint_next_contig_seq_to_recv || - seq >= endpoint->endpoint_next_contig_seq_to_recv + WINDOW_SIZE) { + seq = seg->rs_base.us_btl_header->pkt_seq; + delta = SEQ_DIFF(seq, endpoint->endpoint_next_contig_seq_to_recv); + if (delta < 0 || delta >= WINDOW_SIZE) { #if MSGDEBUG1 opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ " outside of window (%" UDSEQ " - %" UDSEQ "), %p, module %p -- DROPPED\n", - (void*)endpoint, seg->rs_base.us_btl_header->seq, + (void*)endpoint, seg->rs_base.us_btl_header->pkt_seq, endpoint->endpoint_next_contig_seq_to_recv, (endpoint->endpoint_next_contig_seq_to_recv + WINDOW_SIZE - 1), @@ -165,7 +166,7 @@ ompi_btl_usnic_check_rx_seq( #endif /* Stats */ - if (seq < endpoint->endpoint_next_contig_seq_to_recv) { + if (delta < 0) { ++endpoint->endpoint_module->stats.num_oow_low_recvs; } else { ++endpoint->endpoint_module->stats.num_oow_high_recvs; @@ -196,19 +197,19 @@ ompi_btl_usnic_check_rx_seq( rfstart = (rfstart + num_acks_sent) % WINDOW_SIZE */ - i = seq - endpoint->endpoint_next_contig_seq_to_recv; + i = SEQ_DIFF(seq, endpoint->endpoint_next_contig_seq_to_recv); i = WINDOW_SIZE_MOD(i + endpoint->endpoint_rfstart); if (endpoint->endpoint_rcvd_segs[i]) { #if MSGDEBUG1 opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ ", seg %p: duplicate -- DROPPED\n", - (void*) endpoint, 
seg->rs_base.us_btl_header->seq, (void*) seg); + (void*) endpoint, seg->rs_base.us_btl_header->pkt_seq, (void*) seg); #endif /* highest_seq_rcvd is for debug stats only; it's not used in any window calculations */ - assert(seq <= endpoint->endpoint_highest_seq_rcvd); + assert(SEQ_LE(seq, endpoint->endpoint_highest_seq_rcvd)); /* next_contig_seq_to_recv-1 is the ack number we'll send */ - assert (seq > endpoint->endpoint_next_contig_seq_to_recv - 1); + assert (SEQ_GT(seq, endpoint->endpoint_next_contig_seq_to_recv - 1)); /* Stats */ ++endpoint->endpoint_module->stats.num_dup_recvs; @@ -216,7 +217,7 @@ ompi_btl_usnic_check_rx_seq( } /* Stats: is this the highest sequence number we've received? */ - if (seq > endpoint->endpoint_highest_seq_rcvd) { + if (SEQ_GT(seq, endpoint->endpoint_highest_seq_rcvd)) { endpoint->endpoint_highest_seq_rcvd = seq; } @@ -246,6 +247,7 @@ ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module, mca_btl_active_message_callback_t* reg; ompi_btl_usnic_seq_t seq; ompi_btl_usnic_endpoint_t *endpoint; + int delta; int i; bseg = &seg->rs_base; @@ -264,9 +266,9 @@ ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module, (void*)(seg->rs_recv_desc.sg_list[0].addr), seg->rs_recv_desc.sg_list[0].length); - seq = seg->rs_base.us_btl_header->seq; - if (seq < endpoint->endpoint_next_contig_seq_to_recv || - seq >= endpoint->endpoint_next_contig_seq_to_recv + WINDOW_SIZE) { + seq = seg->rs_base.us_btl_header->pkt_seq; + delta = SEQ_DIFF(seq, endpoint->endpoint_next_contig_seq_to_recv); + if (delta < 0 || delta >= WINDOW_SIZE) { goto drop; } diff --git a/ompi/mca/btl/usnic/btl_usnic_send.h b/ompi/mca/btl/usnic/btl_usnic_send.h index 3eda92d647..8ef4ffecc6 100644 --- a/ompi/mca/btl/usnic/btl_usnic_send.h +++ b/ompi/mca/btl/usnic/btl_usnic_send.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -157,12 +157,13 @@ ompi_btl_usnic_endpoint_send_segment( next_seq_to_send > ack_seq_rcvd + WINDOW_SIZE */ - assert(endpoint->endpoint_next_seq_to_send > - endpoint->endpoint_ack_seq_rcvd); + assert(SEQ_GT(endpoint->endpoint_next_seq_to_send, + endpoint->endpoint_ack_seq_rcvd)); assert(WINDOW_OPEN(endpoint)); /* Assign sequence number and increment */ - sseg->ss_base.us_btl_header->seq = endpoint->endpoint_next_seq_to_send++; + sseg->ss_base.us_btl_header->pkt_seq = + endpoint->endpoint_next_seq_to_send++; /* Fill in remote address to indicate PUT or not */ sseg->ss_base.us_btl_header->put_addr = @@ -183,7 +184,7 @@ ompi_btl_usnic_endpoint_send_segment( opal_output(0, "--> Sending %s: seq: %" UDSEQ ", sender: 0x%016lx from device %s MAC %s, qp %u, seg %p, room %d, wc len %u, remote MAC %s, qp %u", (sseg->ss_parent_frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND)? "CHUNK" : "FRAG", - sseg->ss_base.us_btl_header->seq, + sseg->ss_base.us_btl_header->pkt_seq, sseg->ss_base.us_btl_header->sender, endpoint->endpoint_module->device->name, mac_str1, module->local_addr.qp_num[sseg->ss_channel], @@ -200,7 +201,7 @@ ompi_btl_usnic_endpoint_send_segment( is the same length as the sender's window (i.e., WINDOW_SIZE). To find a unique slot in this array, use (seq % WINDOW_SIZE). */ - sfi = WINDOW_SIZE_MOD(sseg->ss_base.us_btl_header->seq); + sfi = WINDOW_SIZE_MOD(sseg->ss_base.us_btl_header->pkt_seq); endpoint->endpoint_sent_segs[sfi] = sseg; sseg->ss_ack_pending = true;