From c101385f64906cc3dda427d45de68b54b0e48792 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 24 Aug 2015 16:00:06 -0600 Subject: [PATCH] btl/openib: fix sequence number generation for debug mode When using eager RDMA in debug builds the openib btl generates a sequence number for each send. The code independently updated the head index and the sequence number for the eager rdma transaction. If multiple threads enter this code at the same time and run in the following order: thread 1: update sequence (0 -> 1) thread 2: update sequence (1 -> 2) thread 2: update head (0 -> 1) thread 1: update head (1 -> 2) the sequence number for head[0] gets 1 and the sequence number for head[1] gets 0. The fix is to generate the sequence number from the head index. Signed-off-by: Nathan Hjelm --- opal/mca/btl/openib/btl_openib_eager_rdma.h | 37 ++++++++++++++++----- opal/mca/btl/openib/btl_openib_endpoint.h | 13 ++++---- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib_eager_rdma.h b/opal/mca/btl/openib/btl_openib_eager_rdma.h index b11378cdd1..993a5958fc 100644 --- a/opal/mca/btl/openib/btl_openib_eager_rdma.h +++ b/opal/mca/btl/openib/btl_openib_eager_rdma.h @@ -1,5 +1,8 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006-2007 Voltaire All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -81,17 +84,33 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo mca_btl_openib_component.eager_rdma_num) \ (I) = 0; \ } while (0) -#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ - do { \ - int32_t new_head; \ - do { \ - OLD_HEAD = HEAD; \ - new_head = OLD_HEAD + 1; \ - if(new_head == mca_btl_openib_component.eager_rdma_num) \ - new_head = 0; \ - } while(!OPAL_ATOMIC_CMPSET_32(&HEAD, OLD_HEAD, new_head)); \ + + +#if OPAL_ENABLE_DEBUG + +/** + * @brief read and increment the remote head index and generate a sequence + * number + */ + +#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \ + do { \ + (SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \ + (OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \ } while(0) +#else + +/** + * @brief read and increment the remote head index + */ + +#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ + do { \ + (OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ + } while(0) + +#endif END_C_DECLS #endif diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index 60f527a21a..ed80aec639 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -569,17 +569,18 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep, MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length); MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr); #if OPAL_ENABLE_DEBUG - do { - ftr->seq = ep->eager_rdma_remote.seq; - } while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq, - (int32_t) ftr->seq, - (int32_t) (ftr->seq+1))); + /* NTH: generate the sequence from the remote head index to ensure that the + * wrong sequence isn't set. The way this code used to look the sequence number + * and head were updated independently and it led to false positives for incorrect + * sequence numbers. */ + MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head, ftr->seq); +#else + MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head); #endif if(ep->nbo) BTL_OPENIB_FOOTER_HTON(*ftr); sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey; - MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head); #if BTL_OPENIB_FAILOVER_ENABLED /* frag->ftr is unused on the sending fragment, so use it * to indicate it is an eager fragment. A non-zero value