1
1

Merge pull request #828 from hjelmn/openib_thread_fix

openib thread fixes
Этот коммит содержится в:
Nathan Hjelm 2015-09-01 09:12:50 -06:00
родитель d8cb3fe705 64e4419d76
Коммит f926796e57
3 изменённых файлов: 35 добавлений и 21 удалений

Просмотреть файл

@ -2534,12 +2534,6 @@ btl_openib_component_init(int *num_btl_modules,
malloc_hook_set = true;
}
#endif
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
if (enable_mpi_threads && !mca_btl_base_thread_multiple_override) {
opal_output_verbose(5, opal_btl_base_framework.framework_output,
"btl:openib: MPI_THREAD_MULTIPLE not suppported; skipping this component");
goto no_btls;
}
/* Per https://svn.open-mpi.org/trac/ompi/ticket/1305, check to
see if $sysfsdir/class/infiniband exists. If it does not,

Просмотреть файл

@ -1,5 +1,8 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -81,17 +84,33 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
mca_btl_openib_component.eager_rdma_num) \
(I) = 0; \
} while (0)
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
do { \
int32_t new_head; \
do { \
OLD_HEAD = HEAD; \
new_head = OLD_HEAD + 1; \
if(new_head == mca_btl_openib_component.eager_rdma_num) \
new_head = 0; \
} while(!OPAL_ATOMIC_CMPSET_32(&HEAD, OLD_HEAD, new_head)); \
#if OPAL_ENABLE_DEBUG
/**
 * @brief read and increment the remote head index and generate a sequence
 * number
 *
 * Variant taking three arguments; it is the one selected when
 * OPAL_ENABLE_DEBUG is non-zero.
 *
 * HEAD is incremented by one with a single OPAL_THREAD_ADD32 call. SEQ is
 * assigned the result of that call minus one, i.e. the value HEAD held before
 * this increment, assuming OPAL_THREAD_ADD32 returns the post-increment value
 * (TODO confirm against its definition). OLD_HEAD is then assigned SEQ reduced
 * modulo mca_btl_openib_component.eager_rdma_num.
 *
 * Because both outputs are derived from the same increment, the sequence
 * number and the index written to OLD_HEAD always correspond to each other.
 * HEAD itself is never wrapped back to zero here; only OLD_HEAD is reduced.
 *
 * @param HEAD     lvalue of the head counter; its address is passed to
 *                 OPAL_THREAD_ADD32
 * @param OLD_HEAD lvalue that receives the index (SEQ modulo eager_rdma_num)
 * @param SEQ      lvalue that receives the sequence number
 */
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \
do { \
(SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \
(OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \
} while(0)
#else
/**
 * @brief read and increment the remote head index
 *
 * Variant taking two arguments; it is the one selected when
 * OPAL_ENABLE_DEBUG is zero, so no sequence number is produced.
 *
 * HEAD is incremented by one with a single OPAL_THREAD_ADD32 call. OLD_HEAD
 * is assigned the result of that call minus one, reduced modulo
 * mca_btl_openib_component.eager_rdma_num. Assuming OPAL_THREAD_ADD32 returns
 * the post-increment value (TODO confirm against its definition), that is the
 * index corresponding to the value HEAD held before this increment.
 *
 * HEAD itself is never wrapped back to zero here; only OLD_HEAD is reduced.
 *
 * NOTE(review): since HEAD keeps growing, the result of the modulo once the
 * 32-bit counter wraps depends on the signedness of the operands; verify
 * that OLD_HEAD cannot become negative.
 *
 * @param HEAD     lvalue of the head counter; its address is passed to
 *                 OPAL_THREAD_ADD32
 * @param OLD_HEAD lvalue that receives the index
 */
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
do { \
(OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
} while(0)
#endif
END_C_DECLS
#endif

Просмотреть файл

@ -569,17 +569,18 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
#if OPAL_ENABLE_DEBUG
do {
ftr->seq = ep->eager_rdma_remote.seq;
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
(int32_t) ftr->seq,
(int32_t) (ftr->seq+1)));
/* NTH: generate the sequence from the remote head index to ensure that the
* wrong sequence isn't set. The way this code used to look the sequence number
* and head were updated independently and it led to false positives for incorrect
* sequence numbers. */
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head, ftr->seq);
#else
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head);
#endif
if(ep->nbo)
BTL_OPENIB_FOOTER_HTON(*ftr);
sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey;
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head);
#if BTL_OPENIB_FAILOVER_ENABLED
/* frag->ftr is unused on the sending fragment, so use it
* to indicate it is an eager fragment. A non-zero value