Fix a bug in the uGNI btl that could cause certain descriptor callbacks to be called twice.
There was a race condition in the eager get protocol: the RDMA complete message could be received before the local completion of the SMSG message that started the eager get protocol. cmr:v1.7. This commit was SVN r27740.
Этот коммит содержится в:
родитель
a159bfaf25
Коммит
84e34ee0d7
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -56,10 +56,11 @@ typedef union mca_btl_ugni_frag_hdr_t {
|
|||||||
} mca_btl_ugni_frag_hdr_t;
|
} mca_btl_ugni_frag_hdr_t;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
|
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
|
||||||
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
|
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
|
||||||
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
|
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
|
||||||
MCA_BTL_UGNI_FRAG_IGNORE = 8 /* ignore local smsg completion */
|
MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
|
||||||
|
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16 /* SMSG has completed for this message */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mca_btl_ugni_base_frag_t;
|
struct mca_btl_ugni_base_frag_t;
|
||||||
@ -68,15 +69,15 @@ typedef void (*frag_cb_t) (struct mca_btl_ugni_base_frag_t *, int);
|
|||||||
|
|
||||||
typedef struct mca_btl_ugni_base_frag_t {
|
typedef struct mca_btl_ugni_base_frag_t {
|
||||||
mca_btl_base_descriptor_t base;
|
mca_btl_base_descriptor_t base;
|
||||||
size_t hdr_size;
|
uint32_t msg_id;
|
||||||
|
uint16_t hdr_size;
|
||||||
|
uint16_t flags;
|
||||||
mca_btl_ugni_frag_hdr_t hdr;
|
mca_btl_ugni_frag_hdr_t hdr;
|
||||||
mca_btl_ugni_segment_t segments[2];
|
mca_btl_ugni_segment_t segments[2];
|
||||||
ompi_common_ugni_post_desc_t post_desc;
|
ompi_common_ugni_post_desc_t post_desc;
|
||||||
mca_btl_base_endpoint_t *endpoint;
|
mca_btl_base_endpoint_t *endpoint;
|
||||||
mca_btl_ugni_reg_t *registration;
|
mca_btl_ugni_reg_t *registration;
|
||||||
ompi_free_list_t *my_list;
|
ompi_free_list_t *my_list;
|
||||||
uint32_t msg_id;
|
|
||||||
uint32_t flags;
|
|
||||||
frag_cb_t cbfunc;
|
frag_cb_t cbfunc;
|
||||||
} mca_btl_ugni_base_frag_t;
|
} mca_btl_ugni_base_frag_t;
|
||||||
|
|
||||||
@ -105,7 +106,6 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
|
|||||||
if (OPAL_LIKELY(NULL != item)) {
|
if (OPAL_LIKELY(NULL != item)) {
|
||||||
(*frag)->my_list = list;
|
(*frag)->my_list = list;
|
||||||
(*frag)->endpoint = ep;
|
(*frag)->endpoint = ep;
|
||||||
(*frag)->flags = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
@ -119,6 +119,8 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
|
|||||||
frag->registration = NULL;
|
frag->registration = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
frag->flags = 0;
|
||||||
|
|
||||||
OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t *) frag);
|
OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t *) frag);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -120,7 +120,12 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
|||||||
case MCA_BTL_UGNI_TAG_RDMA_COMPLETE:
|
case MCA_BTL_UGNI_TAG_RDMA_COMPLETE:
|
||||||
frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0];
|
frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0];
|
||||||
|
|
||||||
mca_btl_ugni_frag_complete (frag.hdr.rdma.ctx, OMPI_SUCCESS);
|
if (((mca_btl_ugni_base_frag_t *)frag.hdr.rdma.ctx)->flags & MCA_BTL_UGNI_FRAG_SMSG_COMPLETE) {
|
||||||
|
mca_btl_ugni_frag_complete (frag.hdr.rdma.ctx, OMPI_SUCCESS);
|
||||||
|
} else {
|
||||||
|
/* let the local smsg completion finish this frag */
|
||||||
|
((mca_btl_ugni_base_frag_t *)frag.hdr.rdma.ctx)->flags &= ~MCA_BTL_UGNI_FRAG_IGNORE;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case MCA_BTL_UGNI_TAG_DISCONNECT:
|
case MCA_BTL_UGNI_TAG_DISCONNECT:
|
||||||
/* remote endpoint has disconnected */
|
/* remote endpoint has disconnected */
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -64,6 +64,8 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
|
|||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
|
||||||
|
|
||||||
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
|
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
|
||||||
mca_btl_ugni_frag_complete (frag, OMPI_SUCCESS);
|
mca_btl_ugni_frag_complete (frag, OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user