1
1

Fix a bug in the uGNI btl that could cause certain descriptor callbacks to be called twice.

There was a race condition in the eager get protocol: the RDMA complete message could be received before the local completion of the SMSG message that started the protocol.

cmr:v1.7

This commit was SVN r27740.
Этот коммит содержится в:
Nathan Hjelm 2013-01-03 23:11:13 +00:00
родитель a159bfaf25
Коммит 84e34ee0d7
3 изменённых файла: 21 добавление и 12 удалений

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -56,10 +56,11 @@ typedef union mca_btl_ugni_frag_hdr_t {
} mca_btl_ugni_frag_hdr_t; } mca_btl_ugni_frag_hdr_t;
enum { enum {
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */ MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */ MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */ MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
MCA_BTL_UGNI_FRAG_IGNORE = 8 /* ignore local smsg completion */ MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16 /* SMSG has completed for this message */
}; };
struct mca_btl_ugni_base_frag_t; struct mca_btl_ugni_base_frag_t;
@ -68,15 +69,15 @@ typedef void (*frag_cb_t) (struct mca_btl_ugni_base_frag_t *, int);
typedef struct mca_btl_ugni_base_frag_t { typedef struct mca_btl_ugni_base_frag_t {
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
size_t hdr_size; uint32_t msg_id;
uint16_t hdr_size;
uint16_t flags;
mca_btl_ugni_frag_hdr_t hdr; mca_btl_ugni_frag_hdr_t hdr;
mca_btl_ugni_segment_t segments[2]; mca_btl_ugni_segment_t segments[2];
ompi_common_ugni_post_desc_t post_desc; ompi_common_ugni_post_desc_t post_desc;
mca_btl_base_endpoint_t *endpoint; mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration; mca_btl_ugni_reg_t *registration;
ompi_free_list_t *my_list; ompi_free_list_t *my_list;
uint32_t msg_id;
uint32_t flags;
frag_cb_t cbfunc; frag_cb_t cbfunc;
} mca_btl_ugni_base_frag_t; } mca_btl_ugni_base_frag_t;
@ -105,7 +106,6 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
if (OPAL_LIKELY(NULL != item)) { if (OPAL_LIKELY(NULL != item)) {
(*frag)->my_list = list; (*frag)->my_list = list;
(*frag)->endpoint = ep; (*frag)->endpoint = ep;
(*frag)->flags = 0;
} }
return rc; return rc;
@ -119,6 +119,8 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
frag->registration = NULL; frag->registration = NULL;
} }
frag->flags = 0;
OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t *) frag); OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t *) frag);
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -120,7 +120,12 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
case MCA_BTL_UGNI_TAG_RDMA_COMPLETE: case MCA_BTL_UGNI_TAG_RDMA_COMPLETE:
frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0]; frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0];
mca_btl_ugni_frag_complete (frag.hdr.rdma.ctx, OMPI_SUCCESS); if (((mca_btl_ugni_base_frag_t *)frag.hdr.rdma.ctx)->flags & MCA_BTL_UGNI_FRAG_SMSG_COMPLETE) {
mca_btl_ugni_frag_complete (frag.hdr.rdma.ctx, OMPI_SUCCESS);
} else {
/* let the local smsg completion finish this frag */
((mca_btl_ugni_base_frag_t *)frag.hdr.rdma.ctx)->flags &= ~MCA_BTL_UGNI_FRAG_IGNORE;
}
break; break;
case MCA_BTL_UGNI_TAG_DISCONNECT: case MCA_BTL_UGNI_TAG_DISCONNECT:
/* remote endpoint has disconnected */ /* remote endpoint has disconnected */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -64,6 +64,8 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return OMPI_ERROR; return OMPI_ERROR;
} }
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) { if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
mca_btl_ugni_frag_complete (frag, OMPI_SUCCESS); mca_btl_ugni_frag_complete (frag, OMPI_SUCCESS);
} }