From 889dd32806e1921fcac66c9e0649274dc3c5f8b1 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 3 Aug 2016 10:04:29 -0600 Subject: [PATCH] pml/ob1: reset req_bytes_packed on start On start we were not correctly resetting all request fields. This was leading to a double-completion on persistent receives. This commit updates the base start code to reset the receive req_bytes_packed and the send request convertor. Signed-off-by: Nathan Hjelm --- ompi/mca/pml/base/pml_base_recvreq.h | 20 +++++++++------- ompi/mca/pml/base/pml_base_sendreq.h | 18 ++++++++++---- ompi/mca/pml/ob1/pml_ob1_irecv.c | 4 ++-- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 36 +++++++++++++--------------- ompi/mca/pml/ob1/pml_ob1_sendreq.h | 11 +++------ 5 files changed, 48 insertions(+), 41 deletions(-) diff --git a/ompi/mca/pml/base/pml_base_recvreq.h b/ompi/mca/pml/base/pml_base_recvreq.h index 687b756e03..154225f508 100644 --- a/ompi/mca/pml/base/pml_base_recvreq.h +++ b/ompi/mca/pml/base/pml_base_recvreq.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -87,20 +90,21 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); */ #define MCA_PML_BASE_RECV_START( request ) \ do { \ - (request)->req_pml_complete = false; \ + (request)->req_bytes_packed = 0; \ + (request)->req_base.req_pml_complete = false; \ \ /* always set the req_status.MPI_TAG to ANY_TAG before starting the \ * request. This field is used if cancelled to find out if the request \ * has been matched or not. 
\ */ \ - (request)->req_ompi.req_status.MPI_SOURCE = OMPI_ANY_SOURCE; \ - (request)->req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \ - (request)->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ - (request)->req_ompi.req_status._ucount = 0; \ - (request)->req_ompi.req_status._cancelled = 0; \ + (request)->req_base.req_ompi.req_status.MPI_SOURCE = OMPI_ANY_SOURCE; \ + (request)->req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \ + (request)->req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ + (request)->req_base.req_ompi.req_status._ucount = 0; \ + (request)->req_base.req_ompi.req_status._cancelled = 0; \ \ - (request)->req_ompi.req_complete = REQUEST_PENDING; \ - (request)->req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ + (request)->req_base.req_ompi.req_complete = REQUEST_PENDING; \ + (request)->req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ } while (0) /** diff --git a/ompi/mca/pml/base/pml_base_sendreq.h b/ompi/mca/pml/base/pml_base_sendreq.h index 95d442f043..6fb50d0f90 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.h +++ b/ompi/mca/pml/base/pml_base_sendreq.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,6 +113,12 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); } \ } +#define MCA_PML_BASE_SEND_REQUEST_RESET(request) \ + if ((request)->req_bytes_packed > 0) { \ + opal_convertor_set_position(&(request)->req_base.req_convertor, \ + &(size_t){0}); \ + } + /** * Mark the request as started from the PML base point of view. 
* @@ -118,10 +127,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); #define MCA_PML_BASE_SEND_START( request ) \ do { \ - (request)->req_pml_complete = false; \ - (request)->req_ompi.req_complete = REQUEST_PENDING; \ - (request)->req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ - (request)->req_ompi.req_status._cancelled = 0; \ + (request)->req_base.req_pml_complete = false; \ + (request)->req_base.req_ompi.req_complete = REQUEST_PENDING; \ + (request)->req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ + (request)->req_base.req_ompi.req_status._cancelled = 0; \ + MCA_PML_BASE_SEND_REQUEST_RESET(request); \ } while (0) /** diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c index 5a494892df..ddf6c54db8 100644 --- a/ompi/mca/pml/ob1/pml_ob1_irecv.c +++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c @@ -193,7 +193,7 @@ mca_pml_ob1_imrecv( void *buf, recvreq->req_pending = false; recvreq->req_ack_sent = false; - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); + MCA_PML_BASE_RECV_START(&recvreq->req_recv); /* Note - sequence number already assigned */ recvreq->req_recv.req_base.req_sequence = seq; @@ -285,7 +285,7 @@ mca_pml_ob1_mrecv( void *buf, recvreq->req_rdma_idx = 0; recvreq->req_pending = false; - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); + MCA_PML_BASE_RECV_START(&recvreq->req_recv); /* Note - sequence number already assigned */ recvreq->req_recv.req_base.req_sequence = seq; diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index ef6d8a8706..69ee968517 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -13,7 +13,7 @@ * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. 
All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science @@ -70,27 +70,25 @@ void mca_pml_ob1_recv_request_process_pending(void) static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request) { mca_pml_ob1_recv_request_t* recvreq = *(mca_pml_ob1_recv_request_t**)request; + assert (false == recvreq->req_recv.req_base.req_free_called); - if(false == recvreq->req_recv.req_base.req_free_called){ + recvreq->req_recv.req_base.req_free_called = true; + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, + &(recvreq->req_recv.req_base), PERUSE_RECV ); - recvreq->req_recv.req_base.req_free_called = true; - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(recvreq->req_recv.req_base), PERUSE_RECV ); - - if( true == recvreq->req_recv.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); - } + if( true == recvreq->req_recv.req_base.req_pml_complete ) { + /* make buffer defined when the request is compeleted, + and before releasing the objects. 
*/ + MEMCHECKER( + memchecker_call(&opal_memchecker_base_mem_defined, + recvreq->req_recv.req_base.req_addr, + recvreq->req_recv.req_base.req_count, + recvreq->req_recv.req_base.req_datatype); + ); + MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); } + *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; } @@ -1171,7 +1169,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) req->req_pending = false; req->req_ack_sent = false; - MCA_PML_BASE_RECV_START(&req->req_recv.req_base); + MCA_PML_BASE_RECV_START(&req->req_recv); OB1_MATCHING_LOCK(&ob1_comm->matching_lock); /** diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 56924b24d9..3ebde85d28 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -156,12 +156,7 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type) } #define MCA_PML_OB1_SEND_REQUEST_RESET(sendreq) \ - if ((sendreq)->req_send.req_bytes_packed > 0) { \ - size_t _position = 0; \ - opal_convertor_set_position(&(sendreq)->req_send.req_base.req_convertor, \ - &_position); \ - assert( 0 == _position ); \ - } + MCA_PML_BASE_SEND_REQUEST_RESET(&(sendreq)->req_send) static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq) { @@ -460,7 +455,7 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_NONE; sendreq->req_send.req_base.req_sequence = seqn; - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); + MCA_PML_BASE_SEND_START( &sendreq->req_send ); for(size_t i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { mca_bml_base_btl_t* bml_btl;