diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c
index ec2e9e36ee..67365eb3f5 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c
@@ -1661,9 +1661,10 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
 
     osc_pt2pt_gc_clean (module);
 
+    ompi_osc_pt2pt_frag_start_receive (module);
+
     /* put this request on the garbage collection list */
     osc_pt2pt_gc_add_request (module, request);
-    ompi_osc_pt2pt_frag_start_receive (module);
 
     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                          "finished posting receive request"));
@@ -1673,6 +1674,7 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
 
 int ompi_osc_pt2pt_frag_start_receive (ompi_osc_pt2pt_module_t *module)
 {
+    module->frag_request = MPI_REQUEST_NULL;
     return ompi_osc_pt2pt_irecv_w_cb (module->incoming_buffer, mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t),
                                       MPI_BYTE, OMPI_ANY_SOURCE, OSC_PT2PT_FRAG_TAG, module->comm, &module->frag_request,
                                       ompi_osc_pt2pt_callback, module);
@@ -1732,11 +1734,14 @@ int ompi_osc_pt2pt_irecv_w_cb (void *ptr, int count, ompi_datatype_t *datatype,
     request->req_complete_cb = cb;
     request->req_complete_cb_data = ctx;
 
-    if (request_out) {
+
+    ret = MCA_PML_CALL(start(1, &request));
+    if (request_out && MPI_REQUEST_NULL != request) {
         *request_out = request;
     }
 
-    ret = MCA_PML_CALL(start(1, &request));
+    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+                         "osc pt2pt: pml start returned %d. state: %d", ret, request->req_state));
 
     return ret;
 }
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_module.c b/ompi/mca/osc/pt2pt/osc_pt2pt_module.c
index 3d8460b7a6..56c0d4bc0e 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_module.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_module.c
@@ -8,7 +8,7 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
- * Copyright (c) 2007-2015 Los Alamos National Security, LLC.  All rights
+ * Copyright (c) 2007-2016 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2012-2013 Sandia National Laboratories.  All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
@@ -95,11 +95,12 @@ int ompi_osc_pt2pt_free(ompi_win_t *win)
     if (NULL != module->epoch_outgoing_frag_count)
         free(module->epoch_outgoing_frag_count);
 
-    if (NULL != module->frag_request) {
+    if (NULL != module->frag_request && MPI_REQUEST_NULL != module->frag_request) {
         module->frag_request->req_complete_cb = NULL;
         ompi_request_cancel (module->frag_request);
         ompi_request_free (&module->frag_request);
     }
+
     if (NULL != module->comm) {
         ompi_comm_free(&module->comm);
     }
diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h
index db04c7a77f..ba055c474e 100644
--- a/ompi/mca/pml/cm/pml_cm.h
+++ b/ompi/mca/pml/cm/pml_cm.h
@@ -232,6 +232,12 @@ mca_pml_cm_isend_init(const void* buf,
     MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst,
                                      datatype, sendmode, true, false, buf, count);
 
+    /* Work around a leak in start by marking this request as complete. The
+     * problem occurred because we do not have a way to differentiate an
+     * initial request and an incomplete pml request in start. This line
+     * allows us to detect this state. */
+    sendreq->req_send.req_base.req_pml_complete = true;
+
     *request = (ompi_request_t*) sendreq;
 
     return OMPI_SUCCESS;
diff --git a/ompi/mca/pml/cm/pml_cm_start.c b/ompi/mca/pml/cm/pml_cm_start.c
index f07c7cf059..a3084090ea 100644
--- a/ompi/mca/pml/cm/pml_cm_start.c
+++ b/ompi/mca/pml/cm/pml_cm_start.c
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -10,6 +11,8 @@
  * Copyright (c) 2004-2006 The Regents of the University of California.
  *                         All rights reserved.
  * Copyright (c) 2006      Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2016      Los Alamos National Security, LLC.  All rights
+ *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@@ -32,78 +35,14 @@ int
 mca_pml_cm_start(size_t count, ompi_request_t** requests)
 {
     int rc;
-    size_t i;
 
-    for (i = 0 ; i < count ; i++) {
-        mca_pml_cm_request_t *pml_request =
-            (mca_pml_cm_request_t*)requests[i];
-        if (OMPI_REQUEST_PML != requests[i]->req_type) {
+    for (size_t i = 0 ; i < count ; i++) {
+        mca_pml_cm_request_t *pml_request = (mca_pml_cm_request_t*)requests[i];
+        if (OMPI_REQUEST_PML != requests[i]->req_type || NULL == pml_request) {
             continue;
         }
-        if (NULL == pml_request) {
-            continue;
-        }
 
-        /* If the persistent request is currebtly active - obtain the
-         * request lock and verify the status is incomplete. if the
-         * pml layer has not completed the request - mark the request
-         * as free called - so that it will be freed when the request
-         * completes - and create a new request.
-         */
-        switch (pml_request->req_ompi.req_state) {
-        case OMPI_REQUEST_INACTIVE:
-            if (pml_request->req_pml_complete == true)
-                break;
-        case OMPI_REQUEST_ACTIVE: {
-            /* otherwise fall through */
-            ompi_request_t *request;
-
-            OPAL_THREAD_LOCK(&ompi_request_lock);
-            if (pml_request->req_pml_complete == false) {
-                /* free request after it completes */
-                pml_request->req_free_called = true;
-            } else {
-                /* can reuse the existing request */
-                OPAL_THREAD_UNLOCK(&ompi_request_lock);
-                break;
-            }
-
-            /* allocate a new request */
-            switch (pml_request->req_pml_type) {
-            case MCA_PML_CM_REQUEST_SEND_HEAVY: {
-                mca_pml_cm_hvy_send_request_t* sendreq = (mca_pml_cm_hvy_send_request_t*) pml_request;
-                rc = mca_pml_cm_isend_init( sendreq->req_addr,
-                                            sendreq->req_count,
-                                            sendreq->req_send.req_base.req_datatype,
-                                            sendreq->req_peer,
-                                            sendreq->req_tag,
-                                            sendreq->req_send.req_send_mode,
-                                            sendreq->req_send.req_base.req_comm,
-                                            &request );
-                break;
-            }
-            case MCA_PML_CM_REQUEST_RECV_HEAVY: {
-                mca_pml_cm_hvy_recv_request_t* recvreq = (mca_pml_cm_hvy_recv_request_t*) pml_request;
-                rc = mca_pml_cm_irecv_init( recvreq->req_addr,
-                                            recvreq->req_count,
-                                            recvreq->req_base.req_datatype,
-                                            recvreq->req_peer,
-                                            recvreq->req_tag,
-                                            recvreq->req_base.req_comm,
-                                            &request );
-                break;
-            }
-            default:
-                rc = OMPI_ERR_REQUEST;
-                break;
-            }
-            OPAL_THREAD_UNLOCK(&ompi_request_lock);
-            if(OMPI_SUCCESS != rc)
-                return rc;
-            pml_request = (mca_pml_cm_request_t*)request;
-            requests[i] = request;
-            break;
-        }
-        default:
+        if (OMPI_REQUEST_ACTIVE == pml_request->req_ompi.req_state) {
             return OMPI_ERR_REQUEST;
         }
@@ -113,6 +52,37 @@ mca_pml_cm_start(size_t count, ompi_request_t** requests)
             {
                 mca_pml_cm_hvy_send_request_t* sendreq =
                     (mca_pml_cm_hvy_send_request_t*)pml_request;
+                if (!sendreq->req_send.req_base.req_pml_complete) {
+                    ompi_request_t *request;
+
+                    /* buffered sends can be mpi complete and pml incomplete. to support this
+                     * case we need to allocate a new request. */
+                    rc = mca_pml_cm_isend_init (sendreq->req_addr,
+                                                sendreq->req_count,
+                                                sendreq->req_send.req_base.req_datatype,
+                                                sendreq->req_peer,
+                                                sendreq->req_tag,
+                                                sendreq->req_send.req_send_mode,
+                                                sendreq->req_send.req_base.req_comm,
+                                                &request);
+                    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
+                        return rc;
+                    }
+
+                    /* copy the callback and callback data to the new request */
+                    request->req_complete_cb = pml_request->req_ompi.req_complete_cb;
+                    request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data;
+
+                    /* ensure the old request gets released */
+                    pml_request->req_free_called = true;
+
+                    sendreq = (mca_pml_cm_hvy_send_request_t *) request;
+                    requests[i] = request;
+                }
+
+                /* reset the completion flag */
+                sendreq->req_send.req_base.req_pml_complete = false;
+
                 MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, rc);
                 if(rc != OMPI_SUCCESS)
                     return rc;
diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c
index 723e213f13..90edc34e18 100644
--- a/ompi/mca/pml/ob1/pml_ob1_isend.c
+++ b/ompi/mca/pml/ob1/pml_ob1_isend.c
@@ -63,6 +63,12 @@ int mca_pml_ob1_isend_init(const void *buf,
                                 &(sendreq)->req_send.req_base,
                                 PERUSE_SEND);
 
+    /* Work around a leak in start by marking this request as complete. The
+     * problem occurred because we do not have a way to differentiate an
+     * initial request and an incomplete pml request in start. This line
+     * allows us to detect this state. */
+    sendreq->req_send.req_base.req_pml_complete = true;
+
     *request = (ompi_request_t *) sendreq;
     return OMPI_SUCCESS;
 }
diff --git a/ompi/mca/pml/ob1/pml_ob1_start.c b/ompi/mca/pml/ob1/pml_ob1_start.c
index fb8a37bb4a..172be7980b 100644
--- a/ompi/mca/pml/ob1/pml_ob1_start.c
+++ b/ompi/mca/pml/ob1/pml_ob1_start.c
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -11,6 +12,8 @@
  *                         All rights reserved.
  * Copyright (c) 2006      Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2016      Los Alamos National Security, LLC.  All rights
+ *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@@ -29,84 +32,25 @@ int mca_pml_ob1_start(size_t count, ompi_request_t** requests)
 {
     int rc;
-    size_t i;
-    bool reuse_old_request = true;
 
-    for(i=0; i<count; i++) {
+    for (size_t i = 0 ; i < count ; i++) {
         mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
-        if(NULL == pml_request) {
-            continue;
-        }
-        if(OMPI_REQUEST_PML != requests[i]->req_type) {
+        if (NULL == pml_request || OMPI_REQUEST_PML != requests[i]->req_type) {
             continue;
         }
 
-        /* If the persistent request is currently active - obtain the
-         * request lock and verify the status is incomplete. if the
-         * pml layer has not completed the request - mark the request
-         * as free called - so that it will be freed when the request
+        /* If the persistent request is currently active - verify the status
+         * is incomplete. if the pml layer has not completed the request - mark
+         * the request as free called - so that it will be freed when the request
          * completes - and create a new request.
          */
 #if OPAL_ENABLE_MULTI_THREADS
         opal_atomic_rmb();
 #endif
-        reuse_old_request = true;
-        switch(pml_request->req_ompi.req_state) {
-        case OMPI_REQUEST_INACTIVE:
-            if(pml_request->req_pml_complete == true)
-                break;
-            /* otherwise fall through */
-        case OMPI_REQUEST_ACTIVE: {
-            ompi_request_t *request;
-            if (pml_request->req_pml_complete == false) {
-                /* free request after it completes */
-                pml_request->req_free_called = true;
-            } else {
-                /* can reuse the existing request */
-                break;
-            }
-
-            reuse_old_request = false;
-            /* allocate a new request */
-            switch(pml_request->req_type) {
-            case MCA_PML_REQUEST_SEND: {
-                mca_pml_base_send_mode_t sendmode =
-                    ((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
-                rc = mca_pml_ob1_isend_init(
-                    pml_request->req_addr,
-                    pml_request->req_count,
-                    pml_request->req_datatype,
-                    pml_request->req_peer,
-                    pml_request->req_tag,
-                    sendmode,
-                    pml_request->req_comm,
-                    &request);
-                break;
-            }
-            case MCA_PML_REQUEST_RECV:
-                rc = mca_pml_ob1_irecv_init(
-                    pml_request->req_addr,
-                    pml_request->req_count,
-                    pml_request->req_datatype,
-                    pml_request->req_peer,
-                    pml_request->req_tag,
-                    pml_request->req_comm,
-                    &request);
-                break;
-            default:
-                rc = OMPI_ERR_REQUEST;
-                break;
-            }
-            if(OMPI_SUCCESS != rc)
-                return rc;
-            pml_request = (mca_pml_base_request_t*)request;
-            requests[i] = request;
-            break;
-        }
-        default:
-            return OMPI_ERR_REQUEST;
+        if (OMPI_REQUEST_ACTIVE == pml_request->req_ompi.req_state) {
+            return OMPI_ERR_REQUEST;
         }
 
         /* start the request */
@@ -119,15 +63,46 @@ int mca_pml_ob1_start(size_t count, ompi_request_t** requests)
                                     pml_request->req_addr, pml_request->req_count,
                                     pml_request->req_datatype);
                 );
-                if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) {
+
+                if (!pml_request->req_pml_complete) {
+                    ompi_request_t *request;
+
+                    /* buffered sends can be mpi complete and pml incomplete. to support this
+                     * case we need to allocate a new request. */
+                    rc = mca_pml_ob1_isend_init (pml_request->req_addr,
+                                                 pml_request->req_count,
+                                                 pml_request->req_datatype,
+                                                 pml_request->req_peer,
+                                                 pml_request->req_tag,
+                                                 sendreq->req_send.req_send_mode,
+                                                 pml_request->req_comm,
+                                                 &request);
+                    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
+                        return rc;
+                    }
+
+                    /* copy the callback and callback data to the new request */
+                    request->req_complete_cb = pml_request->req_ompi.req_complete_cb;
+                    request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data;
+
+                    /* ensure the old request gets released */
+                    pml_request->req_free_called = true;
+
+                    sendreq = (mca_pml_ob1_send_request_t *) request;
+                    requests[i] = request;
+                } else if (sendreq->req_send.req_bytes_packed != 0) {
                     size_t offset = 0;
                     /**
                      * Reset the convertor in case we're dealing with the original
                      * request, which when completed do not reset the convertor.
                      */
-                    opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
-                                                 &offset );
+                    opal_convertor_set_position (&sendreq->req_send.req_base.req_convertor,
+                                                 &offset);
                 }
+
+                /* reset the completion flag */
+                pml_request->req_pml_complete = false;
+
                 MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc);
                 if(rc != OMPI_SUCCESS)
                     return rc;
diff --git a/ompi/mpi/c/startall.c b/ompi/mpi/c/startall.c
index 38b5644451..e1718e5e4d 100644
--- a/ompi/mpi/c/startall.c
+++ b/ompi/mpi/c/startall.c
@@ -88,6 +88,7 @@ int MPI_Startall(int count, MPI_Request requests[])
             }
         }
     }
+
     ret = MCA_PML_CALL(start(count, requests));
 
     OPAL_CR_EXIT_LIBRARY();
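
Note on the scenario the start() rework above defends against (illustrative, not part of the patch): a persistent buffered send becomes MPI-complete as soon as the payload is copied into the attached buffer, while the underlying PML request may still be incomplete on the wire. A later MPI_Start() therefore must not reuse the busy internal request, and the isend_init hunks mark fresh requests with req_pml_complete = true so start() can tell a never-started request from one that is still draining. The following minimal sketch drives exactly that pattern using only standard MPI calls; the message size, tag, iteration count, and variable names are arbitrary choices for illustration:

    #include <mpi.h>
    #include <stdlib.h>
    #include <string.h>

    int main (int argc, char **argv)
    {
        const int count = 1024, iterations = 100;
        char payload[1024];
        MPI_Request req;
        void *buffer;
        int rank, size, bufsize;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        if (size < 2) {
            MPI_Finalize();
            return 1; /* needs at least two ranks */
        }

        memset(payload, 0x55, sizeof(payload));

        /* attach enough buffer space for every buffered send that could be
         * in flight at once */
        bufsize = iterations * (count + MPI_BSEND_OVERHEAD);
        MPI_Buffer_attach(malloc(bufsize), bufsize);

        if (rank < 2) {
            if (0 == rank) {
                /* persistent buffered send: MPI_Wait can return while the
                 * PML-level request is still incomplete, which is the case
                 * the reworked start() paths must detect */
                MPI_Bsend_init(payload, count, MPI_CHAR, 1, 42, MPI_COMM_WORLD, &req);
            } else {
                MPI_Recv_init(payload, count, MPI_CHAR, 0, 42, MPI_COMM_WORLD, &req);
            }

            for (int i = 0 ; i < iterations ; ++i) {
                MPI_Start(&req);                   /* drives mca_pml_*_start() */
                MPI_Wait(&req, MPI_STATUS_IGNORE);
            }
            MPI_Request_free(&req);
        }

        MPI_Buffer_detach(&buffer, &bufsize);
        free(buffer);

        MPI_Finalize();
        return 0;
    }

Before this patch, each MPI_Start() on rank 0 could reach the old reuse logic while the previous send was MPI-complete but PML-incomplete. With the changes above, start() allocates a replacement request for that case, carries the completion callback and its context over to the new request, and flags the old one with req_free_called so it is released once it finally completes.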