From a112d10c93f289a0a5825c161875f0abe9b0fbab Mon Sep 17 00:00:00 2001
From: Yossi Itigin
Date: Sun, 25 Nov 2018 19:28:36 +0200
Subject: [PATCH] pml_ucx: initialize req_mpi_object.comm for error handler

without this fix, an error handler invoked on pml_ucx request would
segfault while trying to dereference requests[i]->req_mpi_object.comm

(picked from master f36eeef)

Signed-off-by: Yossi Itigin
---
 ompi/mca/pml/ucx/pml_ucx.c         | 37 +++++++++++++++++-------------
 ompi/mca/pml/ucx/pml_ucx_request.c |  2 +-
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c
index 2e10a3b768..bcb689e1de 100644
--- a/ompi/mca/pml/ucx/pml_ucx.c
+++ b/ompi/mca/pml/ucx/pml_ucx.c
@@ -477,11 +477,12 @@ int mca_pml_ucx_irecv_init(void *buf, size_t count, ompi_datatype_t *datatype,
     PML_UCX_TRACE_RECV("irecv_init request *%p=%p", buf, count, datatype, src,
                        tag, comm, (void*)request, (void*)req);
 
-    req->ompi.req_state = OMPI_REQUEST_INACTIVE;
-    req->flags          = 0;
-    req->buffer         = buf;
-    req->count          = count;
-    req->datatype.datatype = mca_pml_ucx_get_datatype(datatype);
+    req->ompi.req_state           = OMPI_REQUEST_INACTIVE;
+    req->ompi.req_mpi_object.comm = comm;
+    req->flags                    = 0;
+    req->buffer                   = buf;
+    req->count                    = count;
+    req->datatype.datatype        = mca_pml_ucx_get_datatype(datatype);
 
     PML_UCX_MAKE_RECV_TAG(req->tag, req->recv.tag_mask, tag, src, comm);
 
@@ -510,7 +511,8 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype,
     }
 
     PML_UCX_VERBOSE(8, "got request %p", (void*)req);
-    *request = req;
+    req->req_mpi_object.comm = comm;
+    *request                 = req;
 
     return OMPI_SUCCESS;
 }
@@ -582,13 +584,15 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat
         return OMPI_ERROR;
     }
 
-    req->ompi.req_state = OMPI_REQUEST_INACTIVE;
-    req->flags          = MCA_PML_UCX_REQUEST_FLAG_SEND;
-    req->buffer         = (void *)buf;
-    req->count          = count;
-    req->tag            = PML_UCX_MAKE_SEND_TAG(tag, comm);
-    req->send.mode      = mode;
-    req->send.ep        = ep;
+    req->ompi.req_state           = OMPI_REQUEST_INACTIVE;
+    req->ompi.req_mpi_object.comm = comm;
+    req->flags                    = MCA_PML_UCX_REQUEST_FLAG_SEND;
+    req->buffer                   = (void *)buf;
+    req->count                    = count;
+    req->tag                      = PML_UCX_MAKE_SEND_TAG(tag, comm);
+    req->send.mode                = mode;
+    req->send.ep                  = ep;
+
     if (MCA_PML_BASE_SEND_BUFFERED == mode) {
         req->datatype.ompi_datatype = datatype;
         OBJ_RETAIN(datatype);
@@ -706,7 +710,8 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
         return OMPI_SUCCESS;
     } else if (!UCS_PTR_IS_ERR(req)) {
         PML_UCX_VERBOSE(8, "got request %p", (void*)req);
-        *request = req;
+        req->req_mpi_object.comm = comm;
+        *request                 = req;
         return OMPI_SUCCESS;
     } else {
         PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req)));
@@ -786,7 +791,7 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i
 }
 
 int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm,
-                        int *matched, ompi_status_public_t* mpi_status)
+                       int *matched, ompi_status_public_t* mpi_status)
 {
     static unsigned progress_count = 0;
 
@@ -811,7 +816,7 @@ int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm,
 }
 
 int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm,
-                       ompi_status_public_t* mpi_status)
+                      ompi_status_public_t* mpi_status)
 {
     ucp_tag_t ucp_tag, ucp_tag_mask;
     ucp_tag_recv_info_t info;
diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c
index 8602386e0c..536ac95e79 100644
--- a/ompi/mca/pml/ucx/pml_ucx_request.c
+++ b/ompi/mca/pml/ucx/pml_ucx_request.c
@@ -228,7 +228,7 @@ void mca_pml_ucx_completed_request_init(ompi_request_t *ompi_req)
     mca_pml_ucx_request_init_common(ompi_req, false, OMPI_REQUEST_ACTIVE,
                                     mca_pml_completed_request_free,
                                     mca_pml_completed_request_cancel);
+    ompi_req->req_mpi_object.comm = &ompi_mpi_comm_world.comm;
    ompi_request_complete(ompi_req, false);
-
 }
 
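
Note (not part of the patch itself): the commit message says an error handler
invoked on a pml_ucx request segfaults while dereferencing
requests[i]->req_mpi_object.comm. The standalone C sketch below uses
hypothetical "toy_*" names, none of which exist in Open MPI or UCX, to
illustrate the failure mode under that assumption: an error-handler path reads
the communicator pointer stored on a failed request, so a request whose
communicator field was never initialized crashes there, and the one-line
assignment this patch adds is what prevents it.

/* Minimal sketch, assuming a simplified request layout; the "toy_*" names
 * are illustrative only and do not exist in Open MPI or UCX. */
#include <stdio.h>
#include <stdlib.h>

typedef struct toy_comm {
    const char *name;                /* stands in for ompi_communicator_t */
} toy_comm_t;

typedef struct toy_request {
    struct {
        toy_comm_t *comm;            /* stands in for req_mpi_object.comm */
    } req_mpi_object;
    int status;                      /* nonzero means the request failed  */
} toy_request_t;

/* Stands in for the error-handler invocation: it reads the communicator
 * stored on each failed request.  If comm was never initialized, this
 * dereference is the segfault described in the commit message. */
static void toy_invoke_errhandler(toy_request_t **requests, int count)
{
    for (int i = 0; i < count; ++i) {
        if (requests[i]->status != 0) {
            printf("error on communicator %s\n",
                   requests[i]->req_mpi_object.comm->name);
        }
    }
}

int main(void)
{
    toy_comm_t world = { "toy_comm_world" };

    /* calloc() models a freshly allocated request: comm starts out NULL,
     * just like an uninitialized req_mpi_object.comm. */
    toy_request_t *req = calloc(1, sizeof(*req));
    req->status = 1;

    /* The fix in this patch corresponds to this assignment; removing it
     * reproduces the NULL dereference in the handler above. */
    req->req_mpi_object.comm = &world;

    toy_request_t *requests[] = { req };
    toy_invoke_errhandler(requests, 1);

    free(req);
    return 0;
}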