From f8eef0fde99d44653835797b15988ebd3cd3ddc4 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Aug 2019 10:45:40 +0900 Subject: [PATCH 1/3] coll/libnbc: fixes ompi ompi_coll_libnbc_request_t parent base ompi_coll_libnbc_request_t on top of ompi_coll_base_nbc_request_t to correctly support the retention of datatypes/operators This fixes a regression introduced in open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4 Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/libnbc/coll_libnbc_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index 456302b937..a91492f337 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -580,6 +580,6 @@ request_construct(ompi_coll_libnbc_request_t *request) OBJ_CLASS_INSTANCE(ompi_coll_libnbc_request_t, - ompi_request_t, + ompi_coll_base_nbc_request_t, request_construct, NULL); From 0862c409f1094cfccf5dc31ae7579676daa30b86 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Aug 2019 10:48:06 +0900 Subject: [PATCH 2/3] coll/base: cleanup ompi_coll_base_nbc_request_t elements Since ompi_coll_base_nbc_request_t is to be used in an opal_free_list_t, it must be returned into a "clean" state. So cleanup some data in the callback completion subroutines. 
This fixes a regression introduced in open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4 Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_util.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 5f69519692..708ebb0a0e 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -108,9 +108,11 @@ int ompi_rounddown(int num, int factor) static void release_objs_callback(struct ompi_coll_base_nbc_request_t *request) { if (NULL != request->data.objs.objs[0]) { OBJ_RELEASE(request->data.objs.objs[0]); + request->data.objs.objs[0] = NULL; } if (NULL != request->data.objs.objs[1]) { OBJ_RELEASE(request->data.objs.objs[1]); + request->data.objs.objs[1] = NULL; } } @@ -207,15 +209,21 @@ static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) { } else { scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); } - for (int i=0; i<scount; i++) { - if (NULL != request->data.vecs.stypes && NULL != request->data.vecs.stypes[i]) { - OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + if (NULL != request->data.vecs.stypes) { + for (int i=0; i<scount; i++) { + if (NULL != request->data.vecs.stypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + } } + request->data.vecs.stypes = NULL; } - for (int i=0; i<rcount; i++) { - if (NULL != request->data.vecs.rtypes && NULL != request->data.vecs.rtypes[i]) { - OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + if (NULL != request->data.vecs.rtypes) { + for (int i=0; i<rcount; i++) { + if (NULL != request->data.vecs.rtypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + } } + request->data.vecs.rtypes = NULL; } } From 63d3ccde9ddc922737fe3e307cc1af3a70474265 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 9 Aug 2019 09:57:56 +0900 Subject: [PATCH 3/3] coll/base: only retain datatypes/op if the request has not yet completed a non blocking collective might return ompi_request_null, so we should not retain anything in that 
case. Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_util.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 708ebb0a0e..422894e45f 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -141,6 +141,9 @@ int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op, ompi_datatype_t *type) { ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (!ompi_op_is_intrinsic(op)) { OBJ_RETAIN(op); request->data.op.op = op; @@ -177,6 +180,9 @@ int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype ompi_datatype_t *rtype) { ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (NULL != stype && !ompi_datatype_is_predefined(stype)) { OBJ_RETAIN(stype); request->data.types.stype = stype; @@ -254,6 +260,9 @@ int ompi_coll_base_retain_datatypes_w( ompi_request_t *req, bool retain = false; ompi_communicator_t *comm = request->super.req_mpi_object.comm; int scount, rcount; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (OMPI_COMM_IS_TOPO(comm)) { (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); } else {