1
1

Fix the atomic management of the bcast and reduce freelist

API consistent with other collective modules
Add comments
Other minor cleanups.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2020-05-07 14:42:02 -04:00 коммит произвёл Jeff Squyres
родитель a4be3bb93d
Коммит d71264569e
16 изменённых файлов: 185 добавлений и 329 удалений

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University * Copyright (c) 2004-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -188,9 +188,13 @@ struct ompi_communicator_t {
/* Collectives module interface and data */ /* Collectives module interface and data */
mca_coll_base_comm_coll_t *c_coll; mca_coll_base_comm_coll_t *c_coll;
/* Non-blocking collective tag */ /* Non-blocking collective tag. These are added here as they should be
_Atomic int32_t c_ibcast_tag; * shared between all non-blocking collective modules (to avoid message
_Atomic int32_t c_ireduce_tag; * collisions between them in the case where multiple outstanding
* non-blocking collective coexists using multiple backends).
*/
opal_atomic_int32_t c_ibcast_tag;
opal_atomic_int32_t c_ireduce_tag;
}; };
typedef struct ompi_communicator_t ompi_communicator_t; typedef struct ompi_communicator_t ompi_communicator_t;

Просмотреть файл

@ -1,5 +1,5 @@
# #
# Copyright (c) 2014 The University of Tennessee and The University # Copyright (c) 2014-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights # of Tennessee Research Foundation. All rights
# reserved. # reserved.
# $COPYRIGHT$ # $COPYRIGHT$
@ -13,11 +13,11 @@
sources = \ sources = \
coll_adapt_component.c \ coll_adapt_component.c \
coll_adapt_module.c \ coll_adapt_module.c \
coll_adapt_bcast.c \ coll_adapt_bcast.c \
coll_adapt_ibcast.c \ coll_adapt_ibcast.c \
coll_adapt_reduce.c \ coll_adapt_reduce.c \
coll_adapt_ireduce.c \ coll_adapt_ireduce.c \
coll_adapt.h \ coll_adapt.h \
coll_adapt_algorithms.h \ coll_adapt_algorithms.h \
coll_adapt_context.h \ coll_adapt_context.h \
coll_adapt_context.c \ coll_adapt_context.c \

Просмотреть файл

@ -38,8 +38,9 @@ typedef struct mca_coll_adapt_component_t {
/* MCA parameter: Priority of this component */ /* MCA parameter: Priority of this component */
int adapt_priority; int adapt_priority;
/* MCA parameter: Output verbose level */ /* MCA parameter: Output stream and verbose level */
int adapt_output; int adapt_output;
int adapt_verbose;
/* MCA parameter: Maximum number of segment in context free list */ /* MCA parameter: Maximum number of segment in context free list */
int adapt_context_free_list_max; int adapt_context_free_list_max;
@ -57,7 +58,6 @@ typedef struct mca_coll_adapt_component_t {
int adapt_ibcast_max_recv_requests; int adapt_ibcast_max_recv_requests;
/* Bcast free list */ /* Bcast free list */
opal_free_list_t *adapt_ibcast_context_free_list; opal_free_list_t *adapt_ibcast_context_free_list;
opal_atomic_int32_t adapt_ibcast_context_free_list_enabled;
/* Reduce MCA parameter */ /* Reduce MCA parameter */
int adapt_ireduce_algorithm; int adapt_ireduce_algorithm;
@ -70,7 +70,6 @@ typedef struct mca_coll_adapt_component_t {
/* Reduce free list */ /* Reduce free list */
opal_free_list_t *adapt_ireduce_context_free_list; opal_free_list_t *adapt_ireduce_context_free_list;
opal_atomic_int32_t adapt_ireduce_context_free_list_enabled;
} mca_coll_adapt_component_t; } mca_coll_adapt_component_t;
@ -91,7 +90,7 @@ OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component;
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads); int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority); mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
/* Free ADAPT quest */ /* ADAPT request free */
int ompi_coll_adapt_request_free(ompi_request_t **request); int ompi_coll_adapt_request_free(ompi_request_t **request);
#endif /* MCA_COLL_ADAPT_EXPORT_H */ #endif /* MCA_COLL_ADAPT_EXPORT_H */

Просмотреть файл

@ -20,82 +20,45 @@ typedef struct ompi_coll_adapt_algorithm_index_s {
} ompi_coll_adapt_algorithm_index_t; } ompi_coll_adapt_algorithm_index_t;
/* Bcast */ /* Bcast */
int ompi_coll_adapt_ibcast_init(void); int ompi_coll_adapt_ibcast_register(void);
int ompi_coll_adapt_ibcast_fini(void); int ompi_coll_adapt_ibcast_fini(void);
int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, int ompi_coll_adapt_bcast(BCAST_ARGS);
struct ompi_communicator_t *comm, mca_coll_base_module_t * module); int ompi_coll_adapt_ibcast(IBCAST_ARGS);
int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, int ompi_coll_adapt_ibcast_generic(IBCAST_ARGS,
struct ompi_communicator_t *comm, ompi_request_t ** request, ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag);
mca_coll_base_module_t * module); int ompi_coll_adapt_ibcast_binomial(IBCAST_ARGS,
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, int ibcast_tag);
struct ompi_communicator_t *comm, ompi_request_t ** request, int ompi_coll_adapt_ibcast_in_order_binomial(IBCAST_ARGS,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree, int ibcast_tag);
size_t seg_size, int ibcast_tag); int ompi_coll_adapt_ibcast_binary(IBCAST_ARGS,
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int ibcast_tag);
int root, struct ompi_communicator_t *comm, int ompi_coll_adapt_ibcast_pipeline(IBCAST_ARGS,
ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag);
int ibcast_tag); int ompi_coll_adapt_ibcast_chain(IBCAST_ARGS,
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int ibcast_tag);
int root, struct ompi_communicator_t *comm, int ompi_coll_adapt_ibcast_linear(IBCAST_ARGS,
ompi_request_t ** request, int ibcast_tag);
mca_coll_base_module_t * module, int ibcast_tag); int ompi_coll_adapt_ibcast_tuned(IBCAST_ARGS,
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, int ibcast_tag);
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag);
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t *module, int ibcast_tag);
/* Reduce */ /* Reduce */
int ompi_coll_adapt_ireduce_init(void); int ompi_coll_adapt_ireduce_register(void);
int ompi_coll_adapt_ireduce_fini(void); int ompi_coll_adapt_ireduce_fini(void);
int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, int ompi_coll_adapt_reduce(REDUCE_ARGS);
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, int ompi_coll_adapt_ireduce(IREDUCE_ARGS);
mca_coll_base_module_t * module); int ompi_coll_adapt_ireduce_generic(IREDUCE_ARGS,
int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, ompi_coll_tree_t * tree, size_t seg_size, int ireduce_tag);
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, int ompi_coll_adapt_ireduce_tuned(IREDUCE_ARGS,
ompi_request_t ** request, mca_coll_base_module_t * module); int ireduce_tag);
int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, int ompi_coll_adapt_ireduce_binomial(IREDUCE_ARGS,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, int ireduce_tag);
struct ompi_communicator_t *comm, ompi_request_t ** request, int ompi_coll_adapt_ireduce_in_order_binomial(IREDUCE_ARGS,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree, int ireduce_tag);
size_t seg_size, int ireduce_tag); int ompi_coll_adapt_ireduce_binary(IREDUCE_ARGS,
int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count, int ireduce_tag);
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, int ompi_coll_adapt_ireduce_pipeline(IREDUCE_ARGS,
struct ompi_communicator_t *comm, ompi_request_t ** request, int ireduce_tag);
mca_coll_base_module_t *module, int ireduce_tag); int ompi_coll_adapt_ireduce_chain(IREDUCE_ARGS,
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, int ireduce_tag);
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, int ompi_coll_adapt_ireduce_linear(IREDUCE_ARGS,
struct ompi_communicator_t *comm, ompi_request_t ** request, int ireduce_tag);
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);

Просмотреть файл

@ -17,10 +17,13 @@ int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatyp
{ {
if (count == 0) { if (count == 0) {
return MPI_SUCCESS; return MPI_SUCCESS;
} else {
ompi_request_t *request;
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
} }
ompi_request_t *request = NULL;
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
if( MPI_SUCCESS != err ) {
if( NULL == request )
return err;
}
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
} }

Просмотреть файл

@ -65,11 +65,10 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
/* adapt-component specific information */ /* adapt-component specific information */
/* (default) priority */ 0, /* (default) priority */
0,
/* (default) verbose level */ 0, /* (default) output stream */
0, 0, /* (default) verbose level */
/* default values for non-MCA parameters */ /* default values for non-MCA parameters */
/* Not specifying values here gives us all 0's */ /* Not specifying values here gives us all 0's */
@ -78,25 +77,13 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
/* Open the component */ /* Open the component */
static int adapt_open(void) static int adapt_open(void)
{ {
int param;
mca_coll_adapt_component_t *cs = &mca_coll_adapt_component; mca_coll_adapt_component_t *cs = &mca_coll_adapt_component;
/* if (cs->adapt_verbose > 0) {
* Get the global coll verbosity: it will be ours cs->adapt_output = opal_output_open(NULL);
*/ opal_output_set_verbosity(cs->adapt_output, cs->adapt_verbose);
param = mca_base_var_find("ompi", "coll", "base", "verbose");
if (param >= 0) {
const int *verbose = NULL;
mca_base_var_get_value(param, &verbose, NULL, NULL);
if (verbose && verbose[0] > 0) {
cs->adapt_output = opal_output_open(NULL);
opal_output_set_verbosity(cs->adapt_output, verbose[0]);
}
} }
opal_output_verbose(1, cs->adapt_output,
"coll:adapt:component_open: done!");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -131,16 +118,14 @@ static int adapt_register(void)
OPAL_INFO_LVL_9, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority); MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
int adapt_verbose = 0; cs->adapt_verbose = ompi_coll_base_framework.framework_verbose;
(void) mca_base_component_var_register(c, "verbose", (void) mca_base_component_var_register(c, "verbose",
"Verbose level", "Verbose level (default set to the collective framework verbosity)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &adapt_verbose); MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose);
cs->adapt_output = opal_output_open(NULL);
opal_output_set_verbosity(cs->adapt_output, adapt_verbose);
cs->adapt_context_free_list_min = 10; cs->adapt_context_free_list_min = 64;
(void) mca_base_component_var_register(c, "context_free_list_min", (void) mca_base_component_var_register(c, "context_free_list_min",
"Minimum number of segments in context free list", "Minimum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -148,7 +133,7 @@ static int adapt_register(void)
MCA_BASE_VAR_SCOPE_READONLY, MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_min); &cs->adapt_context_free_list_min);
cs->adapt_context_free_list_max = 10000; cs->adapt_context_free_list_max = 1024;
(void) mca_base_component_var_register(c, "context_free_list_max", (void) mca_base_component_var_register(c, "context_free_list_max",
"Maximum number of segments in context free list", "Maximum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -156,15 +141,15 @@ static int adapt_register(void)
MCA_BASE_VAR_SCOPE_READONLY, MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_max); &cs->adapt_context_free_list_max);
cs->adapt_context_free_list_inc = 10; cs->adapt_context_free_list_inc = 32;
(void) mca_base_component_var_register(c, "context_free_list_inc", (void) mca_base_component_var_register(c, "context_free_list_inc",
"Increasement number of segments in context free list", "Increasement number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_inc); &cs->adapt_context_free_list_inc);
ompi_coll_adapt_ibcast_init(); ompi_coll_adapt_ibcast_register();
ompi_coll_adapt_ireduce_init(); ompi_coll_adapt_ireduce_register();
return adapt_verify_mca_variables(); return adapt_verify_mca_variables();
} }

Просмотреть файл

@ -12,58 +12,15 @@
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
#include "coll_adapt_context.h" #include "coll_adapt_context.h"
static void ompi_coll_adapt_bcast_context_constructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void ompi_coll_adapt_bcast_context_destructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void
ompi_coll_adapt_constant_bcast_context_constructor(ompi_coll_adapt_constant_bcast_context_t * con)
{
}
static void ompi_coll_adapt_constant_bcast_context_destructor(ompi_coll_adapt_constant_bcast_context_t
* con)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t, OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
ompi_coll_adapt_bcast_context_constructor, NULL, NULL);
ompi_coll_adapt_bcast_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t, OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t,
ompi_coll_adapt_constant_bcast_context_constructor, NULL, NULL);
ompi_coll_adapt_constant_bcast_context_destructor);
static void ompi_coll_adapt_reduce_context_constructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void ompi_coll_adapt_reduce_context_destructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void
ompi_coll_adapt_constant_reduce_context_constructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
static void
ompi_coll_adapt_constant_reduce_context_destructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t, OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
ompi_coll_adapt_reduce_context_constructor, NULL, NULL);
ompi_coll_adapt_reduce_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t, OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
ompi_coll_adapt_constant_reduce_context_constructor, NULL, NULL);
ompi_coll_adapt_constant_reduce_context_destructor);

Просмотреть файл

@ -43,7 +43,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[
/* /*
* Set up MCA parameters of MPI_Bcast and MPI_IBcast * Set up MCA parameters of MPI_Bcast and MPI_IBcast
*/ */
int ompi_coll_adapt_ibcast_init(void) int ompi_coll_adapt_ibcast_register(void)
{ {
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version; mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
@ -78,7 +78,6 @@ int ompi_coll_adapt_ibcast_init(void)
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests); &mca_coll_adapt_component.adapt_ibcast_max_recv_requests);
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL; mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -90,7 +89,6 @@ int ompi_coll_adapt_ibcast_fini(void)
if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) { if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list); OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list);
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL; mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n")); OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -179,7 +177,6 @@ static int send_cb(ompi_request_t * req)
int num_sent = ++(context->con->num_sent_segs); int num_sent = ++(context->con->num_sent_segs);
int num_recv_fini_t = context->con->num_recv_fini; int num_recv_fini_t = context->con->num_recv_fini;
int rank = ompi_comm_rank(context->con->comm); int rank = ompi_comm_rank(context->con->comm);
opal_mutex_t *mutex_temp = context->con->mutex;
/* Check whether signal the condition */ /* Check whether signal the condition */
if ((rank == context->con->root if ((rank == context->con->root
&& num_sent == context->con->tree->tree_nextsize * context->con->num_segs) && num_sent == context->con->tree->tree_nextsize * context->con->num_segs)
@ -190,13 +187,13 @@ static int send_cb(ompi_request_t * req)
context->con->num_segs)) { context->con->num_segs)) {
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in send\n", OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in send\n",
ompi_comm_rank(context->con->comm))); ompi_comm_rank(context->con->comm)));
OPAL_THREAD_UNLOCK(mutex_temp); OPAL_THREAD_UNLOCK(context->con->mutex);
ibcast_request_fini(context); ibcast_request_fini(context);
} else { } else {
OBJ_RELEASE(context->con); OBJ_RELEASE(context->con);
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list, opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
(opal_free_list_item_t *) context); (opal_free_list_item_t *) context);
OPAL_THREAD_UNLOCK(mutex_temp); OPAL_THREAD_UNLOCK(context->con->mutex);
} }
req->req_free(&req); req->req_free(&req);
/* Call back function return 1, which means successful */ /* Call back function return 1, which means successful */
@ -306,7 +303,6 @@ static int recv_cb(ompi_request_t * req)
int num_sent = context->con->num_sent_segs; int num_sent = context->con->num_sent_segs;
int num_recv_fini_t = ++(context->con->num_recv_fini); int num_recv_fini_t = ++(context->con->num_recv_fini);
int rank = ompi_comm_rank(context->con->comm); int rank = ompi_comm_rank(context->con->comm);
opal_mutex_t *mutex_temp = context->con->mutex;
/* If this process is leaf and has received all the segments */ /* If this process is leaf and has received all the segments */
if ((rank == context->con->root if ((rank == context->con->root
@ -318,13 +314,13 @@ static int recv_cb(ompi_request_t * req)
context->con->num_segs)) { context->con->num_segs)) {
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in recv\n", OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in recv\n",
ompi_comm_rank(context->con->comm))); ompi_comm_rank(context->con->comm)));
OPAL_THREAD_UNLOCK(mutex_temp); OPAL_THREAD_UNLOCK(context->con->mutex);
ibcast_request_fini(context); ibcast_request_fini(context);
} else { } else {
OBJ_RELEASE(context->con); OBJ_RELEASE(context->con);
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list, opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
(opal_free_list_item_t *) context); (opal_free_list_item_t *) context);
OPAL_THREAD_UNLOCK(mutex_temp); OPAL_THREAD_UNLOCK(context->con->mutex);
} }
req->req_free(&req); req->req_free(&req);
return 1; return 1;
@ -334,9 +330,8 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
struct ompi_communicator_t *comm, ompi_request_t ** request, struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module) mca_coll_base_module_t * module)
{ {
if (count == 0) { if (0 == count) {
ompi_request_t *temp_request; ompi_request_t *temp_request = OBJ_NEW(ompi_request_t);
temp_request = OBJ_NEW(ompi_request_t);
OMPI_REQUEST_INIT(temp_request, false); OMPI_REQUEST_INIT(temp_request, false);
temp_request->req_type = 0; temp_request->req_type = 0;
temp_request->req_free = ompi_coll_adapt_request_free; temp_request->req_free = ompi_coll_adapt_request_free;
@ -348,24 +343,22 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
ompi_request_complete(temp_request, 1); ompi_request_complete(temp_request, 1);
*request = temp_request; *request = temp_request;
return MPI_SUCCESS; return MPI_SUCCESS;
} else {
int rank = ompi_comm_rank(comm);
if (rank == root) {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ibcast root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
root, mca_coll_adapt_component.adapt_ibcast_algorithm,
mca_coll_adapt_component.adapt_ibcast_segment_size,
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
}
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
ibcast_tag = ibcast_tag % 4096;
ompi_coll_adapt_ibcast_fn_t bcast_func =
(ompi_coll_adapt_ibcast_fn_t)
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
algorithm_fn_ptr;
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
} }
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
ibcast_tag = ibcast_tag % 4096;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ibcast tag %d root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
ibcast_tag, root, mca_coll_adapt_component.adapt_ibcast_algorithm,
mca_coll_adapt_component.adapt_ibcast_segment_size,
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
ompi_coll_adapt_ibcast_fn_t bcast_func =
(ompi_coll_adapt_ibcast_fn_t)
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
algorithm_fn_ptr;
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
} }
/* /*
@ -377,7 +370,7 @@ int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *
mca_coll_base_module_t *module, int ibcast_tag) mca_coll_base_module_t *module, int ibcast_tag)
{ {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n")); OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
return OMPI_SUCCESS; return OMPI_ERR_NOT_IMPLEMENTED;
} }
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
@ -471,12 +464,7 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
mca_coll_base_module_t * module, ompi_coll_tree_t * tree, mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ibcast_tag) size_t seg_size, int ibcast_tag)
{ {
/* Tempory variables for iteration */ int i, j, rank, err;
int i, j;
/* Rank of this process */
int rank;
/* Record return value */
int err;
/* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */ /* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */
int min; int min;
@ -498,23 +486,21 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
/* Record how many isends have been issued for every child */ /* Record how many isends have been issued for every child */
int *send_array = NULL; int *send_array = NULL;
/* Set up free list */ /* Atomically set up free list */
if (0 == mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled) { if (NULL == mca_coll_adapt_component.adapt_ibcast_context_free_list) {
int32_t context_free_list_enabled = opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
opal_atomic_add_fetch_32(& opal_free_list_init(fl,
(mca_coll_adapt_component. sizeof(ompi_coll_adapt_bcast_context_t),
adapt_ibcast_context_free_list_enabled), 1); opal_cache_line_size,
if (1 == context_free_list_enabled) { OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
mca_coll_adapt_component.adapt_ibcast_context_free_list = OBJ_NEW(opal_free_list_t); 0, opal_cache_line_size,
opal_free_list_init(mca_coll_adapt_component.adapt_ibcast_context_free_list, mca_coll_adapt_component.adapt_context_free_list_min,
sizeof(ompi_coll_adapt_bcast_context_t), mca_coll_adapt_component.adapt_context_free_list_max,
opal_cache_line_size, mca_coll_adapt_component.adapt_context_free_list_inc,
OBJ_CLASS(ompi_coll_adapt_bcast_context_t), NULL, 0, NULL, NULL, NULL);
0, opal_cache_line_size, if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ibcast_context_free_list,
mca_coll_adapt_component.adapt_context_free_list_min, &(intptr_t){0}, fl) ) {
mca_coll_adapt_component.adapt_context_free_list_max, OBJ_RELEASE(fl);
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
} }
} }

Просмотреть файл

@ -12,13 +12,5 @@
#include "coll_adapt.h" #include "coll_adapt.h"
#include "coll_adapt_inbuf.h" #include "coll_adapt_inbuf.h"
static void ompi_coll_adapt_inbuf_constructor(ompi_coll_adapt_inbuf_t * inbuf) OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t,
{ NULL, NULL);
}
static void ompi_coll_adapt_inbuf_destructor(ompi_coll_adapt_inbuf_t * inbuf)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t, ompi_coll_adapt_inbuf_constructor,
ompi_coll_adapt_inbuf_destructor);

Просмотреть файл

@ -47,7 +47,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ireduce_algorithm_index
/* /*
* Set up MCA parameters of MPI_Reduce and MPI_Ireduce * Set up MCA parameters of MPI_Reduce and MPI_Ireduce
*/ */
int ompi_coll_adapt_ireduce_init(void) int ompi_coll_adapt_ireduce_register(void)
{ {
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version; mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
@ -107,7 +107,6 @@ int ompi_coll_adapt_ireduce_init(void)
&mca_coll_adapt_component.adapt_inbuf_free_list_inc); &mca_coll_adapt_component.adapt_inbuf_free_list_inc);
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL; mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -119,7 +118,6 @@ int ompi_coll_adapt_ireduce_fini(void)
if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) { if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list); OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list);
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL; mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ireduce fini\n")); OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ireduce fini\n"));
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -148,26 +146,22 @@ static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_c
static int add_to_list(opal_list_t * list, int id) static int add_to_list(opal_list_t * list, int id)
{ {
ompi_coll_adapt_item_t *item; ompi_coll_adapt_item_t *item;
int ret = 0;
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list); for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list); item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) { item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
if (item->id == id) { if (item->id == id) {
(item->count)++; (item->count)++;
ret = 1; OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
break; return 1;
} }
} }
if (ret == 0) { /* Add a new object to the list with count set to 1 */
item = OBJ_NEW(ompi_coll_adapt_item_t); item = OBJ_NEW(ompi_coll_adapt_item_t);
item->id = id; item->id = id;
item->count = 1; item->count = 1;
opal_list_append(list, (opal_list_item_t *) item); opal_list_append(list, (opal_list_item_t *) item);
ret = 2; OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
} return 2;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return %d\n",
ret));
return ret;
} }
/* /*
@ -250,7 +244,6 @@ static int send_cb(ompi_request_t * req)
adapt_ireduce_context_free_list); adapt_ireduce_context_free_list);
if (context->con->tree->tree_nextsize > 0) { if (context->con->tree->tree_nextsize > 0) {
send_context->buff = context->con->accumbuf[item->id]; send_context->buff = context->con->accumbuf[item->id];
} else { } else {
send_context->buff = send_context->buff =
context->buff + (item->id - context->frag_id) * context->con->segment_increment; context->buff + (item->id - context->frag_id) * context->con->segment_increment;
@ -530,26 +523,22 @@ int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi
{ {
if (count == 0) { if (count == 0) {
return MPI_SUCCESS; return MPI_SUCCESS;
} else {
int rank = ompi_comm_rank(comm);
if (rank == root) {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ireduce root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
root, mca_coll_adapt_component.adapt_ireduce_algorithm,
mca_coll_adapt_component.adapt_ireduce_segment_size,
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
}
/* Get ireduce tag */
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
ireduce_tag = (ireduce_tag % 4096) + 4096;
fflush(stdout);
ompi_coll_adapt_ireduce_fn_t reduce_func =
(ompi_coll_adapt_ireduce_fn_t)
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
adapt_ireduce_algorithm].algorithm_fn_ptr;
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
} }
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
ireduce_tag = (ireduce_tag % 4096) + 4096;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ireduce tag %d root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
ireduce_tag, root, mca_coll_adapt_component.adapt_ireduce_algorithm,
mca_coll_adapt_component.adapt_ireduce_segment_size,
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
ompi_coll_adapt_ireduce_fn_t reduce_func =
(ompi_coll_adapt_ireduce_fn_t)
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
adapt_ireduce_algorithm].algorithm_fn_ptr;
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
} }
/* /*
@ -562,7 +551,7 @@ int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
mca_coll_base_module_t *module, int ireduce_tag) mca_coll_base_module_t *module, int ireduce_tag)
{ {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n")); OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
return OMPI_SUCCESS; return OMPI_ERR_NOT_IMPLEMENTED;
} }
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
@ -688,23 +677,21 @@ int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
ompi_datatype_get_true_extent(dtype, &true_lower_bound, &true_extent); ompi_datatype_get_true_extent(dtype, &true_lower_bound, &true_extent);
real_seg_size = true_extent + (ptrdiff_t) (seg_count - 1) * extent; real_seg_size = true_extent + (ptrdiff_t) (seg_count - 1) * extent;
/* Set up free list */ /* Atomically set up free list */
if (0 == mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled) { if (NULL == mca_coll_adapt_component.adapt_ireduce_context_free_list) {
int32_t context_free_list_enabled = opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
opal_atomic_add_fetch_32(& opal_free_list_init(fl,
(mca_coll_adapt_component. sizeof(ompi_coll_adapt_reduce_context_t),
adapt_ireduce_context_free_list_enabled), 1); opal_cache_line_size,
if (1 == context_free_list_enabled) { OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
mca_coll_adapt_component.adapt_ireduce_context_free_list = OBJ_NEW(opal_free_list_t); 0, opal_cache_line_size,
opal_free_list_init(mca_coll_adapt_component.adapt_ireduce_context_free_list, mca_coll_adapt_component.adapt_context_free_list_min,
sizeof(ompi_coll_adapt_reduce_context_t), mca_coll_adapt_component.adapt_context_free_list_max,
opal_cache_line_size, mca_coll_adapt_component.adapt_context_free_list_inc,
OBJ_CLASS(ompi_coll_adapt_reduce_context_t), NULL, 0, NULL, NULL, NULL);
0, opal_cache_line_size, if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ireduce_context_free_list,
mca_coll_adapt_component.adapt_context_free_list_min, &(intptr_t){0}, fl) ) {
mca_coll_adapt_component.adapt_context_free_list_max, OBJ_RELEASE(fl);
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
} }
} }

Просмотреть файл

@ -11,13 +11,5 @@
#include "coll_adapt_item.h" #include "coll_adapt_item.h"
static void ompi_coll_adapt_item_constructor(ompi_coll_adapt_item_t * item) OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t,
{ NULL, NULL);
}
static void ompi_coll_adapt_item_destructor(ompi_coll_adapt_item_t * item)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t, ompi_coll_adapt_item_constructor,
ompi_coll_adapt_item_destructor);

Просмотреть файл

@ -16,7 +16,7 @@ struct ompi_coll_adapt_item_s {
opal_list_item_t super; opal_list_item_t super;
/* Fragment id */ /* Fragment id */
int id; int id;
/* The number of children which have received the current segment from */ /* The number of children which have received the current segment */
int count; int count;
}; };

Просмотреть файл

@ -19,11 +19,13 @@ int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_
{ {
if (count == 0) { if (count == 0) {
return MPI_SUCCESS; return MPI_SUCCESS;
} else {
ompi_request_t *request;
int err =
ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
} }
ompi_request_t *request = NULL;
int err = ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
if( MPI_SUCCESS != err ) {
if( NULL == request )
return err;
}
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
} }

Просмотреть файл

@ -492,10 +492,6 @@ struct mca_coll_base_comm_t {
/* in-order binary tree (root of the in-order binary tree is rank 0) */ /* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree; ompi_coll_tree_t *cached_in_order_bintree;
/* linear */
ompi_coll_tree_t *cached_linear;
int cached_linear_root;
}; };
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t; typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t); OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);

Просмотреть файл

@ -215,17 +215,8 @@ static int tuned_open(void)
int rc; int rc;
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
{ if (ompi_coll_base_framework.framework_verbose) {
int param; ompi_coll_tuned_stream = opal_output_open(NULL);
param = mca_base_var_find("ompi", "coll", "base", "verbose");
if (param >= 0) {
const int *verbose = NULL;
mca_base_var_get_value(param, &verbose, NULL, NULL);
if (verbose && verbose[0] > 0) {
ompi_coll_tuned_stream = opal_output_open(NULL);
}
}
} }
#endif /* OPAL_ENABLE_DEBUG */ #endif /* OPAL_ENABLE_DEBUG */

Просмотреть файл

@ -438,9 +438,10 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
int rc = 0; int rc = 0;
if(NULL != request->req_complete_cb) { if(NULL != request->req_complete_cb) {
ompi_request_complete_fn_t temp = request->req_complete_cb; /* Set the request cb to NULL to allow resetting in the callback */
ompi_request_complete_fn_t fct = request->req_complete_cb;
request->req_complete_cb = NULL; request->req_complete_cb = NULL;
rc = temp( request ); rc = fct( request );
} }
if (0 == rc) { if (0 == rc) {
@ -454,9 +455,8 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
if( REQUEST_PENDING != tmp_sync ) if( REQUEST_PENDING != tmp_sync )
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR); wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
} }
} else { } else
request->req_complete = REQUEST_COMPLETED; request->req_complete = REQUEST_COMPLETED;
}
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -468,14 +468,13 @@ static inline int ompi_request_set_callback(ompi_request_t* request,
{ {
request->req_complete_cb_data = cb_data; request->req_complete_cb_data = cb_data;
request->req_complete_cb = cb; request->req_complete_cb = cb;
int rc = 0;
/* If request is completed and the callback is not called, need to call callback */ /* If request is completed and the callback is not called, need to call callback */
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) { if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
ompi_request_complete_fn_t temp = request->req_complete_cb; ompi_request_complete_fn_t fct = request->req_complete_cb;
request->req_complete_cb = NULL; request->req_complete_cb = NULL;
rc = temp( request ); return fct( request );
} }
return rc; return OMPI_SUCCESS;
} }
END_C_DECLS END_C_DECLS