Fix the atomic management of the bcast and reduce freelist
API consistent with other collective modules Add comments Other minor cleanups. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
a4be3bb93d
Коммит
d71264569e
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
@ -188,9 +188,13 @@ struct ompi_communicator_t {
|
|||||||
/* Collectives module interface and data */
|
/* Collectives module interface and data */
|
||||||
mca_coll_base_comm_coll_t *c_coll;
|
mca_coll_base_comm_coll_t *c_coll;
|
||||||
|
|
||||||
/* Non-blocking collective tag */
|
/* Non-blocking collective tag. These are added here as they should be
|
||||||
_Atomic int32_t c_ibcast_tag;
|
* shared between all non-blocking collective modules (to avoid message
|
||||||
_Atomic int32_t c_ireduce_tag;
|
* collisions between them in the case where multiple outstanding
|
||||||
|
* non-blocking collective coexists using multiple backends).
|
||||||
|
*/
|
||||||
|
opal_atomic_int32_t c_ibcast_tag;
|
||||||
|
opal_atomic_int32_t c_ireduce_tag;
|
||||||
};
|
};
|
||||||
typedef struct ompi_communicator_t ompi_communicator_t;
|
typedef struct ompi_communicator_t ompi_communicator_t;
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2014 The University of Tennessee and The University
|
# Copyright (c) 2014-2020 The University of Tennessee and The University
|
||||||
# of Tennessee Research Foundation. All rights
|
# of Tennessee Research Foundation. All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
|
@ -38,8 +38,9 @@ typedef struct mca_coll_adapt_component_t {
|
|||||||
/* MCA parameter: Priority of this component */
|
/* MCA parameter: Priority of this component */
|
||||||
int adapt_priority;
|
int adapt_priority;
|
||||||
|
|
||||||
/* MCA parameter: Output verbose level */
|
/* MCA parameter: Output stream and verbose level */
|
||||||
int adapt_output;
|
int adapt_output;
|
||||||
|
int adapt_verbose;
|
||||||
|
|
||||||
/* MCA parameter: Maximum number of segment in context free list */
|
/* MCA parameter: Maximum number of segment in context free list */
|
||||||
int adapt_context_free_list_max;
|
int adapt_context_free_list_max;
|
||||||
@ -57,7 +58,6 @@ typedef struct mca_coll_adapt_component_t {
|
|||||||
int adapt_ibcast_max_recv_requests;
|
int adapt_ibcast_max_recv_requests;
|
||||||
/* Bcast free list */
|
/* Bcast free list */
|
||||||
opal_free_list_t *adapt_ibcast_context_free_list;
|
opal_free_list_t *adapt_ibcast_context_free_list;
|
||||||
opal_atomic_int32_t adapt_ibcast_context_free_list_enabled;
|
|
||||||
|
|
||||||
/* Reduce MCA parameter */
|
/* Reduce MCA parameter */
|
||||||
int adapt_ireduce_algorithm;
|
int adapt_ireduce_algorithm;
|
||||||
@ -70,7 +70,6 @@ typedef struct mca_coll_adapt_component_t {
|
|||||||
|
|
||||||
/* Reduce free list */
|
/* Reduce free list */
|
||||||
opal_free_list_t *adapt_ireduce_context_free_list;
|
opal_free_list_t *adapt_ireduce_context_free_list;
|
||||||
opal_atomic_int32_t adapt_ireduce_context_free_list_enabled;
|
|
||||||
|
|
||||||
} mca_coll_adapt_component_t;
|
} mca_coll_adapt_component_t;
|
||||||
|
|
||||||
@ -91,7 +90,7 @@ OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component;
|
|||||||
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
|
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
|
||||||
mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
|
mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
|
||||||
|
|
||||||
/* Free ADAPT quest */
|
/* ADAPT request free */
|
||||||
int ompi_coll_adapt_request_free(ompi_request_t **request);
|
int ompi_coll_adapt_request_free(ompi_request_t **request);
|
||||||
|
|
||||||
#endif /* MCA_COLL_ADAPT_EXPORT_H */
|
#endif /* MCA_COLL_ADAPT_EXPORT_H */
|
||||||
|
@ -20,82 +20,45 @@ typedef struct ompi_coll_adapt_algorithm_index_s {
|
|||||||
} ompi_coll_adapt_algorithm_index_t;
|
} ompi_coll_adapt_algorithm_index_t;
|
||||||
|
|
||||||
/* Bcast */
|
/* Bcast */
|
||||||
int ompi_coll_adapt_ibcast_init(void);
|
int ompi_coll_adapt_ibcast_register(void);
|
||||||
int ompi_coll_adapt_ibcast_fini(void);
|
int ompi_coll_adapt_ibcast_fini(void);
|
||||||
int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
int ompi_coll_adapt_bcast(BCAST_ARGS);
|
||||||
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
int ompi_coll_adapt_ibcast(IBCAST_ARGS);
|
||||||
int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
int ompi_coll_adapt_ibcast_generic(IBCAST_ARGS,
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag);
|
||||||
mca_coll_base_module_t * module);
|
int ompi_coll_adapt_ibcast_binomial(IBCAST_ARGS,
|
||||||
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
|
|
||||||
size_t seg_size, int ibcast_tag);
|
|
||||||
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
|
|
||||||
int root, struct ompi_communicator_t *comm,
|
|
||||||
ompi_request_t ** request, mca_coll_base_module_t * module,
|
|
||||||
int ibcast_tag);
|
int ibcast_tag);
|
||||||
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
|
int ompi_coll_adapt_ibcast_in_order_binomial(IBCAST_ARGS,
|
||||||
int root, struct ompi_communicator_t *comm,
|
int ibcast_tag);
|
||||||
ompi_request_t ** request,
|
int ompi_coll_adapt_ibcast_binary(IBCAST_ARGS,
|
||||||
mca_coll_base_module_t * module, int ibcast_tag);
|
int ibcast_tag);
|
||||||
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
int ompi_coll_adapt_ibcast_pipeline(IBCAST_ARGS,
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
int ibcast_tag);
|
||||||
mca_coll_base_module_t * module, int ibcast_tag);
|
int ompi_coll_adapt_ibcast_chain(IBCAST_ARGS,
|
||||||
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
|
int ibcast_tag);
|
||||||
int root, struct ompi_communicator_t *comm,
|
int ompi_coll_adapt_ibcast_linear(IBCAST_ARGS,
|
||||||
ompi_request_t ** request, mca_coll_base_module_t * module,
|
int ibcast_tag);
|
||||||
|
int ompi_coll_adapt_ibcast_tuned(IBCAST_ARGS,
|
||||||
int ibcast_tag);
|
int ibcast_tag);
|
||||||
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ibcast_tag);
|
|
||||||
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ibcast_tag);
|
|
||||||
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t *module, int ibcast_tag);
|
|
||||||
|
|
||||||
/* Reduce */
|
/* Reduce */
|
||||||
int ompi_coll_adapt_ireduce_init(void);
|
int ompi_coll_adapt_ireduce_register(void);
|
||||||
int ompi_coll_adapt_ireduce_fini(void);
|
int ompi_coll_adapt_ireduce_fini(void);
|
||||||
int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
|
int ompi_coll_adapt_reduce(REDUCE_ARGS);
|
||||||
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
|
int ompi_coll_adapt_ireduce(IREDUCE_ARGS);
|
||||||
mca_coll_base_module_t * module);
|
int ompi_coll_adapt_ireduce_generic(IREDUCE_ARGS,
|
||||||
int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
|
ompi_coll_tree_t * tree, size_t seg_size, int ireduce_tag);
|
||||||
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
|
int ompi_coll_adapt_ireduce_tuned(IREDUCE_ARGS,
|
||||||
ompi_request_t ** request, mca_coll_base_module_t * module);
|
int ireduce_tag);
|
||||||
int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
|
int ompi_coll_adapt_ireduce_binomial(IREDUCE_ARGS,
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
int ireduce_tag);
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
int ompi_coll_adapt_ireduce_in_order_binomial(IREDUCE_ARGS,
|
||||||
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
|
int ireduce_tag);
|
||||||
size_t seg_size, int ireduce_tag);
|
int ompi_coll_adapt_ireduce_binary(IREDUCE_ARGS,
|
||||||
int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
|
int ireduce_tag);
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
int ompi_coll_adapt_ireduce_pipeline(IREDUCE_ARGS,
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
int ireduce_tag);
|
||||||
mca_coll_base_module_t *module, int ireduce_tag);
|
int ompi_coll_adapt_ireduce_chain(IREDUCE_ARGS,
|
||||||
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
|
int ireduce_tag);
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
int ompi_coll_adapt_ireduce_linear(IREDUCE_ARGS,
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
int ireduce_tag);
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
|
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
|
||||||
int root, struct ompi_communicator_t *comm,
|
|
||||||
ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
|
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
|
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
|
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
|
|
||||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
|
|
||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
|
||||||
mca_coll_base_module_t * module, int ireduce_tag);
|
|
||||||
|
@ -17,10 +17,13 @@ int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatyp
|
|||||||
{
|
{
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
return MPI_SUCCESS;
|
return MPI_SUCCESS;
|
||||||
} else {
|
}
|
||||||
ompi_request_t *request;
|
ompi_request_t *request = NULL;
|
||||||
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
|
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
|
||||||
|
if( MPI_SUCCESS != err ) {
|
||||||
|
if( NULL == request )
|
||||||
|
return err;
|
||||||
|
}
|
||||||
ompi_request_wait(&request, MPI_STATUS_IGNORE);
|
ompi_request_wait(&request, MPI_STATUS_IGNORE);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@ -65,11 +65,10 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
|
|||||||
|
|
||||||
/* adapt-component specific information */
|
/* adapt-component specific information */
|
||||||
|
|
||||||
/* (default) priority */
|
0, /* (default) priority */
|
||||||
0,
|
|
||||||
|
|
||||||
/* (default) verbose level */
|
0, /* (default) output stream */
|
||||||
0,
|
0, /* (default) verbose level */
|
||||||
|
|
||||||
/* default values for non-MCA parameters */
|
/* default values for non-MCA parameters */
|
||||||
/* Not specifying values here gives us all 0's */
|
/* Not specifying values here gives us all 0's */
|
||||||
@ -78,24 +77,12 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
|
|||||||
/* Open the component */
|
/* Open the component */
|
||||||
static int adapt_open(void)
|
static int adapt_open(void)
|
||||||
{
|
{
|
||||||
int param;
|
|
||||||
mca_coll_adapt_component_t *cs = &mca_coll_adapt_component;
|
mca_coll_adapt_component_t *cs = &mca_coll_adapt_component;
|
||||||
|
|
||||||
/*
|
if (cs->adapt_verbose > 0) {
|
||||||
* Get the global coll verbosity: it will be ours
|
|
||||||
*/
|
|
||||||
param = mca_base_var_find("ompi", "coll", "base", "verbose");
|
|
||||||
if (param >= 0) {
|
|
||||||
const int *verbose = NULL;
|
|
||||||
mca_base_var_get_value(param, &verbose, NULL, NULL);
|
|
||||||
if (verbose && verbose[0] > 0) {
|
|
||||||
cs->adapt_output = opal_output_open(NULL);
|
cs->adapt_output = opal_output_open(NULL);
|
||||||
opal_output_set_verbosity(cs->adapt_output, verbose[0]);
|
opal_output_set_verbosity(cs->adapt_output, cs->adapt_verbose);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
opal_output_verbose(1, cs->adapt_output,
|
|
||||||
"coll:adapt:component_open: done!");
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -131,16 +118,14 @@ static int adapt_register(void)
|
|||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
|
||||||
|
|
||||||
int adapt_verbose = 0;
|
cs->adapt_verbose = ompi_coll_base_framework.framework_verbose;
|
||||||
(void) mca_base_component_var_register(c, "verbose",
|
(void) mca_base_component_var_register(c, "verbose",
|
||||||
"Verbose level",
|
"Verbose level (default set to the collective framework verbosity)",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY, &adapt_verbose);
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose);
|
||||||
cs->adapt_output = opal_output_open(NULL);
|
|
||||||
opal_output_set_verbosity(cs->adapt_output, adapt_verbose);
|
|
||||||
|
|
||||||
cs->adapt_context_free_list_min = 10;
|
cs->adapt_context_free_list_min = 64;
|
||||||
(void) mca_base_component_var_register(c, "context_free_list_min",
|
(void) mca_base_component_var_register(c, "context_free_list_min",
|
||||||
"Minimum number of segments in context free list",
|
"Minimum number of segments in context free list",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
@ -148,7 +133,7 @@ static int adapt_register(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&cs->adapt_context_free_list_min);
|
&cs->adapt_context_free_list_min);
|
||||||
|
|
||||||
cs->adapt_context_free_list_max = 10000;
|
cs->adapt_context_free_list_max = 1024;
|
||||||
(void) mca_base_component_var_register(c, "context_free_list_max",
|
(void) mca_base_component_var_register(c, "context_free_list_max",
|
||||||
"Maximum number of segments in context free list",
|
"Maximum number of segments in context free list",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
@ -156,15 +141,15 @@ static int adapt_register(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&cs->adapt_context_free_list_max);
|
&cs->adapt_context_free_list_max);
|
||||||
|
|
||||||
cs->adapt_context_free_list_inc = 10;
|
cs->adapt_context_free_list_inc = 32;
|
||||||
(void) mca_base_component_var_register(c, "context_free_list_inc",
|
(void) mca_base_component_var_register(c, "context_free_list_inc",
|
||||||
"Increasement number of segments in context free list",
|
"Increasement number of segments in context free list",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&cs->adapt_context_free_list_inc);
|
&cs->adapt_context_free_list_inc);
|
||||||
ompi_coll_adapt_ibcast_init();
|
ompi_coll_adapt_ibcast_register();
|
||||||
ompi_coll_adapt_ireduce_init();
|
ompi_coll_adapt_ireduce_register();
|
||||||
|
|
||||||
return adapt_verify_mca_variables();
|
return adapt_verify_mca_variables();
|
||||||
}
|
}
|
||||||
|
@ -12,58 +12,15 @@
|
|||||||
#include "ompi/mca/coll/coll.h"
|
#include "ompi/mca/coll/coll.h"
|
||||||
#include "coll_adapt_context.h"
|
#include "coll_adapt_context.h"
|
||||||
|
|
||||||
static void ompi_coll_adapt_bcast_context_constructor(ompi_coll_adapt_bcast_context_t * bcast_context)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_bcast_context_destructor(ompi_coll_adapt_bcast_context_t * bcast_context)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ompi_coll_adapt_constant_bcast_context_constructor(ompi_coll_adapt_constant_bcast_context_t * con)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_constant_bcast_context_destructor(ompi_coll_adapt_constant_bcast_context_t
|
|
||||||
* con)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
|
||||||
ompi_coll_adapt_bcast_context_constructor,
|
NULL, NULL);
|
||||||
ompi_coll_adapt_bcast_context_destructor);
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t,
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t,
|
||||||
ompi_coll_adapt_constant_bcast_context_constructor,
|
NULL, NULL);
|
||||||
ompi_coll_adapt_constant_bcast_context_destructor);
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_reduce_context_constructor(ompi_coll_adapt_reduce_context_t *
|
|
||||||
reduce_context)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_reduce_context_destructor(ompi_coll_adapt_reduce_context_t *
|
|
||||||
reduce_context)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ompi_coll_adapt_constant_reduce_context_constructor(ompi_coll_adapt_constant_reduce_context_t * con)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ompi_coll_adapt_constant_reduce_context_destructor(ompi_coll_adapt_constant_reduce_context_t * con)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
|
||||||
ompi_coll_adapt_reduce_context_constructor,
|
NULL, NULL);
|
||||||
ompi_coll_adapt_reduce_context_destructor);
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
|
||||||
ompi_coll_adapt_constant_reduce_context_constructor,
|
NULL, NULL);
|
||||||
ompi_coll_adapt_constant_reduce_context_destructor);
|
|
||||||
|
@ -43,7 +43,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[
|
|||||||
/*
|
/*
|
||||||
* Set up MCA parameters of MPI_Bcast and MPI_IBcast
|
* Set up MCA parameters of MPI_Bcast and MPI_IBcast
|
||||||
*/
|
*/
|
||||||
int ompi_coll_adapt_ibcast_init(void)
|
int ompi_coll_adapt_ibcast_register(void)
|
||||||
{
|
{
|
||||||
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
|
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
|
||||||
|
|
||||||
@ -78,7 +78,6 @@ int ompi_coll_adapt_ibcast_init(void)
|
|||||||
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests);
|
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests);
|
||||||
|
|
||||||
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
|
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
|
||||||
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +89,6 @@ int ompi_coll_adapt_ibcast_fini(void)
|
|||||||
if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) {
|
if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) {
|
||||||
OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list);
|
OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list);
|
||||||
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
|
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
|
||||||
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
|
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -179,7 +177,6 @@ static int send_cb(ompi_request_t * req)
|
|||||||
int num_sent = ++(context->con->num_sent_segs);
|
int num_sent = ++(context->con->num_sent_segs);
|
||||||
int num_recv_fini_t = context->con->num_recv_fini;
|
int num_recv_fini_t = context->con->num_recv_fini;
|
||||||
int rank = ompi_comm_rank(context->con->comm);
|
int rank = ompi_comm_rank(context->con->comm);
|
||||||
opal_mutex_t *mutex_temp = context->con->mutex;
|
|
||||||
/* Check whether signal the condition */
|
/* Check whether signal the condition */
|
||||||
if ((rank == context->con->root
|
if ((rank == context->con->root
|
||||||
&& num_sent == context->con->tree->tree_nextsize * context->con->num_segs)
|
&& num_sent == context->con->tree->tree_nextsize * context->con->num_segs)
|
||||||
@ -190,13 +187,13 @@ static int send_cb(ompi_request_t * req)
|
|||||||
context->con->num_segs)) {
|
context->con->num_segs)) {
|
||||||
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in send\n",
|
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in send\n",
|
||||||
ompi_comm_rank(context->con->comm)));
|
ompi_comm_rank(context->con->comm)));
|
||||||
OPAL_THREAD_UNLOCK(mutex_temp);
|
OPAL_THREAD_UNLOCK(context->con->mutex);
|
||||||
ibcast_request_fini(context);
|
ibcast_request_fini(context);
|
||||||
} else {
|
} else {
|
||||||
OBJ_RELEASE(context->con);
|
OBJ_RELEASE(context->con);
|
||||||
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
||||||
(opal_free_list_item_t *) context);
|
(opal_free_list_item_t *) context);
|
||||||
OPAL_THREAD_UNLOCK(mutex_temp);
|
OPAL_THREAD_UNLOCK(context->con->mutex);
|
||||||
}
|
}
|
||||||
req->req_free(&req);
|
req->req_free(&req);
|
||||||
/* Call back function return 1, which means successful */
|
/* Call back function return 1, which means successful */
|
||||||
@ -306,7 +303,6 @@ static int recv_cb(ompi_request_t * req)
|
|||||||
int num_sent = context->con->num_sent_segs;
|
int num_sent = context->con->num_sent_segs;
|
||||||
int num_recv_fini_t = ++(context->con->num_recv_fini);
|
int num_recv_fini_t = ++(context->con->num_recv_fini);
|
||||||
int rank = ompi_comm_rank(context->con->comm);
|
int rank = ompi_comm_rank(context->con->comm);
|
||||||
opal_mutex_t *mutex_temp = context->con->mutex;
|
|
||||||
|
|
||||||
/* If this process is leaf and has received all the segments */
|
/* If this process is leaf and has received all the segments */
|
||||||
if ((rank == context->con->root
|
if ((rank == context->con->root
|
||||||
@ -318,13 +314,13 @@ static int recv_cb(ompi_request_t * req)
|
|||||||
context->con->num_segs)) {
|
context->con->num_segs)) {
|
||||||
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in recv\n",
|
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in recv\n",
|
||||||
ompi_comm_rank(context->con->comm)));
|
ompi_comm_rank(context->con->comm)));
|
||||||
OPAL_THREAD_UNLOCK(mutex_temp);
|
OPAL_THREAD_UNLOCK(context->con->mutex);
|
||||||
ibcast_request_fini(context);
|
ibcast_request_fini(context);
|
||||||
} else {
|
} else {
|
||||||
OBJ_RELEASE(context->con);
|
OBJ_RELEASE(context->con);
|
||||||
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
||||||
(opal_free_list_item_t *) context);
|
(opal_free_list_item_t *) context);
|
||||||
OPAL_THREAD_UNLOCK(mutex_temp);
|
OPAL_THREAD_UNLOCK(context->con->mutex);
|
||||||
}
|
}
|
||||||
req->req_free(&req);
|
req->req_free(&req);
|
||||||
return 1;
|
return 1;
|
||||||
@ -334,9 +330,8 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
|
|||||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
||||||
mca_coll_base_module_t * module)
|
mca_coll_base_module_t * module)
|
||||||
{
|
{
|
||||||
if (count == 0) {
|
if (0 == count) {
|
||||||
ompi_request_t *temp_request;
|
ompi_request_t *temp_request = OBJ_NEW(ompi_request_t);
|
||||||
temp_request = OBJ_NEW(ompi_request_t);
|
|
||||||
OMPI_REQUEST_INIT(temp_request, false);
|
OMPI_REQUEST_INIT(temp_request, false);
|
||||||
temp_request->req_type = 0;
|
temp_request->req_type = 0;
|
||||||
temp_request->req_free = ompi_coll_adapt_request_free;
|
temp_request->req_free = ompi_coll_adapt_request_free;
|
||||||
@ -348,25 +343,23 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
|
|||||||
ompi_request_complete(temp_request, 1);
|
ompi_request_complete(temp_request, 1);
|
||||||
*request = temp_request;
|
*request = temp_request;
|
||||||
return MPI_SUCCESS;
|
return MPI_SUCCESS;
|
||||||
} else {
|
|
||||||
int rank = ompi_comm_rank(comm);
|
|
||||||
if (rank == root) {
|
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
|
|
||||||
"ibcast root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
|
|
||||||
root, mca_coll_adapt_component.adapt_ibcast_algorithm,
|
|
||||||
mca_coll_adapt_component.adapt_ibcast_segment_size,
|
|
||||||
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
|
|
||||||
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
|
|
||||||
}
|
}
|
||||||
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
|
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
|
||||||
ibcast_tag = ibcast_tag % 4096;
|
ibcast_tag = ibcast_tag % 4096;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
|
||||||
|
"ibcast tag %d root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
|
||||||
|
ibcast_tag, root, mca_coll_adapt_component.adapt_ibcast_algorithm,
|
||||||
|
mca_coll_adapt_component.adapt_ibcast_segment_size,
|
||||||
|
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
|
||||||
|
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
|
||||||
|
|
||||||
ompi_coll_adapt_ibcast_fn_t bcast_func =
|
ompi_coll_adapt_ibcast_fn_t bcast_func =
|
||||||
(ompi_coll_adapt_ibcast_fn_t)
|
(ompi_coll_adapt_ibcast_fn_t)
|
||||||
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
|
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
|
||||||
algorithm_fn_ptr;
|
algorithm_fn_ptr;
|
||||||
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
|
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ibcast functions with different algorithms
|
* Ibcast functions with different algorithms
|
||||||
@ -377,7 +370,7 @@ int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *
|
|||||||
mca_coll_base_module_t *module, int ibcast_tag)
|
mca_coll_base_module_t *module, int ibcast_tag)
|
||||||
{
|
{
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
|
||||||
return OMPI_SUCCESS;
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
|
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
|
||||||
@ -471,12 +464,7 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
|
|||||||
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
|
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
|
||||||
size_t seg_size, int ibcast_tag)
|
size_t seg_size, int ibcast_tag)
|
||||||
{
|
{
|
||||||
/* Tempory variables for iteration */
|
int i, j, rank, err;
|
||||||
int i, j;
|
|
||||||
/* Rank of this process */
|
|
||||||
int rank;
|
|
||||||
/* Record return value */
|
|
||||||
int err;
|
|
||||||
/* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */
|
/* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */
|
||||||
int min;
|
int min;
|
||||||
|
|
||||||
@ -498,15 +486,10 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
|
|||||||
/* Record how many isends have been issued for every child */
|
/* Record how many isends have been issued for every child */
|
||||||
int *send_array = NULL;
|
int *send_array = NULL;
|
||||||
|
|
||||||
/* Set up free list */
|
/* Atomically set up free list */
|
||||||
if (0 == mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled) {
|
if (NULL == mca_coll_adapt_component.adapt_ibcast_context_free_list) {
|
||||||
int32_t context_free_list_enabled =
|
opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
|
||||||
opal_atomic_add_fetch_32(&
|
opal_free_list_init(fl,
|
||||||
(mca_coll_adapt_component.
|
|
||||||
adapt_ibcast_context_free_list_enabled), 1);
|
|
||||||
if (1 == context_free_list_enabled) {
|
|
||||||
mca_coll_adapt_component.adapt_ibcast_context_free_list = OBJ_NEW(opal_free_list_t);
|
|
||||||
opal_free_list_init(mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
|
||||||
sizeof(ompi_coll_adapt_bcast_context_t),
|
sizeof(ompi_coll_adapt_bcast_context_t),
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
|
OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
|
||||||
@ -515,6 +498,9 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
|
|||||||
mca_coll_adapt_component.adapt_context_free_list_max,
|
mca_coll_adapt_component.adapt_context_free_list_max,
|
||||||
mca_coll_adapt_component.adapt_context_free_list_inc,
|
mca_coll_adapt_component.adapt_context_free_list_inc,
|
||||||
NULL, 0, NULL, NULL, NULL);
|
NULL, 0, NULL, NULL, NULL);
|
||||||
|
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ibcast_context_free_list,
|
||||||
|
&(intptr_t){0}, fl) ) {
|
||||||
|
OBJ_RELEASE(fl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,13 +12,5 @@
|
|||||||
#include "coll_adapt.h"
|
#include "coll_adapt.h"
|
||||||
#include "coll_adapt_inbuf.h"
|
#include "coll_adapt_inbuf.h"
|
||||||
|
|
||||||
static void ompi_coll_adapt_inbuf_constructor(ompi_coll_adapt_inbuf_t * inbuf)
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t,
|
||||||
{
|
NULL, NULL);
|
||||||
}
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_inbuf_destructor(ompi_coll_adapt_inbuf_t * inbuf)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t, ompi_coll_adapt_inbuf_constructor,
|
|
||||||
ompi_coll_adapt_inbuf_destructor);
|
|
||||||
|
@ -47,7 +47,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ireduce_algorithm_index
|
|||||||
/*
|
/*
|
||||||
* Set up MCA parameters of MPI_Reduce and MPI_Ireduce
|
* Set up MCA parameters of MPI_Reduce and MPI_Ireduce
|
||||||
*/
|
*/
|
||||||
int ompi_coll_adapt_ireduce_init(void)
|
int ompi_coll_adapt_ireduce_register(void)
|
||||||
{
|
{
|
||||||
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
|
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
|
||||||
|
|
||||||
@ -107,7 +107,6 @@ int ompi_coll_adapt_ireduce_init(void)
|
|||||||
&mca_coll_adapt_component.adapt_inbuf_free_list_inc);
|
&mca_coll_adapt_component.adapt_inbuf_free_list_inc);
|
||||||
|
|
||||||
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
|
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
|
||||||
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,7 +118,6 @@ int ompi_coll_adapt_ireduce_fini(void)
|
|||||||
if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) {
|
if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) {
|
||||||
OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list);
|
OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list);
|
||||||
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
|
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
|
||||||
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
|
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ireduce fini\n"));
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ireduce fini\n"));
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -148,26 +146,22 @@ static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_c
|
|||||||
static int add_to_list(opal_list_t * list, int id)
|
static int add_to_list(opal_list_t * list, int id)
|
||||||
{
|
{
|
||||||
ompi_coll_adapt_item_t *item;
|
ompi_coll_adapt_item_t *item;
|
||||||
int ret = 0;
|
|
||||||
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
|
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
|
||||||
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
|
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
|
||||||
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
|
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
|
||||||
if (item->id == id) {
|
if (item->id == id) {
|
||||||
(item->count)++;
|
(item->count)++;
|
||||||
ret = 1;
|
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
|
||||||
break;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ret == 0) {
|
/* Add a new object to the list with count set to 1 */
|
||||||
item = OBJ_NEW(ompi_coll_adapt_item_t);
|
item = OBJ_NEW(ompi_coll_adapt_item_t);
|
||||||
item->id = id;
|
item->id = id;
|
||||||
item->count = 1;
|
item->count = 1;
|
||||||
opal_list_append(list, (opal_list_item_t *) item);
|
opal_list_append(list, (opal_list_item_t *) item);
|
||||||
ret = 2;
|
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
|
||||||
}
|
return 2;
|
||||||
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return %d\n",
|
|
||||||
ret));
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -250,7 +244,6 @@ static int send_cb(ompi_request_t * req)
|
|||||||
adapt_ireduce_context_free_list);
|
adapt_ireduce_context_free_list);
|
||||||
if (context->con->tree->tree_nextsize > 0) {
|
if (context->con->tree->tree_nextsize > 0) {
|
||||||
send_context->buff = context->con->accumbuf[item->id];
|
send_context->buff = context->con->accumbuf[item->id];
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
send_context->buff =
|
send_context->buff =
|
||||||
context->buff + (item->id - context->frag_id) * context->con->segment_increment;
|
context->buff + (item->id - context->frag_id) * context->con->segment_increment;
|
||||||
@ -530,27 +523,23 @@ int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi
|
|||||||
{
|
{
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
return MPI_SUCCESS;
|
return MPI_SUCCESS;
|
||||||
} else {
|
}
|
||||||
int rank = ompi_comm_rank(comm);
|
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
|
||||||
if (rank == root) {
|
ireduce_tag = (ireduce_tag % 4096) + 4096;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
|
||||||
"ireduce root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
|
"ireduce tag %d root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
|
||||||
root, mca_coll_adapt_component.adapt_ireduce_algorithm,
|
ireduce_tag, root, mca_coll_adapt_component.adapt_ireduce_algorithm,
|
||||||
mca_coll_adapt_component.adapt_ireduce_segment_size,
|
mca_coll_adapt_component.adapt_ireduce_segment_size,
|
||||||
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
|
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
|
||||||
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
|
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
|
||||||
}
|
|
||||||
/* Get ireduce tag */
|
|
||||||
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
|
|
||||||
ireduce_tag = (ireduce_tag % 4096) + 4096;
|
|
||||||
fflush(stdout);
|
|
||||||
ompi_coll_adapt_ireduce_fn_t reduce_func =
|
ompi_coll_adapt_ireduce_fn_t reduce_func =
|
||||||
(ompi_coll_adapt_ireduce_fn_t)
|
(ompi_coll_adapt_ireduce_fn_t)
|
||||||
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
|
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
|
||||||
adapt_ireduce_algorithm].algorithm_fn_ptr;
|
adapt_ireduce_algorithm].algorithm_fn_ptr;
|
||||||
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
|
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ireduce functions with different algorithms
|
* Ireduce functions with different algorithms
|
||||||
@ -562,7 +551,7 @@ int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
|
|||||||
mca_coll_base_module_t *module, int ireduce_tag)
|
mca_coll_base_module_t *module, int ireduce_tag)
|
||||||
{
|
{
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
|
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
|
||||||
return OMPI_SUCCESS;
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
|
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
|
||||||
@ -688,15 +677,10 @@ int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
|
|||||||
ompi_datatype_get_true_extent(dtype, &true_lower_bound, &true_extent);
|
ompi_datatype_get_true_extent(dtype, &true_lower_bound, &true_extent);
|
||||||
real_seg_size = true_extent + (ptrdiff_t) (seg_count - 1) * extent;
|
real_seg_size = true_extent + (ptrdiff_t) (seg_count - 1) * extent;
|
||||||
|
|
||||||
/* Set up free list */
|
/* Atomically set up free list */
|
||||||
if (0 == mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled) {
|
if (NULL == mca_coll_adapt_component.adapt_ireduce_context_free_list) {
|
||||||
int32_t context_free_list_enabled =
|
opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
|
||||||
opal_atomic_add_fetch_32(&
|
opal_free_list_init(fl,
|
||||||
(mca_coll_adapt_component.
|
|
||||||
adapt_ireduce_context_free_list_enabled), 1);
|
|
||||||
if (1 == context_free_list_enabled) {
|
|
||||||
mca_coll_adapt_component.adapt_ireduce_context_free_list = OBJ_NEW(opal_free_list_t);
|
|
||||||
opal_free_list_init(mca_coll_adapt_component.adapt_ireduce_context_free_list,
|
|
||||||
sizeof(ompi_coll_adapt_reduce_context_t),
|
sizeof(ompi_coll_adapt_reduce_context_t),
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
|
OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
|
||||||
@ -705,6 +689,9 @@ int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
|
|||||||
mca_coll_adapt_component.adapt_context_free_list_max,
|
mca_coll_adapt_component.adapt_context_free_list_max,
|
||||||
mca_coll_adapt_component.adapt_context_free_list_inc,
|
mca_coll_adapt_component.adapt_context_free_list_inc,
|
||||||
NULL, 0, NULL, NULL, NULL);
|
NULL, 0, NULL, NULL, NULL);
|
||||||
|
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ireduce_context_free_list,
|
||||||
|
&(intptr_t){0}, fl) ) {
|
||||||
|
OBJ_RELEASE(fl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,13 +11,5 @@
|
|||||||
|
|
||||||
#include "coll_adapt_item.h"
|
#include "coll_adapt_item.h"
|
||||||
|
|
||||||
static void ompi_coll_adapt_item_constructor(ompi_coll_adapt_item_t * item)
|
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t,
|
||||||
{
|
NULL, NULL);
|
||||||
}
|
|
||||||
|
|
||||||
static void ompi_coll_adapt_item_destructor(ompi_coll_adapt_item_t * item)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t, ompi_coll_adapt_item_constructor,
|
|
||||||
ompi_coll_adapt_item_destructor);
|
|
||||||
|
@ -16,7 +16,7 @@ struct ompi_coll_adapt_item_s {
|
|||||||
opal_list_item_t super;
|
opal_list_item_t super;
|
||||||
/* Fragment id */
|
/* Fragment id */
|
||||||
int id;
|
int id;
|
||||||
/* The number of children which have received the current segment from */
|
/* The number of children which have received the current segment */
|
||||||
int count;
|
int count;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -19,11 +19,13 @@ int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_
|
|||||||
{
|
{
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
return MPI_SUCCESS;
|
return MPI_SUCCESS;
|
||||||
} else {
|
}
|
||||||
ompi_request_t *request;
|
ompi_request_t *request = NULL;
|
||||||
int err =
|
int err = ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
|
||||||
ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
|
if( MPI_SUCCESS != err ) {
|
||||||
|
if( NULL == request )
|
||||||
|
return err;
|
||||||
|
}
|
||||||
ompi_request_wait(&request, MPI_STATUS_IGNORE);
|
ompi_request_wait(&request, MPI_STATUS_IGNORE);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@ -492,10 +492,6 @@ struct mca_coll_base_comm_t {
|
|||||||
|
|
||||||
/* in-order binary tree (root of the in-order binary tree is rank 0) */
|
/* in-order binary tree (root of the in-order binary tree is rank 0) */
|
||||||
ompi_coll_tree_t *cached_in_order_bintree;
|
ompi_coll_tree_t *cached_in_order_bintree;
|
||||||
|
|
||||||
/* linear */
|
|
||||||
ompi_coll_tree_t *cached_linear;
|
|
||||||
int cached_linear_root;
|
|
||||||
};
|
};
|
||||||
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
|
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
|
||||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
|
||||||
|
@ -215,18 +215,9 @@ static int tuned_open(void)
|
|||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
#if OPAL_ENABLE_DEBUG
|
#if OPAL_ENABLE_DEBUG
|
||||||
{
|
if (ompi_coll_base_framework.framework_verbose) {
|
||||||
int param;
|
|
||||||
|
|
||||||
param = mca_base_var_find("ompi", "coll", "base", "verbose");
|
|
||||||
if (param >= 0) {
|
|
||||||
const int *verbose = NULL;
|
|
||||||
mca_base_var_get_value(param, &verbose, NULL, NULL);
|
|
||||||
if (verbose && verbose[0] > 0) {
|
|
||||||
ompi_coll_tuned_stream = opal_output_open(NULL);
|
ompi_coll_tuned_stream = opal_output_open(NULL);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* OPAL_ENABLE_DEBUG */
|
#endif /* OPAL_ENABLE_DEBUG */
|
||||||
|
|
||||||
/* now check that the user hasn't overrode any of the decision functions if dynamic rules are enabled */
|
/* now check that the user hasn't overrode any of the decision functions if dynamic rules are enabled */
|
||||||
|
@ -438,9 +438,10 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
|
|||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
||||||
if(NULL != request->req_complete_cb) {
|
if(NULL != request->req_complete_cb) {
|
||||||
ompi_request_complete_fn_t temp = request->req_complete_cb;
|
/* Set the request cb to NULL to allow resetting in the callback */
|
||||||
|
ompi_request_complete_fn_t fct = request->req_complete_cb;
|
||||||
request->req_complete_cb = NULL;
|
request->req_complete_cb = NULL;
|
||||||
rc = temp( request );
|
rc = fct( request );
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0 == rc) {
|
if (0 == rc) {
|
||||||
@ -454,10 +455,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
|
|||||||
if( REQUEST_PENDING != tmp_sync )
|
if( REQUEST_PENDING != tmp_sync )
|
||||||
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
|
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
|
||||||
}
|
}
|
||||||
} else {
|
} else
|
||||||
request->req_complete = REQUEST_COMPLETED;
|
request->req_complete = REQUEST_COMPLETED;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -468,14 +468,13 @@ static inline int ompi_request_set_callback(ompi_request_t* request,
|
|||||||
{
|
{
|
||||||
request->req_complete_cb_data = cb_data;
|
request->req_complete_cb_data = cb_data;
|
||||||
request->req_complete_cb = cb;
|
request->req_complete_cb = cb;
|
||||||
int rc = 0;
|
|
||||||
/* If request is completed and the callback is not called, need to call callback */
|
/* If request is completed and the callback is not called, need to call callback */
|
||||||
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
|
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
|
||||||
ompi_request_complete_fn_t temp = request->req_complete_cb;
|
ompi_request_complete_fn_t fct = request->req_complete_cb;
|
||||||
request->req_complete_cb = NULL;
|
request->req_complete_cb = NULL;
|
||||||
rc = temp( request );
|
return fct( request );
|
||||||
}
|
}
|
||||||
return rc;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user