1
1
* piggybacking Bull functionalities

* coll/adapt: Fix naming conventions and C11 atomic use

This commit fixes several naming convention issues. Function names
should use the ompi_coll_adapt prefix rather than mca_coll_adapt, which
is reserved for component and module naming (cf. the tuned collective
component).

It also fixes the use of the _Atomic construct, which is only valid in
C11. The OPAL atomic types already account for this, so use the
opal_atomic_* types instead.

* coll/adapt: Remove unused component field in module

This commit removes an unneeded field referencing the component in the
module of adapt, as it is already available through the
mca_coll_adapt_component global variable.

Signed-off-by: Marc Sergent <marc.sergent@atos.net>
Co-authored-by: Lemarinier, Pierre <pierre.lemarinier@atos.net>
Co-authored-by: pierrele <31764860+pierrele@users.noreply.github.com>
Этот коммит содержится в:
bsergentm 2020-05-06 18:30:03 +02:00 коммит произвёл Jeff Squyres
родитель fe73586808
Коммит a4be3bb93d
14 изменённых файлов: 339 добавлений и 292 удалений

Просмотреть файл

@ -21,13 +21,15 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
BEGIN_C_DECLS typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t;
BEGIN_C_DECLS
typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t;
/*
* Structure to hold the adapt coll component. First it holds the
* base coll component, and then holds a bunch of
* adapt-coll-component-specific stuff (e.g., current MCA param
* values).
* values).
*/
typedef struct mca_coll_adapt_component_t {
/* Base coll component */
@ -45,7 +47,7 @@ typedef struct mca_coll_adapt_component_t {
/* MCA parameter: Minimum number of segment in context free list */
int adapt_context_free_list_min;
/* MCA parameter: Increasment number of segment in context free list */
/* MCA parameter: Increasement number of segment in context free list */
int adapt_context_free_list_inc;
/* Bcast MCA parameter */
@ -55,7 +57,7 @@ typedef struct mca_coll_adapt_component_t {
int adapt_ibcast_max_recv_requests;
/* Bcast free list */
opal_free_list_t *adapt_ibcast_context_free_list;
_Atomic int32_t adapt_ibcast_context_free_list_enabled;
opal_atomic_int32_t adapt_ibcast_context_free_list_enabled;
/* Reduce MCA parameter */
int adapt_ireduce_algorithm;
@ -68,7 +70,7 @@ typedef struct mca_coll_adapt_component_t {
/* Reduce free list */
opal_free_list_t *adapt_ireduce_context_free_list;
_Atomic int32_t adapt_ireduce_context_free_list_enabled;
opal_atomic_int32_t adapt_ireduce_context_free_list_enabled;
} mca_coll_adapt_component_t;
@ -78,9 +80,7 @@ struct mca_coll_adapt_module_t {
mca_coll_base_module_t super;
/* Whether this module has been lazily initialized or not yet */
bool enabled;
/* Pointer to mca_coll_adapt_component */
mca_coll_adapt_component_t *adapt_component;
bool adapt_enabled;
};
OBJ_CLASS_DECLARATION(mca_coll_adapt_module_t);
@ -88,11 +88,10 @@ OBJ_CLASS_DECLARATION(mca_coll_adapt_module_t);
OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component;
/* ADAPT module functions */
int mca_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
/* Free ADAPT request */
int adapt_request_free(ompi_request_t ** request);
int ompi_coll_adapt_request_free(ompi_request_t **request);
#endif /* MCA_COLL_ADAPT_EXPORT_H */
#endif /* MCA_COLL_ADAPT_EXPORT_H */

Просмотреть файл

@ -14,82 +14,88 @@
#include "ompi/mca/coll/base/coll_base_functions.h"
#include <math.h>
typedef struct mca_coll_adapt_algorithm_index_s {
typedef struct ompi_coll_adapt_algorithm_index_s {
int algorithm_index;
uintptr_t algorithm_fn_ptr;
} mca_coll_adapt_algorithm_index_t;
} ompi_coll_adapt_algorithm_index_t;
/* Bcast */
int mca_coll_adapt_ibcast_init(void);
int mca_coll_adapt_ibcast_fini(void);
int mca_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_init(void);
int ompi_coll_adapt_ibcast_fini(void);
int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module);
int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ibcast_tag);
int mca_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag);
int mca_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int mca_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int mca_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag);
int mca_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t *module, int ibcast_tag);
/* Reduce */
int mca_coll_adapt_ireduce_init(void);
int mca_coll_adapt_ireduce_fini(void);
int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
int ompi_coll_adapt_ireduce_init(void);
int ompi_coll_adapt_ireduce_fini(void);
int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);
int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module);
int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ireduce_tag);
int mca_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t *module, int ireduce_tag);
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int mca_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int mca_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int mca_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);

Просмотреть файл

@ -12,14 +12,14 @@
#include "coll_adapt.h"
#include "coll_adapt_algorithms.h"
int mca_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, mca_coll_base_module_t * module)
{
if (count == 0) {
return MPI_SUCCESS;
} else {
ompi_request_t *request;
int err = mca_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}

Просмотреть файл

@ -36,35 +36,32 @@ static int adapt_register(void);
*/
mca_coll_adapt_component_t mca_coll_adapt_component = {
/* First, fill in the super */
{
/* First, the mca_component_t struct containing meta
information about the component itself */
/* First, the mca_component_t struct containing meta
information about the component itself */
.collm_version = {
MCA_COLL_BASE_VERSION_2_0_0,
{
MCA_COLL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "adapt",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component name and version */
"adapt",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component functions */
.mca_open_component = adapt_open,
.mca_close_component = adapt_close,
.mca_register_component_params = adapt_register,
},
.collm_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
/* Component functions */
adapt_open, /* open */
adapt_close,
NULL, /* query */
adapt_register},
{
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE},
/* Initialization / querying functions */
mca_coll_adapt_init_query,
mca_coll_adapt_comm_query,
},
/* Initialization / querying functions */
.collm_init_query = ompi_coll_adapt_init_query,
.collm_comm_query = ompi_coll_adapt_comm_query,
},
/* adapt-component specific information */
@ -81,6 +78,25 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
/* Open the component */
/**
 * Component open hook for coll/adapt.
 *
 * Looks up the global coll verbosity variable (ompi / coll / base /
 * verbose).  When that variable exists and holds a positive value, a new
 * output stream is opened, stored in the component's adapt_output field,
 * and given that verbosity.  A level-1 trace message is then emitted on
 * adapt_output.
 *
 * @return OMPI_SUCCESS in every case.
 */
static int adapt_open(void)
{
    mca_coll_adapt_component_t *component = &mca_coll_adapt_component;
    const int *level = NULL;
    const int var_index = mca_base_var_find("ompi", "coll", "base", "verbose");

    /* Only query the value when the variable was actually found. */
    if (0 <= var_index) {
        mca_base_var_get_value(var_index, &level, NULL, NULL);
    }

    /* Adopt the global coll verbosity as ours when it is positive;
     * level is still NULL here if the lookup above was skipped. */
    if (NULL != level && 0 < level[0]) {
        component->adapt_output = opal_output_open(NULL);
        opal_output_set_verbosity(component->adapt_output, level[0]);
    }

    opal_output_verbose(1, component->adapt_output,
                        "coll:adapt:component_open: done!");
    return OMPI_SUCCESS;
}
@ -88,8 +104,8 @@ static int adapt_open(void)
/* Shut down the component */
static int adapt_close(void)
{
mca_coll_adapt_ibcast_fini();
mca_coll_adapt_ireduce_fini();
ompi_coll_adapt_ibcast_fini();
ompi_coll_adapt_ireduce_fini();
return OMPI_SUCCESS;
}
@ -125,7 +141,7 @@ static int adapt_register(void)
opal_output_set_verbosity(cs->adapt_output, adapt_verbose);
cs->adapt_context_free_list_min = 10;
(void) mca_base_component_var_register(c, "context_free_list_max",
(void) mca_base_component_var_register(c, "context_free_list_min",
"Minimum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
@ -133,7 +149,7 @@ static int adapt_register(void)
&cs->adapt_context_free_list_min);
cs->adapt_context_free_list_max = 10000;
(void) mca_base_component_var_register(c, "context_free_list_min",
(void) mca_base_component_var_register(c, "context_free_list_max",
"Maximum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
@ -147,8 +163,8 @@ static int adapt_register(void)
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_inc);
mca_coll_adapt_ibcast_init();
mca_coll_adapt_ireduce_init();
ompi_coll_adapt_ibcast_init();
ompi_coll_adapt_ireduce_init();
return adapt_verify_mca_variables();
}

Просмотреть файл

@ -12,60 +12,58 @@
#include "ompi/mca/coll/coll.h"
#include "coll_adapt_context.h"
static void mca_coll_adapt_bcast_context_constructor(mca_coll_adapt_bcast_context_t * bcast_context)
static void ompi_coll_adapt_bcast_context_constructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void mca_coll_adapt_bcast_context_destructor(mca_coll_adapt_bcast_context_t * bcast_context)
static void ompi_coll_adapt_bcast_context_destructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void
mca_coll_adapt_constant_bcast_context_constructor(mca_coll_adapt_constant_bcast_context_t * con)
ompi_coll_adapt_constant_bcast_context_constructor(ompi_coll_adapt_constant_bcast_context_t * con)
{
}
static void mca_coll_adapt_constant_bcast_context_destructor(mca_coll_adapt_constant_bcast_context_t
static void ompi_coll_adapt_constant_bcast_context_destructor(ompi_coll_adapt_constant_bcast_context_t
* con)
{
}
OBJ_CLASS_INSTANCE(mca_coll_adapt_bcast_context_t, opal_free_list_item_t,
mca_coll_adapt_bcast_context_constructor,
mca_coll_adapt_bcast_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
ompi_coll_adapt_bcast_context_constructor,
ompi_coll_adapt_bcast_context_destructor);
OBJ_CLASS_INSTANCE(mca_coll_adapt_constant_bcast_context_t, opal_object_t,
mca_coll_adapt_constant_bcast_context_constructor,
mca_coll_adapt_constant_bcast_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t,
ompi_coll_adapt_constant_bcast_context_constructor,
ompi_coll_adapt_constant_bcast_context_destructor);
static void mca_coll_adapt_reduce_context_constructor(mca_coll_adapt_reduce_context_t *
static void ompi_coll_adapt_reduce_context_constructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void mca_coll_adapt_reduce_context_destructor(mca_coll_adapt_reduce_context_t *
static void ompi_coll_adapt_reduce_context_destructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void
mca_coll_adapt_constant_reduce_context_constructor(mca_coll_adapt_constant_reduce_context_t * con)
ompi_coll_adapt_constant_reduce_context_constructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
static void
mca_coll_adapt_constant_reduce_context_destructor(mca_coll_adapt_constant_reduce_context_t * con)
ompi_coll_adapt_constant_reduce_context_destructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
OBJ_CLASS_INSTANCE(mca_coll_adapt_reduce_context_t, opal_free_list_item_t,
mca_coll_adapt_reduce_context_constructor,
mca_coll_adapt_reduce_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
ompi_coll_adapt_reduce_context_constructor,
ompi_coll_adapt_reduce_context_destructor);
OBJ_CLASS_INSTANCE(mca_coll_adapt_constant_reduce_context_t, opal_object_t,
mca_coll_adapt_constant_reduce_context_constructor,
mca_coll_adapt_constant_reduce_context_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
ompi_coll_adapt_constant_reduce_context_constructor,
ompi_coll_adapt_constant_reduce_context_destructor);

Просмотреть файл

@ -19,7 +19,7 @@
#include "coll_adapt_inbuf.h"
/* Bcast constant context in bcast context */
struct mca_coll_adapt_constant_bcast_context_s {
struct ompi_coll_adapt_constant_bcast_context_s {
opal_object_t super;
int root;
size_t count;
@ -42,29 +42,29 @@ struct mca_coll_adapt_constant_bcast_context_s {
int ibcast_tag;
};
typedef struct mca_coll_adapt_constant_bcast_context_s mca_coll_adapt_constant_bcast_context_t;
typedef struct ompi_coll_adapt_constant_bcast_context_s ompi_coll_adapt_constant_bcast_context_t;
OBJ_CLASS_DECLARATION(mca_coll_adapt_constant_bcast_context_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_constant_bcast_context_t);
/* Bcast context of each segment*/
typedef struct mca_coll_adapt_bcast_context_s mca_coll_adapt_bcast_context_t;
typedef struct ompi_coll_adapt_bcast_context_s ompi_coll_adapt_bcast_context_t;
typedef int (*mca_coll_adapt_bcast_cuda_callback_fn_t) (mca_coll_adapt_bcast_context_t * context);
typedef int (*ompi_coll_adapt_bcast_cuda_callback_fn_t) (ompi_coll_adapt_bcast_context_t * context);
struct mca_coll_adapt_bcast_context_s {
struct ompi_coll_adapt_bcast_context_s {
opal_free_list_item_t super;
char *buff;
int frag_id;
int child_id;
int peer;
mca_coll_adapt_constant_bcast_context_t *con;
ompi_coll_adapt_constant_bcast_context_t *con;
};
OBJ_CLASS_DECLARATION(mca_coll_adapt_bcast_context_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_bcast_context_t);
/* Reduce constant context in reduce context */
struct mca_coll_adapt_constant_reduce_context_s {
struct ompi_coll_adapt_constant_reduce_context_s {
opal_object_t super;
size_t count;
size_t seg_count;
@ -81,7 +81,7 @@ struct mca_coll_adapt_constant_reduce_context_s {
/* Number of sent segments */
int32_t num_sent_segs;
/* Next seg need to be received for every children */
_Atomic int32_t *next_recv_segs;
opal_atomic_int32_t *next_recv_segs;
/* Mutex to protect recv_list */
opal_mutex_t *mutex_recv_list;
/* Mutex to protect num_recv_segs */
@ -95,12 +95,14 @@ struct mca_coll_adapt_constant_reduce_context_s {
ompi_coll_tree_t *tree;
/* Accumulate buff */
char **accumbuf;
/* inbuf list address of accumbuf */
ompi_coll_adapt_inbuf_t ** accumbuf_to_inbuf;
opal_free_list_t *inbuf_list;
/* A list to store the segments which are received and not yet be sent */
opal_list_t *recv_list;
ptrdiff_t lower_bound;
/* How many sends are posted but not finished */
_Atomic int32_t ongoing_send;
opal_atomic_int32_t ongoing_send;
char *sbuf;
char *rbuf;
int root;
@ -109,24 +111,24 @@ struct mca_coll_adapt_constant_reduce_context_s {
int ireduce_tag;
};
typedef struct mca_coll_adapt_constant_reduce_context_s mca_coll_adapt_constant_reduce_context_t;
typedef struct ompi_coll_adapt_constant_reduce_context_s ompi_coll_adapt_constant_reduce_context_t;
OBJ_CLASS_DECLARATION(mca_coll_adapt_constant_reduce_context_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_constant_reduce_context_t);
/* Reduce context of each segment */
typedef struct mca_coll_adapt_reduce_context_s mca_coll_adapt_reduce_context_t;
typedef struct ompi_coll_adapt_reduce_context_s ompi_coll_adapt_reduce_context_t;
typedef int (*mca_coll_adapt_reduce_cuda_callback_fn_t) (mca_coll_adapt_reduce_context_t * context);
typedef int (*ompi_coll_adapt_reduce_cuda_callback_fn_t) (ompi_coll_adapt_reduce_context_t * context);
struct mca_coll_adapt_reduce_context_s {
struct ompi_coll_adapt_reduce_context_s {
opal_free_list_item_t super;
char *buff;
int frag_id;
int child_id;
int peer;
mca_coll_adapt_constant_reduce_context_t *con;
ompi_coll_adapt_constant_reduce_context_t *con;
/* store the incoming segment */
mca_coll_adapt_inbuf_t *inbuf;
ompi_coll_adapt_inbuf_t *inbuf;
};
OBJ_CLASS_DECLARATION(mca_coll_adapt_reduce_context_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_reduce_context_t);

Просмотреть файл

@ -21,33 +21,35 @@
#include "ompi/mca/pml/ob1/pml_ob1.h"
typedef int (*mca_coll_adapt_ibcast_fn_t) (void *buff,
typedef int (*ompi_coll_adapt_ibcast_fn_t) (void *buff,
int count,
struct ompi_datatype_t * datatype,
int root,
struct ompi_communicator_t * comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
mca_coll_base_module_t * module,
int ibcast_tag);
static mca_coll_adapt_algorithm_index_t mca_coll_adapt_ibcast_algorithm_index[] = {
{1, (uintptr_t) mca_coll_adapt_ibcast_binomial},
{2, (uintptr_t) mca_coll_adapt_ibcast_in_order_binomial},
{3, (uintptr_t) mca_coll_adapt_ibcast_binary},
{4, (uintptr_t) mca_coll_adapt_ibcast_pipeline},
{5, (uintptr_t) mca_coll_adapt_ibcast_chain},
{6, (uintptr_t) mca_coll_adapt_ibcast_linear},
static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[] = {
{0, (uintptr_t) ompi_coll_adapt_ibcast_tuned},
{1, (uintptr_t) ompi_coll_adapt_ibcast_binomial},
{2, (uintptr_t) ompi_coll_adapt_ibcast_in_order_binomial},
{3, (uintptr_t) ompi_coll_adapt_ibcast_binary},
{4, (uintptr_t) ompi_coll_adapt_ibcast_pipeline},
{5, (uintptr_t) ompi_coll_adapt_ibcast_chain},
{6, (uintptr_t) ompi_coll_adapt_ibcast_linear},
};
/*
* Set up MCA parameters of MPI_Bcast and MPI_IBcast
*/
int mca_coll_adapt_ibcast_init(void)
int ompi_coll_adapt_ibcast_init(void)
{
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
mca_coll_adapt_component.adapt_ibcast_algorithm = 1;
mca_base_component_var_register(c, "bcast_algorithm",
"Algorithm of broadcast, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
"Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_adapt_component.adapt_ibcast_algorithm);
@ -81,15 +83,15 @@ int mca_coll_adapt_ibcast_init(void)
}
/*
* Release the free list created in mca_coll_adapt_ibcast_generic
* Release the free list created in ompi_coll_adapt_ibcast_generic
*/
int mca_coll_adapt_ibcast_fini(void)
int ompi_coll_adapt_ibcast_fini(void)
{
if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list);
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
}
return OMPI_SUCCESS;
}
@ -97,7 +99,7 @@ int mca_coll_adapt_ibcast_fini(void)
/*
* Finish an ibcast request
*/
static int ibcast_request_fini(mca_coll_adapt_bcast_context_t * context)
static int ibcast_request_fini(ompi_coll_adapt_bcast_context_t * context)
{
ompi_request_t *temp_req = context->con->request;
if (context->con->tree->tree_nextsize != 0) {
@ -121,8 +123,8 @@ static int ibcast_request_fini(mca_coll_adapt_bcast_context_t * context)
*/
static int send_cb(ompi_request_t * req)
{
mca_coll_adapt_bcast_context_t *context =
(mca_coll_adapt_bcast_context_t *) req->req_complete_cb_data;
ompi_coll_adapt_bcast_context_t *context =
(ompi_coll_adapt_bcast_context_t *) req->req_complete_cb_data;
int err;
@ -136,10 +138,11 @@ static int send_cb(ompi_request_t * req)
/* If the current process has fragments in recv_array can be sent */
if (sent_id < context->con->num_recv_segs) {
ompi_request_t *send_req;
ompi_coll_adapt_bcast_context_t *send_context;
opal_free_list_t *free_list;
int new_id = context->con->recv_array[sent_id];
mca_coll_adapt_bcast_context_t *send_context =
(mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ibcast_context_free_list);
free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list;
send_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list);
send_context->buff =
context->buff + (new_id - context->frag_id) * context->con->real_seg_size;
send_context->frag_id = new_id;
@ -206,8 +209,8 @@ static int send_cb(ompi_request_t * req)
static int recv_cb(ompi_request_t * req)
{
/* Get necessary info from request */
mca_coll_adapt_bcast_context_t *context =
(mca_coll_adapt_bcast_context_t *) req->req_complete_cb_data;
ompi_coll_adapt_bcast_context_t *context =
(ompi_coll_adapt_bcast_context_t *) req->req_complete_cb_data;
int err, i;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
@ -220,14 +223,15 @@ static int recv_cb(ompi_request_t * req)
int num_recv_segs_t = ++(context->con->num_recv_segs);
context->con->recv_array[num_recv_segs_t - 1] = context->frag_id;
opal_free_list_t *free_list;
int new_id = num_recv_segs_t + mca_coll_adapt_component.adapt_ibcast_max_recv_requests - 1;
/* Receive new segment */
if (new_id < context->con->num_segs) {
ompi_request_t *recv_req;
ompi_coll_adapt_bcast_context_t *recv_context;
free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list;
/* Get new context item from free list */
mca_coll_adapt_bcast_context_t *recv_context =
(mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ibcast_context_free_list);
recv_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list);
recv_context->buff =
context->buff + (new_id - context->frag_id) * context->con->real_seg_size;
recv_context->frag_id = new_id;
@ -266,9 +270,9 @@ static int recv_cb(ompi_request_t * req)
send_count = context->con->count - context->frag_id * context->con->seg_count;
}
mca_coll_adapt_bcast_context_t *send_context =
(mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ibcast_context_free_list);
ompi_coll_adapt_bcast_context_t *send_context;
free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list;
send_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list);
send_context->buff = context->buff;
send_context->frag_id = context->frag_id;
send_context->child_id = i;
@ -326,7 +330,7 @@ static int recv_cb(ompi_request_t * req)
return 1;
}
int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module)
{
@ -335,7 +339,7 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp
temp_request = OBJ_NEW(ompi_request_t);
OMPI_REQUEST_INIT(temp_request, false);
temp_request->req_type = 0;
temp_request->req_free = adapt_request_free;
temp_request->req_free = ompi_coll_adapt_request_free;
temp_request->req_status.MPI_SOURCE = 0;
temp_request->req_status.MPI_TAG = 0;
temp_request->req_status.MPI_ERROR = 0;
@ -356,9 +360,9 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp
}
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
ibcast_tag = ibcast_tag % 4096;
mca_coll_adapt_ibcast_fn_t bcast_func =
(mca_coll_adapt_ibcast_fn_t)
mca_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
ompi_coll_adapt_ibcast_fn_t bcast_func =
(ompi_coll_adapt_ibcast_fn_t)
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
algorithm_fn_ptr;
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
}
@ -367,72 +371,81 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp
/*
* Ibcast functions with different algorithms
*/
int mca_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
/*
 * Placeholder for the "tuned" broadcast algorithm (index 0 in
 * ompi_coll_adapt_ibcast_algorithm_index).
 *
 * No broadcast is started and none of the arguments are used; in
 * particular *request is left untouched.  The function logs that the
 * algorithm is not implemented and reports OMPI_ERR_NOT_IMPLEMENTED, so
 * that selecting algorithm 0 is not mistaken for a successfully posted
 * operation.
 *
 * NOTE(review): callers that wait on the request without checking the
 * return code will presumably never see it complete -- confirm against
 * ompi_coll_adapt_ibcast / ompi_coll_adapt_bcast.
 *
 * Returns OMPI_ERR_NOT_IMPLEMENTED unconditionally.
 */
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype,
                                 int root, struct ompi_communicator_t *comm,
                                 ompi_request_t ** request,
                                 mca_coll_base_module_t *module, int ibcast_tag)
{
    OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
    return OMPI_ERR_NOT_IMPLEMENTED;
}
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_bmtree(comm, root);
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_in_order_bmtree(comm, root);
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_tree(2, comm, root);
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(1, comm, root);
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(4, comm, root);
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag)
{
@ -446,14 +459,14 @@ int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *
tree = ompi_coll_base_topo_build_tree(MAXTREEFANOUT, comm, root);
}
int err =
mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree,
mca_coll_adapt_component.adapt_ibcast_segment_size,
ibcast_tag);
return err;
}
int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ibcast_tag)
@ -494,9 +507,9 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
if (1 == context_free_list_enabled) {
mca_coll_adapt_component.adapt_ibcast_context_free_list = OBJ_NEW(opal_free_list_t);
opal_free_list_init(mca_coll_adapt_component.adapt_ibcast_context_free_list,
sizeof(mca_coll_adapt_bcast_context_t),
sizeof(ompi_coll_adapt_bcast_context_t),
opal_cache_line_size,
OBJ_CLASS(mca_coll_adapt_bcast_context_t),
OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
@ -510,7 +523,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
OMPI_REQUEST_INIT(temp_request, false);
temp_request->req_state = OMPI_REQUEST_ACTIVE;
temp_request->req_type = 0;
temp_request->req_free = adapt_request_free;
temp_request->req_free = ompi_coll_adapt_request_free;
temp_request->req_status.MPI_SOURCE = 0;
temp_request->req_status.MPI_TAG = 0;
temp_request->req_status.MPI_ERROR = 0;
@ -540,7 +553,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
}
/* Set constant context for send and recv call back */
mca_coll_adapt_constant_bcast_context_t *con = OBJ_NEW(mca_coll_adapt_constant_bcast_context_t);
ompi_coll_adapt_constant_bcast_context_t *con = OBJ_NEW(ompi_coll_adapt_constant_bcast_context_t);
con->root = root;
con->count = count;
con->seg_count = seg_count;
@ -582,7 +595,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
recv_array[i] = i;
}
con->num_recv_segs = num_segs;
/* Set send_array, will send adapt_ibcast_max_send_requests segments */
/* Set send_array: each child starts with adapt_ibcast_max_send_requests segments to send */
for (i = 0; i < tree->tree_nextsize; i++) {
send_array[i] = mca_coll_adapt_component.adapt_ibcast_max_send_requests;
}
@ -595,8 +608,8 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
send_count = count - i * seg_count;
}
for (j = 0; j < tree->tree_nextsize; j++) {
mca_coll_adapt_bcast_context_t *context =
(mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
ompi_coll_adapt_bcast_context_t *context =
(ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ibcast_context_free_list);
context->buff = (char *) buff + i * real_seg_size;
context->frag_id = i;
@ -656,8 +669,8 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
if (i == (num_segs - 1)) {
recv_count = count - i * seg_count;
}
mca_coll_adapt_bcast_context_t *context =
(mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
ompi_coll_adapt_bcast_context_t *context =
(ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ibcast_context_free_list);
context->buff = (char *) buff + i * real_seg_size;
context->frag_id = i;
@ -691,4 +704,4 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
"[%d]: End of Ibcast\n", rank));
return MPI_SUCCESS;
}
}

Просмотреть файл

@ -12,13 +12,13 @@
#include "coll_adapt.h"
#include "coll_adapt_inbuf.h"
static void mca_coll_adapt_inbuf_constructor(mca_coll_adapt_inbuf_t * inbuf)
static void ompi_coll_adapt_inbuf_constructor(ompi_coll_adapt_inbuf_t * inbuf)
{
}
static void mca_coll_adapt_inbuf_destructor(mca_coll_adapt_inbuf_t * inbuf)
static void ompi_coll_adapt_inbuf_destructor(ompi_coll_adapt_inbuf_t * inbuf)
{
}
OBJ_CLASS_INSTANCE(mca_coll_adapt_inbuf_t, opal_free_list_item_t, mca_coll_adapt_inbuf_constructor,
mca_coll_adapt_inbuf_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t, ompi_coll_adapt_inbuf_constructor,
ompi_coll_adapt_inbuf_destructor);

Просмотреть файл

@ -14,13 +14,13 @@
#include "opal/class/opal_free_list.h"
struct mca_coll_adapt_inbuf_s {
struct ompi_coll_adapt_inbuf_s {
opal_free_list_item_t super;
char buff[1];
};
typedef struct mca_coll_adapt_inbuf_s mca_coll_adapt_inbuf_t;
typedef struct ompi_coll_adapt_inbuf_s ompi_coll_adapt_inbuf_t;
OBJ_CLASS_DECLARATION(mca_coll_adapt_inbuf_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_inbuf_t);
#endif /* MCA_COLL_ADAPT_INBUF_H */

Просмотреть файл

@ -24,7 +24,7 @@
/* MPI_Reduce and MPI_Ireduce in the ADAPT module only work for commutative operations */
typedef int (*mca_coll_adapt_ireduce_fn_t) (const void *sbuf,
typedef int (*ompi_coll_adapt_ireduce_fn_t) (const void *sbuf,
void *rbuf,
int count,
struct ompi_datatype_t * datatype,
@ -34,19 +34,20 @@ typedef int (*mca_coll_adapt_ireduce_fn_t) (const void *sbuf,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
static mca_coll_adapt_algorithm_index_t mca_coll_adapt_ireduce_algorithm_index[] = {
{1, (uintptr_t) mca_coll_adapt_ireduce_binomial},
{2, (uintptr_t) mca_coll_adapt_ireduce_in_order_binomial},
{3, (uintptr_t) mca_coll_adapt_ireduce_binary},
{4, (uintptr_t) mca_coll_adapt_ireduce_pipeline},
{5, (uintptr_t) mca_coll_adapt_ireduce_chain},
{6, (uintptr_t) mca_coll_adapt_ireduce_linear},
static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ireduce_algorithm_index[] = {
{0, (uintptr_t)ompi_coll_adapt_ireduce_tuned},
{1, (uintptr_t) ompi_coll_adapt_ireduce_binomial},
{2, (uintptr_t) ompi_coll_adapt_ireduce_in_order_binomial},
{3, (uintptr_t) ompi_coll_adapt_ireduce_binary},
{4, (uintptr_t) ompi_coll_adapt_ireduce_pipeline},
{5, (uintptr_t) ompi_coll_adapt_ireduce_chain},
{6, (uintptr_t) ompi_coll_adapt_ireduce_linear},
};
/*
* Set up MCA parameters of MPI_Reduce and MPI_Ireduce
*/
int mca_coll_adapt_ireduce_init(void)
int ompi_coll_adapt_ireduce_init(void)
{
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
@ -111,9 +112,9 @@ int mca_coll_adapt_ireduce_init(void)
}
/*
* Release the free list created in mca_coll_adapt_ireduce_generic
* Release the free list created in ompi_coll_adapt_ireduce_generic
*/
int mca_coll_adapt_ireduce_fini(void)
int ompi_coll_adapt_ireduce_fini(void)
{
if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list);
@ -127,15 +128,15 @@ int mca_coll_adapt_ireduce_fini(void)
/*
* Functions to access list
*/
static mca_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_children)
static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_children)
{
mca_coll_adapt_item_t *item;
ompi_coll_adapt_item_t *item;
if (opal_list_is_empty(list)) {
return NULL;
}
for (item = (mca_coll_adapt_item_t *) opal_list_get_first(list);
item != (mca_coll_adapt_item_t *) opal_list_get_end(list);
item = (mca_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
if (item->count == num_children) {
opal_list_remove_item(list, (opal_list_item_t *) item);
return item;
@ -146,11 +147,11 @@ static mca_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_ch
static int add_to_list(opal_list_t * list, int id)
{
mca_coll_adapt_item_t *item;
ompi_coll_adapt_item_t *item;
int ret = 0;
for (item = (mca_coll_adapt_item_t *) opal_list_get_first(list);
item != (mca_coll_adapt_item_t *) opal_list_get_end(list);
item = (mca_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
if (item->id == id) {
(item->count)++;
ret = 1;
@ -158,7 +159,7 @@ static int add_to_list(opal_list_t * list, int id)
}
}
if (ret == 0) {
item = OBJ_NEW(mca_coll_adapt_item_t);
item = OBJ_NEW(ompi_coll_adapt_item_t);
item->id = id;
item->count = 1;
opal_list_append(list, (opal_list_item_t *) item);
@ -172,15 +173,15 @@ static int add_to_list(opal_list_t * list, int id)
/*
* Get the inbuf address
*/
static mca_coll_adapt_inbuf_t *to_inbuf(char *buf, int distance)
static ompi_coll_adapt_inbuf_t *to_inbuf(char *buf, int distance)
{
return (mca_coll_adapt_inbuf_t *) (buf - distance);
return (ompi_coll_adapt_inbuf_t *) (buf - distance);
}
/*
* Finish a ireduce request
*/
static int ireduce_request_fini(mca_coll_adapt_reduce_context_t * context)
static int ireduce_request_fini(ompi_coll_adapt_reduce_context_t * context)
{
/* Return the allocated resources */
int i;
@ -227,8 +228,8 @@ static int ireduce_request_fini(mca_coll_adapt_reduce_context_t * context)
*/
static int send_cb(ompi_request_t * req)
{
mca_coll_adapt_reduce_context_t *context =
(mca_coll_adapt_reduce_context_t *) req->req_complete_cb_data;
ompi_coll_adapt_reduce_context_t *context =
(ompi_coll_adapt_reduce_context_t *) req->req_complete_cb_data;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"[%d]: ireduce_send_cb, peer %d, seg_id %d\n", context->con->rank,
context->peer, context->frag_id));
@ -238,14 +239,14 @@ static int send_cb(ompi_request_t * req)
/* Send a new segment */
OPAL_THREAD_LOCK(context->con->mutex_recv_list);
mca_coll_adapt_item_t *item =
ompi_coll_adapt_item_t *item =
get_next_ready_item(context->con->recv_list, context->con->tree->tree_nextsize);
OPAL_THREAD_UNLOCK(context->con->mutex_recv_list);
if (item != NULL) {
/* Get new context item from free list */
mca_coll_adapt_reduce_context_t *send_context =
(mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
ompi_coll_adapt_reduce_context_t *send_context =
(ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ireduce_context_free_list);
if (context->con->tree->tree_nextsize > 0) {
send_context->buff = context->con->accumbuf[item->id];
@ -316,8 +317,8 @@ static int send_cb(ompi_request_t * req)
*/
static int recv_cb(ompi_request_t * req)
{
mca_coll_adapt_reduce_context_t *context =
(mca_coll_adapt_reduce_context_t *) req->req_complete_cb_data;
ompi_coll_adapt_reduce_context_t *context =
(ompi_coll_adapt_reduce_context_t *) req->req_complete_cb_data;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"[%d]: ireduce_recv_cb, peer %d, seg_id %d\n", context->con->rank,
context->peer, context->frag_id));
@ -329,7 +330,7 @@ static int recv_cb(ompi_request_t * req)
/* Receive new segment */
if (new_id < context->con->num_segs) {
char *temp_recv_buf = NULL;
mca_coll_adapt_inbuf_t *inbuf = NULL;
ompi_coll_adapt_inbuf_t *inbuf = NULL;
/* Set inbuf: if it is the first child, recv on rbuf, else recv on inbuf */
if (context->child_id == 0 && context->con->sbuf != MPI_IN_PLACE
&& context->con->root == context->con->rank) {
@ -339,12 +340,12 @@ static int recv_cb(ompi_request_t * req)
} else {
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"[%d]: In recv_cb, alloc inbuf\n", context->con->rank));
inbuf = (mca_coll_adapt_inbuf_t *) opal_free_list_wait(context->con->inbuf_list);
inbuf = (ompi_coll_adapt_inbuf_t *) opal_free_list_wait(context->con->inbuf_list);
temp_recv_buf = inbuf->buff - context->con->lower_bound;
}
/* Get new context item from free list */
mca_coll_adapt_reduce_context_t *recv_context =
(mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
ompi_coll_adapt_reduce_context_t *recv_context =
(ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ireduce_context_free_list);
recv_context->buff = temp_recv_buf;
recv_context->frag_id = new_id;
@ -372,7 +373,7 @@ static int recv_cb(ompi_request_t * req)
if (MPI_SUCCESS != err) {
return err;
}
/* Invoke recvive call back */
/* Invoke receive call back */
ompi_request_set_callback(recv_req, recv_cb, recv_context);
}
@ -443,14 +444,14 @@ static int recv_cb(ompi_request_t * req)
if (context->con->rank != context->con->tree->tree_root
&& context->con->ongoing_send < mca_coll_adapt_component.adapt_ireduce_max_send_requests) {
OPAL_THREAD_LOCK(context->con->mutex_recv_list);
mca_coll_adapt_item_t *item =
ompi_coll_adapt_item_t *item =
get_next_ready_item(context->con->recv_list, context->con->tree->tree_nextsize);
OPAL_THREAD_UNLOCK(context->con->mutex_recv_list);
if (item != NULL) {
/* Gt new context item from free list */
mca_coll_adapt_reduce_context_t *send_context =
(mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
/* Get new context item from free list */
ompi_coll_adapt_reduce_context_t *send_context =
(ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.
adapt_ireduce_context_free_list);
send_context->buff = context->con->accumbuf[context->frag_id];
send_context->frag_id = item->id;
@ -523,7 +524,7 @@ static int recv_cb(ompi_request_t * req)
return 1;
}
int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module)
{
@ -543,9 +544,9 @@ int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
ireduce_tag = (ireduce_tag % 4096) + 4096;
fflush(stdout);
mca_coll_adapt_ireduce_fn_t reduce_func =
(mca_coll_adapt_ireduce_fn_t)
mca_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
ompi_coll_adapt_ireduce_fn_t reduce_func =
(ompi_coll_adapt_ireduce_fn_t)
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
adapt_ireduce_algorithm].algorithm_fn_ptr;
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
}
@ -554,20 +555,30 @@ int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_
/*
* Ireduce functions with different algorithms
*/
int mca_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
/*
 * Placeholder for the "tuned" ireduce algorithm (entry 0 of the ireduce
 * algorithm table). ADAPT does not implement it: the function only emits a
 * verbose message and reports the missing implementation.
 *
 * None of the arguments are used. In particular *request is never written,
 * so the caller must not treat it as a valid request after this call.
 *
 * Returns OMPI_ERR_NOT_IMPLEMENTED.
 */
int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
                                  struct ompi_datatype_t *dtype, struct ompi_op_t *op,
                                  int root, struct ompi_communicator_t *comm,
                                  ompi_request_t ** request,
                                  mca_coll_base_module_t *module, int ireduce_tag)
{
    OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
    /* Reporting success here would claim that a reduction was started and
     * that *request is usable, neither of which is true. */
    return OMPI_ERR_NOT_IMPLEMENTED;
}
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_bmtree(comm, root);
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
@ -575,53 +586,53 @@ int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int c
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_in_order_bmtree(comm, root);
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_tree(2, comm, root);
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(1, comm, root);
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag)
{
ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(4, comm, root);
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag)
@ -636,14 +647,14 @@ int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
tree = ompi_coll_base_topo_build_tree(MAXTREEFANOUT, comm, root);
}
int err =
mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module,
tree, mca_coll_adapt_component.adapt_ireduce_segment_size,
ireduce_tag);
return err;
}
int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
@ -655,7 +666,7 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
size_t typelng;
int seg_count = count, num_segs, rank, recv_count, send_count, i, j, err, min, distance = 0;
int32_t seg_index;
_Atomic int *next_recv_segs = NULL;
opal_atomic_int_t *next_recv_segs = NULL;
/* Used to store the accumulated result, pointer to every segment */
char **accumbuf = NULL;
/* A free list contains all recv data */
@ -686,9 +697,9 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
if (1 == context_free_list_enabled) {
mca_coll_adapt_component.adapt_ireduce_context_free_list = OBJ_NEW(opal_free_list_t);
opal_free_list_init(mca_coll_adapt_component.adapt_ireduce_context_free_list,
sizeof(mca_coll_adapt_reduce_context_t),
sizeof(ompi_coll_adapt_reduce_context_t),
opal_cache_line_size,
OBJ_CLASS(mca_coll_adapt_reduce_context_t),
OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
@ -701,18 +712,18 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
if (tree->tree_nextsize > 0) {
inbuf_list = OBJ_NEW(opal_free_list_t);
opal_free_list_init(inbuf_list,
sizeof(mca_coll_adapt_inbuf_t) + real_seg_size,
sizeof(ompi_coll_adapt_inbuf_t) + real_seg_size,
opal_cache_line_size,
OBJ_CLASS(mca_coll_adapt_inbuf_t),
OBJ_CLASS(ompi_coll_adapt_inbuf_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_inbuf_free_list_min,
mca_coll_adapt_component.adapt_inbuf_free_list_max,
mca_coll_adapt_component.adapt_inbuf_free_list_inc,
NULL, 0, NULL, NULL, NULL);
/* Set up next_recv_segs */
next_recv_segs = (_Atomic int32_t *) malloc(sizeof(int32_t) * tree->tree_nextsize);
mca_coll_adapt_inbuf_t *temp_inbuf =
(mca_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list);
next_recv_segs = (opal_atomic_int32_t *) malloc(sizeof(int32_t) * tree->tree_nextsize);
ompi_coll_adapt_inbuf_t *temp_inbuf =
(ompi_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list);
distance = (char *) temp_inbuf->buff - lower_bound - (char *) temp_inbuf; //address of inbuf->buff to address of inbuf
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"[%d]: distance %d, inbuf %p, inbuf->buff %p, inbuf->buff-lb %p, to_inbuf %p, inbuf_list %p\n",
@ -732,7 +743,7 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
OMPI_REQUEST_INIT(temp_request, false);
temp_request->req_state = OMPI_REQUEST_ACTIVE;
temp_request->req_type = 0;
temp_request->req_free = adapt_request_free;
temp_request->req_free = ompi_coll_adapt_request_free;
temp_request->req_status.MPI_SOURCE = 0;
temp_request->req_status.MPI_TAG = 0;
temp_request->req_status.MPI_ERROR = 0;
@ -752,8 +763,8 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
recv_list = OBJ_NEW(opal_list_t);
/* Set constant context for send and recv call back */
mca_coll_adapt_constant_reduce_context_t *con =
OBJ_NEW(mca_coll_adapt_constant_reduce_context_t);
ompi_coll_adapt_constant_reduce_context_t *con =
OBJ_NEW(ompi_coll_adapt_constant_reduce_context_t);
con->count = count;
con->seg_count = seg_count;
con->datatype = dtype;
@ -822,21 +833,21 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
recv_count = count - (ptrdiff_t) seg_count *(ptrdiff_t) seg_index;
}
char *temp_recv_buf = NULL;
mca_coll_adapt_inbuf_t *inbuf = NULL;
ompi_coll_adapt_inbuf_t *inbuf = NULL;
/* Set inbuf: if it is the first child, recv on rbuf, else recv on inbuf */
if (i == 0 && sbuf != MPI_IN_PLACE && root == rank) {
temp_recv_buf =
(char *) rbuf + (ptrdiff_t) j *(ptrdiff_t) segment_increment;
} else {
inbuf = (mca_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list);
inbuf = (ompi_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list);
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"[%d]: In ireduce, alloc inbuf %p\n", rank,
(void *) inbuf));
temp_recv_buf = inbuf->buff - lower_bound;
}
/* Get context */
mca_coll_adapt_reduce_context_t *context =
(mca_coll_adapt_reduce_context_t *)
ompi_coll_adapt_reduce_context_t *context =
(ompi_coll_adapt_reduce_context_t *)
opal_free_list_wait(mca_coll_adapt_component.
adapt_ireduce_context_free_list);
context->buff = temp_recv_buf;
@ -871,10 +882,10 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
/* Leaf nodes */
else {
mca_coll_adapt_item_t *item;
ompi_coll_adapt_item_t *item;
/* Set up recv_list */
for (seg_index = 0; seg_index < num_segs; seg_index++) {
item = OBJ_NEW(mca_coll_adapt_item_t);
item = OBJ_NEW(ompi_coll_adapt_item_t);
item->id = seg_index;
item->count = tree->tree_nextsize;
opal_list_append(recv_list, (opal_list_item_t *) item);
@ -894,8 +905,8 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
if (item->id == (num_segs - 1)) {
send_count = count - (ptrdiff_t) seg_count *(ptrdiff_t) item->id;
}
mca_coll_adapt_reduce_context_t *context =
(mca_coll_adapt_reduce_context_t *)
ompi_coll_adapt_reduce_context_t *context =
(ompi_coll_adapt_reduce_context_t *)
opal_free_list_wait(mca_coll_adapt_component.adapt_ireduce_context_free_list);
context->buff =
(char *) sbuf + (ptrdiff_t) item->id * (ptrdiff_t) segment_increment;

Просмотреть файл

@ -11,13 +11,13 @@
#include "coll_adapt_item.h"
static void mca_coll_adapt_item_constructor(mca_coll_adapt_item_t * item)
static void ompi_coll_adapt_item_constructor(ompi_coll_adapt_item_t * item)
{
}
static void mca_coll_adapt_item_destructor(mca_coll_adapt_item_t * item)
static void ompi_coll_adapt_item_destructor(ompi_coll_adapt_item_t * item)
{
}
OBJ_CLASS_INSTANCE(mca_coll_adapt_item_t, opal_list_item_t, mca_coll_adapt_item_constructor,
mca_coll_adapt_item_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t, ompi_coll_adapt_item_constructor,
ompi_coll_adapt_item_destructor);

Просмотреть файл

@ -12,7 +12,7 @@
#include "opal/class/opal_list.h"
#include "coll_adapt_inbuf.h"
struct mca_coll_adapt_item_s {
struct ompi_coll_adapt_item_s {
opal_list_item_t super;
/* Fragment id */
int id;
@ -20,6 +20,6 @@ struct mca_coll_adapt_item_s {
int count;
};
typedef struct mca_coll_adapt_item_s mca_coll_adapt_item_t;
typedef struct ompi_coll_adapt_item_s ompi_coll_adapt_item_t;
OBJ_CLASS_DECLARATION(mca_coll_adapt_item_t);
OBJ_CLASS_DECLARATION(ompi_coll_adapt_item_t);

Просмотреть файл

@ -14,17 +14,17 @@
#include <stdio.h>
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#endif /* HAVE_STRING_H */
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif
#endif /* HAVE_SCHED_H */
#include <sys/types.h>
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#endif /* HAVE_UNISTD_H */
#include "mpi.h"
#include "opal_stdint.h"
@ -35,7 +35,6 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
//#include "ompi/mca/rte/rte.h"
#include "ompi/proc/proc.h"
#include "coll_adapt.h"
@ -47,29 +46,37 @@
/*
* Local functions
*/
static int adapt_module_enable(mca_coll_base_module_t * module, struct ompi_communicator_t *comm);
/*
* Module constructor
*/
static void mca_coll_adapt_module_construct(mca_coll_adapt_module_t * module)
static void adapt_module_construct(mca_coll_adapt_module_t * module)
{
module->enabled = false;
module->adapt_component = &mca_coll_adapt_component;
module->adapt_enabled = false;
}
/*
* Module destructor
*/
static void mca_coll_adapt_module_destruct(mca_coll_adapt_module_t * module)
static void adapt_module_destruct(mca_coll_adapt_module_t * module)
{
module->enabled = false;
module->adapt_enabled = false;
}
OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t,
mca_coll_base_module_t,
mca_coll_adapt_module_construct, mca_coll_adapt_module_destruct);
mca_coll_base_module_t,
adapt_module_construct,
adapt_module_destruct);
/*
 * Module enable hook, called with the module and the communicator it is
 * being initialized on. Neither argument is inspected and no setup is
 * performed; the hook always reports OMPI_SUCCESS.
 */
static int adapt_module_enable(mca_coll_base_module_t * module,
                               struct ompi_communicator_t *comm)
{
    const int status = OMPI_SUCCESS;

    return status;
}
/*
* Initial query function that is invoked during MPI_INIT, allowing
@ -77,34 +84,37 @@ OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t,
* required level of thread support. This function is invoked exactly
* once.
*/
int mca_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads)
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads)
{
return OMPI_SUCCESS;
}
/*
* Invoked when there's a new communicator that has been created.
* Look at the communicator and decide which set of functions and
* priority we want to return.
*/
mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * comm, int *priority)
mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t * comm,
int *priority)
{
mca_coll_adapt_module_t *adapt_module;
/* If we're intercomm, or if there's only one process in the communicator */
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm)) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:adapt:comm_query (%d/%s): intercomm, comm is too small; disqualifying myself",
"coll:adapt:comm_query (%d/%s): intercomm, "
"comm is too small; disqualifying myself",
comm->c_contextid, comm->c_name);
return NULL;
}
/* Get the priority level attached to this module. If priority is less than or equal to 0, then the module is unavailable. */
/* Get the priority level attached to this module.
If priority is less than or equal to 0, then the module is unavailable. */
*priority = mca_coll_adapt_component.adapt_priority;
if (mca_coll_adapt_component.adapt_priority <= 0) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:adapt:comm_query (%d/%s): priority too low; disqualifying myself",
"coll:adapt:comm_query (%d/%s): priority too low; "
"disqualifying myself",
comm->c_contextid, comm->c_name);
return NULL;
}
@ -123,17 +133,17 @@ mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * c
adapt_module->super.coll_alltoall = NULL;
adapt_module->super.coll_alltoallw = NULL;
adapt_module->super.coll_barrier = NULL;
adapt_module->super.coll_bcast = mca_coll_adapt_bcast;
adapt_module->super.coll_bcast = ompi_coll_adapt_bcast;
adapt_module->super.coll_exscan = NULL;
adapt_module->super.coll_gather = NULL;
adapt_module->super.coll_gatherv = NULL;
adapt_module->super.coll_reduce = mca_coll_adapt_reduce;
adapt_module->super.coll_reduce = ompi_coll_adapt_reduce;
adapt_module->super.coll_reduce_scatter = NULL;
adapt_module->super.coll_scan = NULL;
adapt_module->super.coll_scatter = NULL;
adapt_module->super.coll_scatterv = NULL;
adapt_module->super.coll_ibcast = mca_coll_adapt_ibcast;
adapt_module->super.coll_ireduce = mca_coll_adapt_ireduce;
adapt_module->super.coll_ibcast = ompi_coll_adapt_ibcast;
adapt_module->super.coll_ireduce = ompi_coll_adapt_ireduce;
adapt_module->super.coll_iallreduce = NULL;
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
@ -143,17 +153,9 @@ mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * c
}
/*
* Init module on the communicator
* Free ADAPT request
*/
static int adapt_module_enable(mca_coll_base_module_t * module, struct ompi_communicator_t *comm)
{
return OMPI_SUCCESS;
}
/*
* Free ADAPT request
*/
int adapt_request_free(ompi_request_t ** request)
int ompi_coll_adapt_request_free(ompi_request_t ** request)
{
(*request)->req_state = OMPI_REQUEST_INVALID;
OBJ_RELEASE(*request);

Просмотреть файл

@ -13,7 +13,7 @@
#include "coll_adapt_algorithms.h"
/* MPI_Reduce and MPI_Ireduce in the ADAPT module only work for commutative operations */
int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t * module)
{
@ -22,7 +22,7 @@ int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_d
} else {
ompi_request_t *request;
int err =
mca_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}