1
1

Fix the atomic management of the bcast and reduce freelist

API consistent with other collective modules
Add comments
Other minor cleanups.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2020-05-07 14:42:02 -04:00 коммит произвёл Jeff Squyres
родитель a4be3bb93d
Коммит d71264569e
16 изменённых файлов: 185 добавлений и 329 удалений

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* Copyright (c) 2004-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -187,10 +187,14 @@ struct ompi_communicator_t {
/* Collectives module interface and data */
mca_coll_base_comm_coll_t *c_coll;
/* Non-blocking collective tag */
_Atomic int32_t c_ibcast_tag;
_Atomic int32_t c_ireduce_tag;
/* Non-blocking collective tag. These are added here as they should be
* shared between all non-blocking collective modules (to avoid message
* collisions between them in the case where multiple outstanding
* non-blocking collective coexists using multiple backends).
*/
opal_atomic_int32_t c_ibcast_tag;
opal_atomic_int32_t c_ireduce_tag;
};
typedef struct ompi_communicator_t ompi_communicator_t;

Просмотреть файл

@ -1,5 +1,5 @@
#
# Copyright (c) 2014 The University of Tennessee and The University
# Copyright (c) 2014-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# $COPYRIGHT$
@ -13,11 +13,11 @@
sources = \
coll_adapt_component.c \
coll_adapt_module.c \
coll_adapt_bcast.c \
coll_adapt_bcast.c \
coll_adapt_ibcast.c \
coll_adapt_reduce.c \
coll_adapt_ireduce.c \
coll_adapt.h \
coll_adapt.h \
coll_adapt_algorithms.h \
coll_adapt_context.h \
coll_adapt_context.c \

Просмотреть файл

@ -38,8 +38,9 @@ typedef struct mca_coll_adapt_component_t {
/* MCA parameter: Priority of this component */
int adapt_priority;
/* MCA parameter: Output verbose level */
/* MCA parameter: Output stream and verbose level */
int adapt_output;
int adapt_verbose;
/* MCA parameter: Maximum number of segment in context free list */
int adapt_context_free_list_max;
@ -57,7 +58,6 @@ typedef struct mca_coll_adapt_component_t {
int adapt_ibcast_max_recv_requests;
/* Bcast free list */
opal_free_list_t *adapt_ibcast_context_free_list;
opal_atomic_int32_t adapt_ibcast_context_free_list_enabled;
/* Reduce MCA parameter */
int adapt_ireduce_algorithm;
@ -70,7 +70,6 @@ typedef struct mca_coll_adapt_component_t {
/* Reduce free list */
opal_free_list_t *adapt_ireduce_context_free_list;
opal_atomic_int32_t adapt_ireduce_context_free_list_enabled;
} mca_coll_adapt_component_t;
@ -91,7 +90,7 @@ OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component;
int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads);
mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);
/* Free ADAPT quest */
/* ADAPT request free */
int ompi_coll_adapt_request_free(ompi_request_t **request);
#endif /* MCA_COLL_ADAPT_EXPORT_H */

Просмотреть файл

@ -20,82 +20,45 @@ typedef struct ompi_coll_adapt_algorithm_index_s {
} ompi_coll_adapt_algorithm_index_t;
/* Bcast */
int ompi_coll_adapt_ibcast_init(void);
int ompi_coll_adapt_ibcast_register(void);
int ompi_coll_adapt_ibcast_fini(void);
int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module);
int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ibcast_tag);
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag);
int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module,
int ibcast_tag);
int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ibcast_tag);
int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t *module, int ibcast_tag);
int ompi_coll_adapt_bcast(BCAST_ARGS);
int ompi_coll_adapt_ibcast(IBCAST_ARGS);
int ompi_coll_adapt_ibcast_generic(IBCAST_ARGS,
ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag);
int ompi_coll_adapt_ibcast_binomial(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_in_order_binomial(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_binary(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_pipeline(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_chain(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_linear(IBCAST_ARGS,
int ibcast_tag);
int ompi_coll_adapt_ibcast_tuned(IBCAST_ARGS,
int ibcast_tag);
/* Reduce */
int ompi_coll_adapt_ireduce_init(void);
int ompi_coll_adapt_ireduce_register(void);
int ompi_coll_adapt_ireduce_fini(void);
int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);
int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module);
int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ireduce_tag);
int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t *module, int ireduce_tag);
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module, int ireduce_tag);
int ompi_coll_adapt_reduce(REDUCE_ARGS);
int ompi_coll_adapt_ireduce(IREDUCE_ARGS);
int ompi_coll_adapt_ireduce_generic(IREDUCE_ARGS,
ompi_coll_tree_t * tree, size_t seg_size, int ireduce_tag);
int ompi_coll_adapt_ireduce_tuned(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_binomial(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_in_order_binomial(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_binary(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_pipeline(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_chain(IREDUCE_ARGS,
int ireduce_tag);
int ompi_coll_adapt_ireduce_linear(IREDUCE_ARGS,
int ireduce_tag);

Просмотреть файл

@ -17,10 +17,13 @@ int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatyp
{
if (count == 0) {
return MPI_SUCCESS;
} else {
ompi_request_t *request;
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}
ompi_request_t *request = NULL;
int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module);
if( MPI_SUCCESS != err ) {
if( NULL == request )
return err;
}
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}

Просмотреть файл

@ -65,11 +65,10 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
/* adapt-component specific information */
/* (default) priority */
0,
0, /* (default) priority */
/* (default) verbose level */
0,
0, /* (default) output stream */
0, /* (default) verbose level */
/* default values for non-MCA parameters */
/* Not specifying values here gives us all 0's */
@ -78,25 +77,13 @@ mca_coll_adapt_component_t mca_coll_adapt_component = {
/* Open the component */
static int adapt_open(void)
{
int param;
mca_coll_adapt_component_t *cs = &mca_coll_adapt_component;
/*
* Get the global coll verbosity: it will be ours
*/
param = mca_base_var_find("ompi", "coll", "base", "verbose");
if (param >= 0) {
const int *verbose = NULL;
mca_base_var_get_value(param, &verbose, NULL, NULL);
if (verbose && verbose[0] > 0) {
cs->adapt_output = opal_output_open(NULL);
opal_output_set_verbosity(cs->adapt_output, verbose[0]);
}
if (cs->adapt_verbose > 0) {
cs->adapt_output = opal_output_open(NULL);
opal_output_set_verbosity(cs->adapt_output, cs->adapt_verbose);
}
opal_output_verbose(1, cs->adapt_output,
"coll:adapt:component_open: done!");
return OMPI_SUCCESS;
}
@ -131,16 +118,14 @@ static int adapt_register(void)
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
int adapt_verbose = 0;
cs->adapt_verbose = ompi_coll_base_framework.framework_verbose;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level",
"Verbose level (default set to the collective framework verbosity)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &adapt_verbose);
cs->adapt_output = opal_output_open(NULL);
opal_output_set_verbosity(cs->adapt_output, adapt_verbose);
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose);
cs->adapt_context_free_list_min = 10;
cs->adapt_context_free_list_min = 64;
(void) mca_base_component_var_register(c, "context_free_list_min",
"Minimum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -148,7 +133,7 @@ static int adapt_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_min);
cs->adapt_context_free_list_max = 10000;
cs->adapt_context_free_list_max = 1024;
(void) mca_base_component_var_register(c, "context_free_list_max",
"Maximum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -156,15 +141,15 @@ static int adapt_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_max);
cs->adapt_context_free_list_inc = 10;
cs->adapt_context_free_list_inc = 32;
(void) mca_base_component_var_register(c, "context_free_list_inc",
"Increasement number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->adapt_context_free_list_inc);
ompi_coll_adapt_ibcast_init();
ompi_coll_adapt_ireduce_init();
ompi_coll_adapt_ibcast_register();
ompi_coll_adapt_ireduce_register();
return adapt_verify_mca_variables();
}

Просмотреть файл

@ -12,58 +12,15 @@
#include "ompi/mca/coll/coll.h"
#include "coll_adapt_context.h"
static void ompi_coll_adapt_bcast_context_constructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void ompi_coll_adapt_bcast_context_destructor(ompi_coll_adapt_bcast_context_t * bcast_context)
{
}
static void
ompi_coll_adapt_constant_bcast_context_constructor(ompi_coll_adapt_constant_bcast_context_t * con)
{
}
static void ompi_coll_adapt_constant_bcast_context_destructor(ompi_coll_adapt_constant_bcast_context_t
* con)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
ompi_coll_adapt_bcast_context_constructor,
ompi_coll_adapt_bcast_context_destructor);
NULL, NULL);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t,
ompi_coll_adapt_constant_bcast_context_constructor,
ompi_coll_adapt_constant_bcast_context_destructor);
static void ompi_coll_adapt_reduce_context_constructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void ompi_coll_adapt_reduce_context_destructor(ompi_coll_adapt_reduce_context_t *
reduce_context)
{
}
static void
ompi_coll_adapt_constant_reduce_context_constructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
static void
ompi_coll_adapt_constant_reduce_context_destructor(ompi_coll_adapt_constant_reduce_context_t * con)
{
}
NULL, NULL);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
ompi_coll_adapt_reduce_context_constructor,
ompi_coll_adapt_reduce_context_destructor);
NULL, NULL);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
ompi_coll_adapt_constant_reduce_context_constructor,
ompi_coll_adapt_constant_reduce_context_destructor);
NULL, NULL);

Просмотреть файл

@ -43,7 +43,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[
/*
* Set up MCA parameters of MPI_Bcast and MPI_IBcast
*/
int ompi_coll_adapt_ibcast_init(void)
int ompi_coll_adapt_ibcast_register(void)
{
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
@ -78,7 +78,6 @@ int ompi_coll_adapt_ibcast_init(void)
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests);
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
return OMPI_SUCCESS;
}
@ -90,7 +89,6 @@ int ompi_coll_adapt_ibcast_fini(void)
if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list);
mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n"));
}
return OMPI_SUCCESS;
@ -179,7 +177,6 @@ static int send_cb(ompi_request_t * req)
int num_sent = ++(context->con->num_sent_segs);
int num_recv_fini_t = context->con->num_recv_fini;
int rank = ompi_comm_rank(context->con->comm);
opal_mutex_t *mutex_temp = context->con->mutex;
/* Check whether signal the condition */
if ((rank == context->con->root
&& num_sent == context->con->tree->tree_nextsize * context->con->num_segs)
@ -190,13 +187,13 @@ static int send_cb(ompi_request_t * req)
context->con->num_segs)) {
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in send\n",
ompi_comm_rank(context->con->comm)));
OPAL_THREAD_UNLOCK(mutex_temp);
OPAL_THREAD_UNLOCK(context->con->mutex);
ibcast_request_fini(context);
} else {
OBJ_RELEASE(context->con);
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
(opal_free_list_item_t *) context);
OPAL_THREAD_UNLOCK(mutex_temp);
OPAL_THREAD_UNLOCK(context->con->mutex);
}
req->req_free(&req);
/* Call back function return 1, which means successful */
@ -306,7 +303,6 @@ static int recv_cb(ompi_request_t * req)
int num_sent = context->con->num_sent_segs;
int num_recv_fini_t = ++(context->con->num_recv_fini);
int rank = ompi_comm_rank(context->con->comm);
opal_mutex_t *mutex_temp = context->con->mutex;
/* If this process is leaf and has received all the segments */
if ((rank == context->con->root
@ -318,13 +314,13 @@ static int recv_cb(ompi_request_t * req)
context->con->num_segs)) {
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: Singal in recv\n",
ompi_comm_rank(context->con->comm)));
OPAL_THREAD_UNLOCK(mutex_temp);
OPAL_THREAD_UNLOCK(context->con->mutex);
ibcast_request_fini(context);
} else {
OBJ_RELEASE(context->con);
opal_free_list_return(mca_coll_adapt_component.adapt_ibcast_context_free_list,
(opal_free_list_item_t *) context);
OPAL_THREAD_UNLOCK(mutex_temp);
OPAL_THREAD_UNLOCK(context->con->mutex);
}
req->req_free(&req);
return 1;
@ -334,9 +330,8 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
struct ompi_communicator_t *comm, ompi_request_t ** request,
mca_coll_base_module_t * module)
{
if (count == 0) {
ompi_request_t *temp_request;
temp_request = OBJ_NEW(ompi_request_t);
if (0 == count) {
ompi_request_t *temp_request = OBJ_NEW(ompi_request_t);
OMPI_REQUEST_INIT(temp_request, false);
temp_request->req_type = 0;
temp_request->req_free = ompi_coll_adapt_request_free;
@ -348,24 +343,22 @@ int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *dataty
ompi_request_complete(temp_request, 1);
*request = temp_request;
return MPI_SUCCESS;
} else {
int rank = ompi_comm_rank(comm);
if (rank == root) {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ibcast root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
root, mca_coll_adapt_component.adapt_ibcast_algorithm,
mca_coll_adapt_component.adapt_ibcast_segment_size,
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
}
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
ibcast_tag = ibcast_tag % 4096;
ompi_coll_adapt_ibcast_fn_t bcast_func =
(ompi_coll_adapt_ibcast_fn_t)
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
algorithm_fn_ptr;
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
}
int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1);
ibcast_tag = ibcast_tag % 4096;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ibcast tag %d root %d, algorithm %d, coll_adapt_ibcast_segment_size %zu, coll_adapt_ibcast_max_send_requests %d, coll_adapt_ibcast_max_recv_requests %d\n",
ibcast_tag, root, mca_coll_adapt_component.adapt_ibcast_algorithm,
mca_coll_adapt_component.adapt_ibcast_segment_size,
mca_coll_adapt_component.adapt_ibcast_max_send_requests,
mca_coll_adapt_component.adapt_ibcast_max_recv_requests));
ompi_coll_adapt_ibcast_fn_t bcast_func =
(ompi_coll_adapt_ibcast_fn_t)
ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm].
algorithm_fn_ptr;
return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag);
}
/*
@ -377,7 +370,7 @@ int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *
mca_coll_base_module_t *module, int ibcast_tag)
{
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
return OMPI_SUCCESS;
return OMPI_ERR_NOT_IMPLEMENTED;
}
int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype,
@ -471,12 +464,7 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
mca_coll_base_module_t * module, ompi_coll_tree_t * tree,
size_t seg_size, int ibcast_tag)
{
/* Tempory variables for iteration */
int i, j;
/* Rank of this process */
int rank;
/* Record return value */
int err;
int i, j, rank, err;
/* The min of num_segs and SEND_NUM or RECV_NUM, in case the num_segs is less than SEND_NUM or RECV_NUM */
int min;
@ -498,23 +486,21 @@ int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t
/* Record how many isends have been issued for every child */
int *send_array = NULL;
/* Set up free list */
if (0 == mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled) {
int32_t context_free_list_enabled =
opal_atomic_add_fetch_32(&
(mca_coll_adapt_component.
adapt_ibcast_context_free_list_enabled), 1);
if (1 == context_free_list_enabled) {
mca_coll_adapt_component.adapt_ibcast_context_free_list = OBJ_NEW(opal_free_list_t);
opal_free_list_init(mca_coll_adapt_component.adapt_ibcast_context_free_list,
sizeof(ompi_coll_adapt_bcast_context_t),
opal_cache_line_size,
OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
/* Atomically set up free list */
if (NULL == mca_coll_adapt_component.adapt_ibcast_context_free_list) {
opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
opal_free_list_init(fl,
sizeof(ompi_coll_adapt_bcast_context_t),
opal_cache_line_size,
OBJ_CLASS(ompi_coll_adapt_bcast_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ibcast_context_free_list,
&(intptr_t){0}, fl) ) {
OBJ_RELEASE(fl);
}
}

Просмотреть файл

@ -12,13 +12,5 @@
#include "coll_adapt.h"
#include "coll_adapt_inbuf.h"
static void ompi_coll_adapt_inbuf_constructor(ompi_coll_adapt_inbuf_t * inbuf)
{
}
static void ompi_coll_adapt_inbuf_destructor(ompi_coll_adapt_inbuf_t * inbuf)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t, ompi_coll_adapt_inbuf_constructor,
ompi_coll_adapt_inbuf_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t,
NULL, NULL);

Просмотреть файл

@ -47,7 +47,7 @@ static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ireduce_algorithm_index
/*
* Set up MCA parameters of MPI_Reduce and MPI_Ireduce
*/
int ompi_coll_adapt_ireduce_init(void)
int ompi_coll_adapt_ireduce_register(void)
{
mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version;
@ -107,7 +107,6 @@ int ompi_coll_adapt_ireduce_init(void)
&mca_coll_adapt_component.adapt_inbuf_free_list_inc);
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
return OMPI_SUCCESS;
}
@ -119,14 +118,13 @@ int ompi_coll_adapt_ireduce_fini(void)
if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) {
OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list);
mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled = 0;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ireduce fini\n"));
}
return OMPI_SUCCESS;
}
/*
* Functions to access list
* Functions to access list
*/
static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_children)
{
@ -148,26 +146,22 @@ static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_c
static int add_to_list(opal_list_t * list, int id)
{
ompi_coll_adapt_item_t *item;
int ret = 0;
for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list);
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list);
item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) {
if (item->id == id) {
(item->count)++;
ret = 1;
break;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
return 1;
}
}
if (ret == 0) {
item = OBJ_NEW(ompi_coll_adapt_item_t);
item->id = id;
item->count = 1;
opal_list_append(list, (opal_list_item_t *) item);
ret = 2;
}
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return %d\n",
ret));
return ret;
/* Add a new object to the list with count set to 1 */
item = OBJ_NEW(ompi_coll_adapt_item_t);
item->id = id;
item->count = 1;
opal_list_append(list, (opal_list_item_t *) item);
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "add_to_list_return 1\n"));
return 2;
}
/*
@ -250,7 +244,6 @@ static int send_cb(ompi_request_t * req)
adapt_ireduce_context_free_list);
if (context->con->tree->tree_nextsize > 0) {
send_context->buff = context->con->accumbuf[item->id];
} else {
send_context->buff =
context->buff + (item->id - context->frag_id) * context->con->segment_increment;
@ -530,26 +523,22 @@ int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi
{
if (count == 0) {
return MPI_SUCCESS;
} else {
int rank = ompi_comm_rank(comm);
if (rank == root) {
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ireduce root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
root, mca_coll_adapt_component.adapt_ireduce_algorithm,
mca_coll_adapt_component.adapt_ireduce_segment_size,
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
}
/* Get ireduce tag */
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
ireduce_tag = (ireduce_tag % 4096) + 4096;
fflush(stdout);
ompi_coll_adapt_ireduce_fn_t reduce_func =
(ompi_coll_adapt_ireduce_fn_t)
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
adapt_ireduce_algorithm].algorithm_fn_ptr;
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
}
int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1);
ireduce_tag = (ireduce_tag % 4096) + 4096;
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ireduce tag %d root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
ireduce_tag, root, mca_coll_adapt_component.adapt_ireduce_algorithm,
mca_coll_adapt_component.adapt_ireduce_segment_size,
mca_coll_adapt_component.adapt_ireduce_max_send_requests,
mca_coll_adapt_component.adapt_ireduce_max_recv_requests));
ompi_coll_adapt_ireduce_fn_t reduce_func =
(ompi_coll_adapt_ireduce_fn_t)
ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component.
adapt_ireduce_algorithm].algorithm_fn_ptr;
return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag);
}
/*
@ -562,7 +551,7 @@ int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count,
mca_coll_base_module_t *module, int ireduce_tag)
{
OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n"));
return OMPI_SUCCESS;
return OMPI_ERR_NOT_IMPLEMENTED;
}
int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count,
@ -688,23 +677,21 @@ int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count,
ompi_datatype_get_true_extent(dtype, &true_lower_bound, &true_extent);
real_seg_size = true_extent + (ptrdiff_t) (seg_count - 1) * extent;
/* Set up free list */
if (0 == mca_coll_adapt_component.adapt_ireduce_context_free_list_enabled) {
int32_t context_free_list_enabled =
opal_atomic_add_fetch_32(&
(mca_coll_adapt_component.
adapt_ireduce_context_free_list_enabled), 1);
if (1 == context_free_list_enabled) {
mca_coll_adapt_component.adapt_ireduce_context_free_list = OBJ_NEW(opal_free_list_t);
opal_free_list_init(mca_coll_adapt_component.adapt_ireduce_context_free_list,
sizeof(ompi_coll_adapt_reduce_context_t),
opal_cache_line_size,
OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
/* Atomically set up free list */
if (NULL == mca_coll_adapt_component.adapt_ireduce_context_free_list) {
opal_free_list_t* fl = OBJ_NEW(opal_free_list_t);
opal_free_list_init(fl,
sizeof(ompi_coll_adapt_reduce_context_t),
opal_cache_line_size,
OBJ_CLASS(ompi_coll_adapt_reduce_context_t),
0, opal_cache_line_size,
mca_coll_adapt_component.adapt_context_free_list_min,
mca_coll_adapt_component.adapt_context_free_list_max,
mca_coll_adapt_component.adapt_context_free_list_inc,
NULL, 0, NULL, NULL, NULL);
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *)&mca_coll_adapt_component.adapt_ireduce_context_free_list,
&(intptr_t){0}, fl) ) {
OBJ_RELEASE(fl);
}
}

Просмотреть файл

@ -11,13 +11,5 @@
#include "coll_adapt_item.h"
static void ompi_coll_adapt_item_constructor(ompi_coll_adapt_item_t * item)
{
}
static void ompi_coll_adapt_item_destructor(ompi_coll_adapt_item_t * item)
{
}
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t, ompi_coll_adapt_item_constructor,
ompi_coll_adapt_item_destructor);
OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t,
NULL, NULL);

Просмотреть файл

@ -16,7 +16,7 @@ struct ompi_coll_adapt_item_s {
opal_list_item_t super;
/* Fragment id */
int id;
/* The number of children which have received the current segment from */
/* The number of children which have received the current segment */
int count;
};

Просмотреть файл

@ -19,11 +19,13 @@ int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_
{
if (count == 0) {
return MPI_SUCCESS;
} else {
ompi_request_t *request;
int err =
ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}
ompi_request_t *request = NULL;
int err = ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
if( MPI_SUCCESS != err ) {
if( NULL == request )
return err;
}
ompi_request_wait(&request, MPI_STATUS_IGNORE);
return err;
}

Просмотреть файл

@ -492,10 +492,6 @@ struct mca_coll_base_comm_t {
/* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree;
/* linear */
ompi_coll_tree_t *cached_linear;
int cached_linear_root;
};
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);

Просмотреть файл

@ -215,17 +215,8 @@ static int tuned_open(void)
int rc;
#if OPAL_ENABLE_DEBUG
{
int param;
param = mca_base_var_find("ompi", "coll", "base", "verbose");
if (param >= 0) {
const int *verbose = NULL;
mca_base_var_get_value(param, &verbose, NULL, NULL);
if (verbose && verbose[0] > 0) {
ompi_coll_tuned_stream = opal_output_open(NULL);
}
}
if (ompi_coll_base_framework.framework_verbose) {
ompi_coll_tuned_stream = opal_output_open(NULL);
}
#endif /* OPAL_ENABLE_DEBUG */

Просмотреть файл

@ -436,13 +436,14 @@ static inline void ompi_request_wait_completion(ompi_request_t *req)
static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
{
int rc = 0;
if(NULL != request->req_complete_cb) {
ompi_request_complete_fn_t temp = request->req_complete_cb;
/* Set the request cb to NULL to allow resetting in the callback */
ompi_request_complete_fn_t fct = request->req_complete_cb;
request->req_complete_cb = NULL;
rc = temp( request );
rc = fct( request );
}
if (0 == rc) {
if( OPAL_LIKELY(with_signal) ) {
void *_tmp_ptr = REQUEST_PENDING;
@ -454,11 +455,10 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
if( REQUEST_PENDING != tmp_sync )
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
}
} else {
} else
request->req_complete = REQUEST_COMPLETED;
}
}
return OMPI_SUCCESS;
}
@ -468,14 +468,13 @@ static inline int ompi_request_set_callback(ompi_request_t* request,
{
request->req_complete_cb_data = cb_data;
request->req_complete_cb = cb;
int rc = 0;
/* If request is completed and the callback is not called, need to call callback */
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
ompi_request_complete_fn_t temp = request->req_complete_cb;
ompi_request_complete_fn_t fct = request->req_complete_cb;
request->req_complete_cb = NULL;
rc = temp( request );
return fct( request );
}
return rc;
return OMPI_SUCCESS;
}
END_C_DECLS