Merge pull request #999 from tkordenbrock/topic/add.triggered.gather
coll-portals4: add gather and igather implementations that use Portals4 triggered operations
Этот коммит содержится в:
Коммит
f60c506c68
@ -15,6 +15,7 @@ local_sources = \
|
||||
coll_portals4_barrier.c \
|
||||
coll_portals4_bcast.c \
|
||||
coll_portals4_reduce.c \
|
||||
coll_portals4_gather.c \
|
||||
coll_portals4_request.h \
|
||||
coll_portals4_request.c
|
||||
|
||||
|
@ -24,13 +24,18 @@
|
||||
#include "ompi/datatype/ompi_datatype_internal.h"
|
||||
#include "ompi/op/op.h"
|
||||
#include "ompi/mca/mca.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
|
||||
|
||||
#include "ompi/mca/mtl/portals4/mtl_portals4.h"
|
||||
|
||||
#define MAXTREEFANOUT 32
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
|
||||
@ -61,10 +66,27 @@ struct mca_coll_portals4_component_t {
|
||||
|
||||
ptl_ni_limits_t ni_limits;
|
||||
|
||||
int use_binomial_gather_algorithm;
|
||||
|
||||
};
|
||||
typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t;
|
||||
OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component;
|
||||
|
||||
|
||||
/*
|
||||
* Borrowed with thanks from the coll-tuned component, then modified for Portals4.
|
||||
*/
|
||||
/*
 * Per-process description of a communication tree used by the Portals4
 * collectives; the module caches one such tree as cached_in_order_bmtree.
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from the coll-tuned tree this was borrowed from -- confirm against
 * the tree-building code.
 */
typedef struct ompi_coll_portals4_tree_t {
|
||||
/* presumably the rank at the root of the tree */
int32_t tree_root;
|
||||
/* presumably the fan-out the tree was built with */
int32_t tree_fanout;
|
||||
/* presumably a flag/marker for binomial trees -- TODO confirm */
int32_t tree_bmtree;
|
||||
/* presumably this process's parent in the tree */
int32_t tree_prev;
|
||||
/* child slots; the array holds at most MAXTREEFANOUT entries */
int32_t tree_next[MAXTREEFANOUT];
|
||||
/* presumably the number of valid entries in tree_next */
int32_t tree_nextsize;
|
||||
/* presumably the count of all descendants below this process -- TODO confirm */
int32_t tree_numdescendants;
|
||||
} ompi_coll_portals4_tree_t;
|
||||
|
||||
|
||||
struct mca_coll_portals4_module_t {
|
||||
mca_coll_base_module_t super;
|
||||
size_t coll_count;
|
||||
@ -79,6 +101,13 @@ struct mca_coll_portals4_module_t {
|
||||
mca_coll_base_module_t *previous_allreduce_module;
|
||||
mca_coll_base_module_iallreduce_fn_t previous_iallreduce;
|
||||
mca_coll_base_module_t *previous_iallreduce_module;
|
||||
|
||||
/* binomial tree */
|
||||
ompi_coll_portals4_tree_t *cached_in_order_bmtree;
|
||||
int cached_in_order_bmtree_root;
|
||||
|
||||
size_t barrier_count;
|
||||
size_t gather_count;
|
||||
};
|
||||
typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t);
|
||||
@ -135,6 +164,22 @@ int
|
||||
opal_stderr(const char *msg, const char *file,
|
||||
const int line, const int ret);
|
||||
|
||||
/*
|
||||
* Borrowed with thanks from the coll-tuned component.
|
||||
*/
|
||||
/*
 * Make sure PORTALS4_MODULE's cached in-order binomial tree was built for ROOT.
 *
 * Nothing happens when a tree is already cached and its recorded root equals
 * ROOT.  Otherwise any cached tree is destroyed, a new one is built for
 * (OMPI_COMM, ROOT), and ROOT is recorded as the cached root.
 *
 * PORTALS4_MODULE and ROOT are expanded more than once, so pass expressions
 * without side effects.
 */
#define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, PORTALS4_MODULE, ROOT )                    \
do {                                                                                                \
    if( !((PORTALS4_MODULE)->cached_in_order_bmtree)                                                \
        || ((PORTALS4_MODULE)->cached_in_order_bmtree_root != (ROOT)) ) {                           \
        /* stale or missing: drop whatever is cached, then rebuild for ROOT */                      \
        if( (PORTALS4_MODULE)->cached_in_order_bmtree ) {                                           \
            ompi_coll_portals4_destroy_tree( &((PORTALS4_MODULE)->cached_in_order_bmtree) );        \
        }                                                                                           \
        (PORTALS4_MODULE)->cached_in_order_bmtree =                                                 \
            ompi_coll_portals4_build_in_order_bmtree( (OMPI_COMM), (ROOT) );                        \
        (PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT);                                    \
    }                                                                                               \
} while (0)
|
||||
|
||||
|
||||
int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
@ -177,6 +222,20 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
|
||||
int
|
||||
ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request);
|
||||
|
||||
/*
 * Gather entry point of the Portals4 collective component; portals4_comm_query
 * installs it as the module's coll_gather.
 * NOTE(review): parameters presumably carry the usual MPI_Gather meaning
 * (send buffer/count/type, receive buffer/count/type, root rank) -- confirm
 * against the coll framework's gather signature.
 */
int ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
/*
 * Igather entry point; portals4_comm_query installs it as the module's
 * coll_igather.  Takes the same arguments as the gather entry point plus
 * `request`, which presumably receives the request tracking the operation
 * -- TODO confirm.
 */
int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
ompi_request_t **request,
|
||||
mca_coll_base_module_t *module);
|
||||
/*
 * Finish step for an igather request; the component's progress function calls
 * it for requests of type OMPI_COLL_PORTALS4_TYPE_GATHER.
 */
int ompi_coll_portals4_igather_intra_fini(struct ompi_coll_portals4_request_t *request);
|
||||
|
||||
|
||||
static inline ptl_process_t
|
||||
ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
|
||||
{
|
||||
@ -357,6 +416,43 @@ void get_k_ary_tree(const unsigned int k_ary,
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ompi_coll_portals4_create_recv_converter (opal_convertor_t *converter,
|
||||
void *target,
|
||||
ompi_proc_t *proc,
|
||||
int count,
|
||||
ompi_datatype_t *datatype)
|
||||
{
|
||||
/* create converter */
|
||||
OBJ_CONSTRUCT(converter, opal_convertor_t);
|
||||
|
||||
/* initialize converter */
|
||||
opal_convertor_copy_and_prepare_for_recv(proc->super.proc_convertor,
|
||||
&datatype->super,
|
||||
count,
|
||||
target,
|
||||
0,
|
||||
converter);
|
||||
}
|
||||
|
||||
static inline void
|
||||
ompi_coll_portals4_create_send_converter (opal_convertor_t *converter,
|
||||
const void *source,
|
||||
ompi_proc_t *proc,
|
||||
int count,
|
||||
ompi_datatype_t *datatype)
|
||||
{
|
||||
OBJ_CONSTRUCT(converter, opal_convertor_t);
|
||||
|
||||
opal_convertor_copy_and_prepare_for_send(proc->super.proc_convertor,
|
||||
&datatype->super,
|
||||
count,
|
||||
source,
|
||||
0,
|
||||
converter);
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_COLL_PORTALS4_EXPORT_H */
|
||||
|
@ -203,6 +203,14 @@ portals4_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_portals4_priority);
|
||||
|
||||
mca_coll_portals4_component.use_binomial_gather_algorithm = 0;
|
||||
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "use_binomial_gather_algorithm",
|
||||
"if 1 use a binomial tree algorithm for gather, otherwise use linear",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_portals4_component.use_binomial_gather_algorithm);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -463,7 +471,7 @@ portals4_init_query(bool enable_progress_threads,
|
||||
__FILE__, __LINE__, ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%lx\n", md.start, md.length));
|
||||
|
||||
/* setup finish ack ME */
|
||||
me.start = NULL;
|
||||
@ -472,7 +480,7 @@ portals4_init_query(bool enable_progress_threads,
|
||||
me.min_free = 0;
|
||||
me.uid = mca_coll_portals4_component.uid;
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
me.match_bits = 0;
|
||||
@ -584,6 +592,12 @@ portals4_comm_query(struct ompi_communicator_t *comm,
|
||||
portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
|
||||
portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;
|
||||
|
||||
portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra;
|
||||
portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra;
|
||||
|
||||
portals4_module->cached_in_order_bmtree=NULL;
|
||||
portals4_module->cached_in_order_bmtree_root=-1;
|
||||
|
||||
portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
|
||||
portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;
|
||||
|
||||
@ -593,6 +607,9 @@ portals4_comm_query(struct ompi_communicator_t *comm,
|
||||
portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
|
||||
portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;
|
||||
|
||||
portals4_module->barrier_count = 0;
|
||||
portals4_module->gather_count = 0;
|
||||
|
||||
return &(portals4_module->super);
|
||||
}
|
||||
|
||||
@ -689,9 +706,11 @@ portals4_progress(void)
|
||||
ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
|
||||
break;
|
||||
case OMPI_COLL_PORTALS4_TYPE_SCATTER:
|
||||
case OMPI_COLL_PORTALS4_TYPE_GATHER:
|
||||
opal_output(ompi_coll_base_framework.framework_output,
|
||||
"allreduce is not supported yet\n");
|
||||
"scatter is not supported yet\n");
|
||||
break;
|
||||
case OMPI_COLL_PORTALS4_TYPE_GATHER:
|
||||
ompi_coll_portals4_igather_intra_fini(ptl_request);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
1326
ompi/mca/coll/portals4/coll_portals4_gather.c
Обычный файл
1326
ompi/mca/coll/portals4/coll_portals4_gather.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -83,6 +83,46 @@ struct ompi_coll_portals4_request_t {
|
||||
ptl_handle_ct_t ack_ct_h;
|
||||
} allreduce;
|
||||
|
||||
struct {
|
||||
opal_convertor_t send_converter;
|
||||
opal_convertor_t recv_converter;
|
||||
size_t packed_size;
|
||||
int8_t is_sync;
|
||||
int8_t free_after;
|
||||
size_t coll_count;
|
||||
char *gather_buf;
|
||||
uint64_t gather_bytes;
|
||||
ptl_match_bits_t gather_match_bits;
|
||||
ptl_handle_md_t gather_mdh;
|
||||
ptl_size_t gather_offset;
|
||||
ptl_handle_ct_t gather_cth;
|
||||
ptl_handle_md_t gather_meh;
|
||||
ptl_match_bits_t sync_match_bits;
|
||||
ptl_handle_md_t sync_mdh;
|
||||
ptl_handle_ct_t sync_cth;
|
||||
ptl_handle_me_t sync_meh;
|
||||
int my_rank;
|
||||
int root_rank;
|
||||
int size;
|
||||
const void *sbuf;
|
||||
void *rbuf;
|
||||
const char *pack_src_buf;
|
||||
int pack_src_count;
|
||||
struct ompi_datatype_t *pack_src_dtype;
|
||||
MPI_Aint pack_src_extent;
|
||||
MPI_Aint pack_src_true_extent;
|
||||
MPI_Aint pack_src_lb;
|
||||
MPI_Aint pack_src_true_lb;
|
||||
MPI_Aint pack_src_offset;
|
||||
uint64_t unpack_bytes;
|
||||
char *unpack_dst_buf;
|
||||
int unpack_dst_count;
|
||||
struct ompi_datatype_t *unpack_dst_dtype;
|
||||
MPI_Aint unpack_dst_extent;
|
||||
MPI_Aint unpack_dst_true_extent;
|
||||
MPI_Aint unpack_dst_lb;
|
||||
MPI_Aint unpack_dst_true_lb;
|
||||
} gather;
|
||||
} u;
|
||||
};
|
||||
typedef struct ompi_coll_portals4_request_t ompi_coll_portals4_request_t;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user