The SM2 collective component has not been updated in a long
time. Rich, the original developer, agrees with this removal. This commit was SVN r25368.
This commit is contained in:
parent
e887d595c7
commit
72f731f25f
@ -1,2 +0,0 @@
rg6
rlgraham
@ -1,57 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

not_used_yet =

sources = \
coll_sm2.h \
coll_sm2_component.c \
coll_sm2_module.c \
coll_sm2_bcast.c \
coll_sm2_reduce.c \
coll_sm2_allreduce.c \
coll_sm2_barrier.c \
coll_sm2_service.c


# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

component_noinst =
component_install =
if MCA_BUILD_ompi_coll_sm2_DSO
component_install += mca_coll_sm2.la
else
component_noinst += libmca_coll_sm2.la
endif

# See ompi/mca/btl/sm/Makefile.am for an explanation of
# libmca_common_sm.la.

mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_sm2_la_SOURCES = $(sources)
mca_coll_sm2_la_LDFLAGS = -module -avoid-version
mca_coll_sm2_la_LIBADD = \
$(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la

noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_sm2_la_SOURCES =$(sources)
libmca_coll_sm2_la_LDFLAGS = -module -avoid-version
@ -1,690 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file */
|
||||
|
||||
#ifndef MCA_COLL_SM2_EXPORT_H
|
||||
#define MCA_COLL_SM2_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/common/sm/common_sm_mmap.h"
|
||||
#include "ompi/request/request.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#ifdef HAVE_SCHED_YIELD
|
||||
# include <sched.h>
|
||||
# define SPIN sched_yield()
|
||||
#elif defined(__WINDOWS__)
|
||||
# define SPIN SwitchToThread()
|
||||
#else /* no switch available */
|
||||
# define SPIN
|
||||
#endif
|
||||
|
||||
/*
 * Memory Management
 * - All memory allocation will be done on a per-communicator basis
 * - At least two banks of memory will be used
 * - Each bank of memory will have M buffers (or segments)
 * - These buffers will be used in a circular buffer order
 * - Each buffer will be contiguous in virtual memory, and will have page-aligned
 * regions belonging to each process in the communicator
 * - The memory associated with each process will have a control region, and
 * a data region.
 * - First touch will be used to enforce memory locality, and thus relies on
 * processor affinity to be set.
 * - A non-blocking collective will be issued when all buffers in a bank have
 * been used. This will be completed before this bank is re-used.
 */
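A minimal sketch of how the circular bank/buffer indexing described above could be computed; it is illustrative only, and the counter and parameter names are hypothetical rather than the component's actual fields (assumes a plain C toolchain, nothing from this header):

/* Illustrative sketch (hypothetical names): map a monotonically increasing
 * allocation counter onto the circular bank/buffer layout described above. */
static inline void sm2_locate_buffer_sketch(long long alloc_counter,
                                            int n_banks, int n_buffers_per_bank,
                                            int *bank, int *buffer_in_bank)
{
    int n_total = n_banks * n_buffers_per_bank;
    int idx = (int)(alloc_counter % n_total);  /* circular buffer order */
    *bank = idx / n_buffers_per_bank;          /* bank whose nb-barrier must have completed */
    *buffer_in_bank = idx % n_buffers_per_bank;
}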
|
||||
|
||||
/**
|
||||
* Structure to hold the sm coll component. First it holds the
|
||||
* base coll component, and then holds a bunch of
|
||||
* sm-coll-component-specific stuff (e.g., current MCA param
|
||||
* values).
|
||||
*/
|
||||
struct mca_coll_sm2_component_t {
|
||||
/** Base coll component */
|
||||
mca_coll_base_component_2_0_0_t super;
|
||||
|
||||
/** MCA parameter: Priority of this component */
|
||||
int sm2_priority;
|
||||
|
||||
/** MCA parameter: control region size (bytes), per proc */
|
||||
size_t sm2_ctl_size_per_proc;
|
||||
|
||||
/** MCA parameter: control region size (bytes) actually allocated - per proc*/
|
||||
size_t sm2_ctl_size_allocated;
|
||||
|
||||
/** MCA parameter: control region alignment */
|
||||
size_t sm2_ctl_alignment;
|
||||
|
||||
/** MCA parameter: Max data Segment size */
|
||||
size_t sm2_max_data_seg_size;
|
||||
|
||||
/** MCA parameter: Min data Segment size */
|
||||
size_t sm2_data_seg_size;
|
||||
|
||||
/** MCA parameter: control data size (bytes) actually allocated - per proc*/
|
||||
size_t sm2_data_size_allocated;
|
||||
|
||||
/** MCA parameter: data region alignment */
|
||||
int sm2_data_alignment;
|
||||
|
||||
/** MCA parameter: number of memory banks */
|
||||
int sm2_num_mem_banks;
|
||||
|
||||
/** MCA parameter: number of regions per memory bank */
|
||||
int sm2_num_regions_per_bank;
|
||||
|
||||
/** MCA parameter: order of buffer management barrier tree */
|
||||
int order_barrier_tree;
|
||||
|
||||
/** MCA parameter: order of reduction tree */
|
||||
int order_reduction_tree;
|
||||
|
||||
/** MCA parameter: order of fan-out read tree */
|
||||
int order_fanout_read_tree;
|
||||
|
||||
/** MCA parameter: number of polling loops to run while waiting
 * for children or parent to complete their work
 */
|
||||
int n_poll_loops;
|
||||
|
||||
/** MCA parameter: message size cutoff for switching between
|
||||
* short and long protocol
|
||||
*/
|
||||
size_t short_message_size;
|
||||
|
||||
/*
|
||||
* Parameters to control methods used
|
||||
*/
|
||||
/** MCA parameter: method to force a given barrier method to be used.
|
||||
* 0 - FANIN_FAN_OUT_BARRIER_FN
|
||||
* 1 - RECURSIVE_DOUBLING_BARRIER_FN
|
||||
*/
|
||||
int force_barrier;
|
||||
|
||||
/** MCA parameter: method to force a given reduce method to be used.
|
||||
* 0 - FANIN_FAN_OUT_REDUCE_FN
|
||||
* 1 - REDUCE_SCATTER_GATHER_FN
|
||||
*/
|
||||
int force_reduce;
|
||||
|
||||
/** MCA parameter: method to force a given allreduce method to be used.
|
||||
* 0 - FANIN_FANOUT_ALLREDUCE_FN
|
||||
* 1 - REDUCE_SCATTER_ALLGATHER_FN
|
||||
*/
|
||||
int force_allreduce;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
||||
|
||||
/*
|
||||
* Implemented function index list
|
||||
*/
|
||||
|
||||
/* barrier */
|
||||
enum{
|
||||
FANIN_FAN_OUT_BARRIER_FN,
|
||||
RECURSIVE_DOUBLING_BARRIER_FN,
|
||||
N_BARRIER_FNS
|
||||
};
|
||||
|
||||
/* reduce */
|
||||
enum{
|
||||
FANIN_REDUCE_FN,
|
||||
REDUCE_SCATTER_GATHER_FN,
|
||||
N_REDUCE_FNS
|
||||
};
|
||||
enum{
|
||||
SHORT_DATA_FN_REDUCE,
|
||||
LONG_DATA_FN_REDUCE,
|
||||
N_REDUCE_FNS_USED
|
||||
};
|
||||
|
||||
/* all-reduce */
|
||||
enum{
|
||||
FANIN_FANOUT_ALLREDUCE_FN,
|
||||
REDUCE_SCATTER_ALLGATHER_FN,
|
||||
N_ALLREDUCE_FNS
|
||||
};
|
||||
enum{
|
||||
SHORT_DATA_FN_ALLREDUCE,
|
||||
LONG_DATA_FN_ALLREDUCE,
|
||||
N_ALLREDUCE_FNS_USED
|
||||
};
|
||||
|
||||
|
||||
/* enum for node type */
|
||||
enum{
|
||||
ROOT_NODE,
|
||||
LEAF_NODE,
|
||||
INTERIOR_NODE
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* N-order tree node description
|
||||
*/
|
||||
struct tree_node_t {
|
||||
/* my rank within the group */
|
||||
int my_rank;
|
||||
/* my node type - root, leaf, or interior */
|
||||
int my_node_type;
|
||||
/* number of nodes in the tree */
|
||||
int tree_size;
|
||||
/* number of parents (0/1) */
|
||||
int n_parents;
|
||||
/* number of children */
|
||||
int n_children;
|
||||
/* parent rank within the group */
|
||||
int parent_rank;
|
||||
/* children ranks within the group */
|
||||
int *children_ranks;
|
||||
};
|
||||
typedef struct tree_node_t tree_node_t;
|
||||
|
||||
/*
|
||||
* Pair-wise data exchange
|
||||
*/
|
||||
/* enum for node type */
|
||||
enum{
|
||||
EXCHANGE_NODE,
|
||||
EXTRA_NODE
|
||||
};
|
||||
|
||||
struct pair_exchange_node_t {
|
||||
|
||||
/* number of nodes this node will exchange data with */
|
||||
int n_exchanges;
|
||||
|
||||
/* ranks of nodes involved in data exchange */
|
||||
int *rank_exchanges;
|
||||
|
||||
/* number of extra sources of data - outside largest power of 2 in
|
||||
* this group */
|
||||
int n_extra_sources;
|
||||
|
||||
/* rank of the extra source */
|
||||
int rank_extra_source;
|
||||
|
||||
/* number of tags needed per stripe */
|
||||
int n_tags;
|
||||
|
||||
/* log 2 of largest full power of 2 for this node set */
|
||||
int log_2;
|
||||
|
||||
/* largest power of 2 that fits in this group */
|
||||
int n_largest_pow_2;
|
||||
|
||||
/* node type */
|
||||
int node_type;
|
||||
|
||||
};
|
||||
typedef struct pair_exchange_node_t pair_exchange_node_t;
|
||||
|
||||
/*
|
||||
* Barrier request objects
|
||||
*/
|
||||
|
||||
/* shared memory data structures */
|
||||
struct mca_coll_sm2_nb_request_process_shared_mem_t {
|
||||
/* flag used to indicate the status of this memory region */
|
||||
volatile long long flag;
|
||||
volatile long long index;
|
||||
|
||||
/* padding */
|
||||
/* Note: need to change this so it takes less memory */
|
||||
char padding[2*opal_cache_line_size-2*sizeof(long long)];
|
||||
};
|
||||
|
||||
typedef struct mca_coll_sm2_nb_request_process_shared_mem_t
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t;
|
||||
|
||||
/* enum for the phase the nb barrier is in */
|
||||
enum{
|
||||
NB_BARRIER_INACTIVE,
|
||||
NB_BARRIER_FAN_IN,
|
||||
NB_BARRIER_FAN_OUT,
|
||||
/* done and not started are the same for all practical
 * purposes, as the init function always sets this flag
|
||||
*/
|
||||
NB_BARRIER_DONE
|
||||
};
|
||||
|
||||
/* forward declaration */
|
||||
struct mca_coll_sm2_module_t;
|
||||
|
||||
/*
|
||||
* shared memory region descriptor
|
||||
*/
|
||||
struct sm_memory_region_desc_t {
|
||||
|
||||
/* pointer to control structures */
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *control_region;
|
||||
|
||||
/* pointer to data segment, and lower half of data segment */
|
||||
volatile char *data_segment;
|
||||
|
||||
};
|
||||
typedef struct sm_memory_region_desc_t sm_memory_region_desc_t;
|
||||
|
||||
/*
|
||||
* Shared memory buffer management structure
|
||||
*/
|
||||
struct sm_work_buffer_t {
|
||||
/* pointer to segment base */
|
||||
volatile char * base_segment_address;
|
||||
|
||||
/* description of how the memory segment is mapped on
|
||||
* a per process basis
|
||||
*/
|
||||
sm_memory_region_desc_t *proc_memory;
|
||||
|
||||
/*
|
||||
* bank index
|
||||
*/
|
||||
int bank_index;
|
||||
|
||||
/*
|
||||
* first buffer in the bank - if the barrier corresponding to
|
||||
* this bank is active when trying to allocate this buffer,
|
||||
* can't proceed until it completes
|
||||
*/
|
||||
int index_first_buffer_in_bank;
|
||||
|
||||
/* last buffer in the bank - nb barrier is started after this
|
||||
* buffer is freed.
|
||||
*/
|
||||
int index_last_buffer_in_bank;
|
||||
};
|
||||
typedef struct sm_work_buffer_t sm_work_buffer_t;
|
||||
|
||||
/* process private barrier request object */
|
||||
struct mca_coll_sm2_nb_request_process_private_mem_t {
|
||||
struct ompi_request_t super;
|
||||
/* tag that will be used as unique barrier identifier */
|
||||
long long tag;
|
||||
|
||||
/* barrier phase */
|
||||
int sm2_barrier_phase;
|
||||
|
||||
/* shared memory structure index - will be flip-flopping between structures */
|
||||
int sm_index;
|
||||
|
||||
/* this process's base address of the barrier shared memory region */
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *barrier_base_address[2];
|
||||
|
||||
/* module pointer */
|
||||
struct mca_coll_sm2_module_t *coll_sm2_module;
|
||||
|
||||
};
|
||||
typedef struct mca_coll_sm2_nb_request_process_private_mem_t
|
||||
mca_coll_sm2_nb_request_process_private_mem_t;
|
||||
|
||||
/* debug */
|
||||
#define BARRIER_BANK_LIST_SIZE 32
|
||||
/* end debug */
|
||||
struct mca_coll_sm2_module_t {
|
||||
/* base structure */
|
||||
mca_coll_base_module_t super;
|
||||
|
||||
/* size */
|
||||
int comm_size;
|
||||
|
||||
/* Shared Memory file name */
|
||||
char *coll_sm2_file_name;
|
||||
|
||||
/* size of shared memory backing file */
|
||||
size_t size_sm2_backing_file;
|
||||
|
||||
/* Memory pointer to shared file */
|
||||
char *shared_memory_region;
|
||||
|
||||
/* size of memory banks control regions */
|
||||
size_t size_mem_banks_ctl_region;
|
||||
|
||||
/* Pointer to the collective buffers */
|
||||
char *collective_buffer_region;
|
||||
|
||||
/* size of collective buffer region */
|
||||
size_t size_of_collective_buffer_region;
|
||||
|
||||
/* pointer to memory for blocking collectives */
|
||||
char *sm_blocking_barrier_region;
|
||||
|
||||
/* size of memory for blocking collectives */
|
||||
size_t size_of_blocking_barrier_region;
|
||||
|
||||
/* per proc size of memory for blocking collectives */
|
||||
size_t per_proc_size_of_blocking_barrier_region;
|
||||
|
||||
/* index of blocking barrier memory region to use */
|
||||
int index_blocking_barrier_memory_bank;
|
||||
|
||||
/* pointers to blocking memory control regions */
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t ***ctl_blocking_barrier;
|
||||
|
||||
/* description of allocated temp buffers - one struct per
|
||||
* buffer. Each buffer has space "owned" by each process
|
||||
* in the group.
|
||||
*/
|
||||
sm_work_buffer_t *sm_buffer_descriptor;
|
||||
|
||||
/* size of memory region, per process, for memory bank management */
|
||||
size_t sm2_size_management_region_per_proc;
|
||||
|
||||
/* size of each memory segment */
|
||||
size_t segment_size;
|
||||
|
||||
/* size, per process, of each memory segment */
|
||||
size_t segement_size_per_process;
|
||||
|
||||
/* size, per process and segment , of control region */
|
||||
size_t ctl_memory_per_proc_per_segment;
|
||||
|
||||
/* size, per process and segment , of data region */
|
||||
size_t data_memory_per_proc_per_segment;
|
||||
|
||||
/* data structures used to manage the memory buffers */
|
||||
long long num_nb_barriers_started;
|
||||
long long num_nb_barriers_completed;
|
||||
|
||||
/* number of memory banks */
|
||||
int sm2_module_num_memory_banks;
|
||||
|
||||
/* number of buffers per memory bank */
|
||||
int sm2_module_num_regions_per_bank;
|
||||
|
||||
/* total number of working buffers */
|
||||
int sm2_module_num_buffers;
|
||||
|
||||
/* allocated buffer index - local counter */
|
||||
int sm2_allocated_buffer_index;
|
||||
|
||||
/* freed allocated buffer index - local counter */
|
||||
int sm2_freed_buffer_index;
|
||||
|
||||
/* communicator - there is a one-to-one association between
|
||||
* the communicator and the module
|
||||
*/
|
||||
struct ompi_communicator_t *module_comm;
|
||||
|
||||
/* non-blocking barrier structures used for managing the shared
|
||||
* buffers */
|
||||
tree_node_t sm_buffer_mgmt_barrier_tree;
|
||||
|
||||
/* request objects for the non-blocking barrier */
|
||||
mca_coll_sm2_nb_request_process_private_mem_t *barrier_request;
|
||||
|
||||
/* barrier request to progress */
|
||||
int current_request_index;
|
||||
|
||||
/* unique tag used for non-blocking collectives */
|
||||
long long nb_barrier_tag;
|
||||
|
||||
/* multinomial reduction tree */
|
||||
tree_node_t *reduction_tree;
|
||||
|
||||
/* multinomial fan-out read tree */
|
||||
tree_node_t *fanout_read_tree;
|
||||
|
||||
/* recursive-doubling tree node */
|
||||
pair_exchange_node_t recursive_doubling_tree;
|
||||
|
||||
/* number of polling loops to run while waiting
|
||||
* for children or parent to complete their work
|
||||
*/
|
||||
int n_poll_loops;
|
||||
|
||||
/* collective tag */
|
||||
long long collective_tag;
|
||||
|
||||
/* scratch space - one int per process */
|
||||
int *scratch_space;
|
||||
|
||||
/* message size cutoff for switching between short and long
|
||||
* protocol
|
||||
*/
|
||||
size_t short_message_size;
|
||||
|
||||
/*
|
||||
* flag indicating if we have the socket layout for the procs
|
||||
*/
|
||||
int have_socket_information;
|
||||
|
||||
/*
|
||||
* socket index
|
||||
*/
|
||||
int *socket_index;
|
||||
|
||||
/*
|
||||
* number of processes per socket
|
||||
*/
|
||||
int *n_procs_per_socket;
|
||||
|
||||
/*
|
||||
* sockets in use
|
||||
*/
|
||||
int *sockets_in_use;
|
||||
|
||||
/*
|
||||
* index of my socket within the list of sockets in use
|
||||
*/
|
||||
int my_socket_group;
|
||||
|
||||
/*
|
||||
* list of ranks on each socket
|
||||
*/
|
||||
int **list_of_ranks_per_socket;
|
||||
|
||||
/*
|
||||
* function table for variants of a given collective
|
||||
* function.
|
||||
*/
|
||||
mca_coll_base_module_barrier_fn_t barrier_functions[N_BARRIER_FNS];
|
||||
mca_coll_base_module_reduce_fn_t list_reduce_functions[N_REDUCE_FNS];
|
||||
mca_coll_base_module_reduce_fn_t reduce_functions[N_REDUCE_FNS_USED];
|
||||
mca_coll_base_module_allreduce_fn_t
|
||||
list_allreduce_functions[N_ALLREDUCE_FNS];
|
||||
mca_coll_base_module_allreduce_fn_t
|
||||
allreduce_functions[N_ALLREDUCE_FNS_USED];
|
||||
|
||||
|
||||
};
|
||||
|
||||
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_sm2_module_t);
|
||||
|
||||
/*
|
||||
* struct for managing the allreduce pipeline.
|
||||
*/
|
||||
struct mca_coll_sm2_module_allreduce_pipeline_t {
|
||||
/* pointer to shared temporary working buffer */
|
||||
sm_work_buffer_t *shared_buffer;
|
||||
|
||||
/* cached rank */
|
||||
int my_rank;
|
||||
|
||||
/* cached reduction node */
|
||||
tree_node_t *my_reduction_node;
|
||||
|
||||
/* cached fanout tree */
|
||||
tree_node_t *my_fanout_read_tree;
|
||||
|
||||
|
||||
/* status of the buffer - determines what to do next
|
||||
* with this data
|
||||
*/
|
||||
int status;
|
||||
|
||||
/*
|
||||
* number of child loops completed - needed for
|
||||
* async progress
|
||||
*/
|
||||
int n_child_loops_completed;
|
||||
|
||||
/*
|
||||
* number of data-type elements to process
|
||||
*/
|
||||
int count_this_stripe;
|
||||
|
||||
/*
|
||||
* offset into the data type buffer, in units of data-types
|
||||
*/
|
||||
int count_processed;
|
||||
|
||||
/*
|
||||
* tag
|
||||
*/
|
||||
long long tag;
|
||||
};
|
||||
typedef struct mca_coll_sm2_module_allreduce_pipeline_t
|
||||
mca_coll_sm2_module_allreduce_pipeline_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_sm2_module_allreduce_pipeline_t);
|
||||
|
||||
enum {
|
||||
BUFFER_AVAILABLE,
|
||||
STARTED,
|
||||
FANIN,
|
||||
FANOUT
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Global component instance
|
||||
*/
|
||||
OMPI_MODULE_DECLSPEC extern mca_coll_sm2_component_t mca_coll_sm2_component;
|
||||
|
||||
|
||||
/*
|
||||
* coll module functions
|
||||
*/
|
||||
|
||||
/* query to see if the component is available for use, and can
|
||||
* satisfy the thread and progress requirements
|
||||
*/
|
||||
int mca_coll_sm2_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
|
||||
/* query to see if the module is available for use on the given
|
||||
* communicator, and if so, what its priority is.
|
||||
*/
|
||||
mca_coll_base_module_t *
|
||||
mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority);
|
||||
|
||||
/* setup a multi-nomial tree - for each node in the tree
 * this returns its parent and its children
|
||||
*/
|
||||
int setup_multinomial_tree(int tree_order, int num_nodes,
|
||||
tree_node_t *tree_nodes);
|
||||
|
||||
/* setup recursive doubling tree node */
|
||||
int setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
|
||||
pair_exchange_node_t *tree_node);
|
||||
|
||||
/* non-blocking barrier - init function */
|
||||
int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
||||
mca_coll_sm2_nb_request_process_private_mem_t *request,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/* non-blocking barrier - completion function */
|
||||
int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
||||
mca_coll_sm2_nb_request_process_private_mem_t *request,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/* allocate working buffer */
|
||||
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
|
||||
|
||||
/* free working buffer - it is assumed that buffers are released in
|
||||
* the order they are allocated. We can assume this because each
|
||||
* communicator will have only one outstanding collective at a given
|
||||
* time, and we ensure that operations are completed in order. */
|
||||
int free_sm2_shared_buffer(mca_coll_sm2_module_t *module);
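As a rough illustration of the in-order allocate/release assumption above, two counters are enough to track the buffer pipeline; the struct below is a hypothetical sketch, not the module's real bookkeeping (which lives in mca_coll_sm2_module_t):

/* Hypothetical sketch: buffers are handed out and returned strictly in order,
 * so a pair of monotonically increasing counters describes the pipeline. */
struct sm2_buffer_cursor_sketch {
    long long allocated;  /* bumped by each alloc_sm2_shared_buffer() call */
    long long freed;      /* bumped by each free_sm2_shared_buffer() call  */
};
/* A bank may be reused only after the non-blocking barrier that was started
 * when its last buffer was freed has completed. */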
|
||||
|
||||
/**
|
||||
* Shared memory blocking allreduce.
|
||||
*/
|
||||
int mca_coll_sm2_allreduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_allreduce_intra_reducescatter_allgather(
|
||||
void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf,
|
||||
int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/**
|
||||
* Shared memory blocking reduce
|
||||
*/
|
||||
int mca_coll_sm2_reduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf,
|
||||
int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/**
|
||||
* Shared memory blocking broadcast.
|
||||
*/
|
||||
int mca_coll_sm2_bcast_intra(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/**
|
||||
* Shared memory blocking barrier
|
||||
*/
|
||||
int mca_coll_sm2_barrier_intra( struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_barrier_intra_fanin_fanout(
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_sm2_barrier_intra_recursive_doubling(
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_COLL_SM2_EXPORT_H */
|
The diff for this file is not shown because it is too large.
@ -1,711 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file */
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "coll_sm2.h"
|
||||
/* debug
|
||||
extern int debug_print;
|
||||
end debug */
|
||||
|
||||
/**
|
||||
* Shared memory barrier.
|
||||
*
|
||||
* Tree-based algorithm for a barrier: a fan in to rank 0 followed by
|
||||
* a fan out using the barrier segments in the shared memory area.
|
||||
*
|
||||
* There are 2 sets of barrier buffers -- since there can only be, at
|
||||
* most, 2 outstanding barriers at any time, there is no need for more
|
||||
* than this. The generalized in-use flags, control, and data
|
||||
* segments are not used.
|
||||
*
|
||||
* The general algorithm is for a given process to wait for its N
|
||||
* children to fan in by monitoring a uint32_t in its barrier "in"
|
||||
* buffer. When this value reaches N (i.e., each of the children have
|
||||
* atomically incremented the value), then the process atomically
|
||||
* increases the uint32_t in its parent's "in" buffer. Then the
|
||||
* process waits for the parent to set a "1" in the process' "out"
|
||||
* buffer. Once this happens, the process writes a "1" in each of its
|
||||
* children's "out" buffers, and returns.
|
||||
*
|
||||
* There's corner cases, of course, such as the root that has no
|
||||
* parent, and the leaves that have no children. But that's the
|
||||
* general idea.
|
||||
*/
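A compressed sketch of the fan-in/fan-out pattern described above; the helper uses hypothetical names, assumes <stdint.h> for uint32_t, and uses plain counters (the routines that follow are tag-based and call opal_progress() while spinning), so treat it as an outline rather than the component's implementation:

/* Sketch only (hypothetical names): fan in by waiting for all children to
 * check in, report to the parent, then fan out by releasing the children. */
static void sm2_barrier_fan_in_fan_out_sketch(int n_children, int have_parent,
                                              volatile uint32_t *my_in,
                                              volatile uint32_t *parent_in,
                                              volatile uint32_t *my_out,
                                              volatile uint32_t **child_out)
{
    int i;
    while (*my_in < (uint32_t) n_children) {
        ;                          /* spin: children have not all arrived */
    }
    if (have_parent) {
        *parent_in += 1;           /* the real code would do this atomically */
        while (1 != *my_out) {
            ;                      /* spin: wait for the parent's release */
        }
    }
    for (i = 0; i < n_children; i++) {
        *child_out[i] = 1;         /* release each child */
    }
}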
|
||||
|
||||
/* non-blocking barrier - init function */
|
||||
int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
||||
mca_coll_sm2_nb_request_process_private_mem_t *request,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
|
||||
/* since completion must be in-order for the sm collective buffer allocation
|
||||
* to work correctly, no barrier completion will happen here. The most
|
||||
* that will be done is for the leaf processes, to signal their presence.
|
||||
*/
|
||||
/* local variables */
|
||||
int index;
|
||||
long long tag;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *sm_barrier_region;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *sm_address;
|
||||
|
||||
/* get pointer to nb-barrier structure */
|
||||
index=request->sm_index;
|
||||
sm_barrier_region=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
(request->barrier_base_address[index]);
|
||||
|
||||
/* set barrier tag - no atomicity needed as only one outstanding
|
||||
* collective per communicator exists
|
||||
*/
|
||||
sm_module=(mca_coll_sm2_module_t *)module;
|
||||
sm_module->nb_barrier_tag++;
|
||||
request->tag=sm_module->nb_barrier_tag;
|
||||
tag=sm_module->nb_barrier_tag;
|
||||
|
||||
if( LEAF_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.my_rank*
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
sm_address->flag=tag;
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
|
||||
} else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
}
|
||||
|
||||
/* return - successful completion */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* non-blocking barrier - completion function */
|
||||
int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
||||
mca_coll_sm2_nb_request_process_private_mem_t *request,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
|
||||
/* local variables */
|
||||
int index;
|
||||
int child,cnt,phase;
|
||||
long long tag;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *sm_barrier_region;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *sm_address;
|
||||
|
||||
/* get pointer to nb-barrier structure */
|
||||
index=request->sm_index;
|
||||
sm_barrier_region=request->barrier_base_address[index];
|
||||
|
||||
/* set barrier tag - no atomicity needed as only one outstanding
|
||||
* collective per communicator exists
|
||||
*/
|
||||
sm_module=(mca_coll_sm2_module_t *)module;
|
||||
tag=request->tag;
|
||||
|
||||
if( LEAF_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
||||
phase=request->sm2_barrier_phase;
|
||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||
goto FANOUT_LEAF;
|
||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* default - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
FANOUT_LEAF:
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank*
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
if( sm_address->flag != -tag ) {
|
||||
/* if parent has not checked in - set parameters for async
|
||||
* completion, incomplete barrier flag, and bail
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
} else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
||||
phase=request->sm2_barrier_phase;
|
||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||
goto FANOUT_INTERIOR;
|
||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* default - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->sm_buffer_mgmt_barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child] *
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set parameters for async
 * completion, incomplete barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->sm_buffer_mgmt_barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.my_rank *
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
FANOUT_INTERIOR:
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank*
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
if( sm_address->flag != -tag ) {
|
||||
/* if parent has not checked in - set parameters for async
|
||||
* completion, incomplete barrier flag, and bail
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.my_rank *
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
} else {
|
||||
/* root node */
|
||||
phase=request->sm2_barrier_phase;
|
||||
if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* default - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->sm_buffer_mgmt_barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child] *
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set parameters for async
 * completion, incomplete barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->sm_buffer_mgmt_barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->sm_buffer_mgmt_barrier_tree.my_rank *
|
||||
sm_module->sm2_size_management_region_per_proc);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
}
|
||||
|
||||
DONE:
|
||||
/* return - successful completion */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared memory blocking allreduce.
|
||||
*/
|
||||
int mca_coll_sm2_barrier_intra_fanin_fanout(
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc=OMPI_SUCCESS,bar_buff_index;
|
||||
int my_rank, child_rank, child, n_parents, n_children;
|
||||
int my_fanin_parent;
|
||||
int my_fanout_parent;
|
||||
long long tag;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * child_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
tree_node_t *my_reduction_node, *my_fanout_read_tree;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* get my node for the reduction tree */
|
||||
my_rank=ompi_comm_rank(comm);
|
||||
my_reduction_node=&(sm_module->reduction_tree[my_rank]);
|
||||
my_fanout_read_tree=&(sm_module->fanout_read_tree[my_rank]);
|
||||
n_children=my_reduction_node->n_children;
|
||||
n_parents=my_reduction_node->n_parents;
|
||||
my_fanin_parent=my_reduction_node->parent_rank;
|
||||
my_fanout_parent=my_fanout_read_tree->parent_rank;
|
||||
|
||||
/* get unique tag for this stripe - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
|
||||
/*
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
*/
|
||||
sm_module->index_blocking_barrier_memory_bank^=1;
|
||||
bar_buff_index=sm_module->index_blocking_barrier_memory_bank;
|
||||
|
||||
my_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][my_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
*/
|
||||
|
||||
/***************************
|
||||
* Fan into root phase
|
||||
***************************/
|
||||
|
||||
if( LEAF_NODE != my_reduction_node->my_node_type ) {
|
||||
|
||||
/*
|
||||
* Wait on children, and apply op to their data
|
||||
*/
|
||||
for( child=0 ; child < n_children ; child++ ) {
|
||||
|
||||
child_rank=my_reduction_node->children_ranks[child];
|
||||
child_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][child_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[child_rank].control_region;
|
||||
*/
|
||||
|
||||
/* wait until child flag is set */
|
||||
while( child_ctl_pointer->flag != tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
|
||||
/* end test */
|
||||
} /* end child loop */
|
||||
|
||||
/* set memory barrier to make sure data is in main memory before
 * the completion flags are set.
|
||||
*/
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
} else {
|
||||
/* leaf node */
|
||||
|
||||
/* set memory barrier to make sure data is in main memory before
 * the completion flags are set.
|
||||
*/
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
}
|
||||
|
||||
/***************************
|
||||
* Fan out from root
|
||||
***************************/
|
||||
/*
|
||||
* Fan out from root - let the memory copies at each
|
||||
* stage help reduce memory contention.
|
||||
*/
|
||||
if( ROOT_NODE == my_fanout_read_tree->my_node_type ) {
|
||||
/* I am the root - so copy signal children, and then
|
||||
* start reading
|
||||
*/
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
my_ctl_pointer->flag=-tag;
|
||||
|
||||
|
||||
} else if( LEAF_NODE == my_fanout_read_tree->my_node_type ) {
|
||||
|
||||
parent_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][my_fanout_parent];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||
*/
|
||||
|
||||
/*
|
||||
* wait on Parent to signal that data is ready
|
||||
*/
|
||||
while( parent_ctl_pointer->flag != -tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
} else {
|
||||
/* interior nodes */
|
||||
|
||||
parent_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][my_fanout_parent];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||
*/
|
||||
|
||||
/*
|
||||
* wait on Parent to signal that data is ready
|
||||
*/
|
||||
while( parent_ctl_pointer->flag != -tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* set memory barrier to make sure data is in main memory before
 * the completion flags are set.
|
||||
*/
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
|
||||
/* signal children that they may read the result data */
|
||||
my_ctl_pointer->flag=-tag;
|
||||
|
||||
}
|
||||
|
||||
/* "free" the shared-memory working buffer */
|
||||
/*
|
||||
rc=free_sm2_shared_buffer(sm_module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
*/
|
||||
|
||||
/* return */
|
||||
return rc;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared memory blocking barrier.
|
||||
*/
|
||||
int mca_coll_sm2_barrier_intra_recursive_doubling(
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc=OMPI_SUCCESS;
|
||||
int pair_rank,exchange,extra_rank;
|
||||
pair_exchange_node_t *my_exchange_node;
|
||||
int my_rank,bar_buff_index;
|
||||
long long tag, base_tag;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||
partner_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||
extra_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
/* debug
|
||||
opal_timer_t t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
|
||||
end debug */
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* get my node for the reduction tree */
|
||||
my_exchange_node=&(sm_module->recursive_doubling_tree);
|
||||
my_rank=ompi_comm_rank(comm);
|
||||
|
||||
/* get pointer to barrier structure */
|
||||
sm_module->index_blocking_barrier_memory_bank^=1;
|
||||
bar_buff_index=sm_module->index_blocking_barrier_memory_bank;
|
||||
|
||||
|
||||
/* get unique set of tags for this stripe.
|
||||
* Assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
base_tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag+=my_exchange_node->n_tags;
|
||||
|
||||
/* get pointers to my work buffers */
|
||||
my_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][my_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
*/
|
||||
|
||||
/* copy data in from the "extra" source, if need be */
|
||||
tag=base_tag;
|
||||
if(0 < my_exchange_node->n_extra_sources) {
|
||||
|
||||
if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
|
||||
|
||||
extra_rank=my_exchange_node->rank_extra_source;
|
||||
extra_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||
*/
|
||||
|
||||
/* wait until remote data is read */
|
||||
while( extra_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
tag=base_tag+1;
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* loop over data exchanges */
|
||||
for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
|
||||
|
||||
/* debug
|
||||
t4=opal_timer_base_get_cycles();
|
||||
end debug */
|
||||
|
||||
/* is the remote data read */
|
||||
pair_rank=my_exchange_node->rank_exchanges[exchange];
|
||||
partner_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][pair_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[pair_rank].control_region;
|
||||
*/
|
||||
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* wait until remote data is read */
|
||||
while( partner_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* end test */
|
||||
|
||||
/* signal that I am done reading my peer's data */
|
||||
tag++;
|
||||
|
||||
}
|
||||
|
||||
/* copy data in from the "extra" source, if need be */
|
||||
if(0 < my_exchange_node->n_extra_sources) {
|
||||
tag=base_tag+my_exchange_node->n_tags-1;
|
||||
|
||||
if ( EXTRA_NODE == my_exchange_node->node_type ) {
|
||||
|
||||
extra_rank=my_exchange_node->rank_extra_source;
|
||||
extra_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||
*/
|
||||
|
||||
/* wait until remote data is read */
|
||||
while(! ( extra_ctl_pointer->flag == tag ) ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
|
||||
/* signal that I am done */
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
} else {
|
||||
|
||||
tag=base_tag+my_exchange_node->n_tags-1;
|
||||
/* set memory barrier to make sure data is in main memory before
 * the completion flags are set.
|
||||
*/
|
||||
|
||||
/*
|
||||
MB();
|
||||
*/
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* wait until child is done to move on - this buffer will
|
||||
* be reused for the next stripe, so don't want to move
|
||||
* on too quick.
|
||||
*/
|
||||
extra_rank=my_exchange_node->rank_extra_source;
|
||||
extra_ctl_pointer=
|
||||
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||
/*
|
||||
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||
*/
|
||||
|
||||
/* wait until remote data is read */
|
||||
while( extra_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* debug
|
||||
|
||||
t9=opal_timer_base_get_cycles();
|
||||
timers[5]+=(t9-t8);
|
||||
end debug */
|
||||
|
||||
|
||||
/* "free" the shared-memory working buffer */
|
||||
rc=free_sm2_shared_buffer(sm_module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* return */
|
||||
return rc;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
||||
/**
|
||||
* Shared memory blocking barrier
|
||||
*/
|
||||
int mca_coll_sm2_barrier_intra( struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
rc= sm_module->barrier_functions[0](comm, module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
@ -1,234 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 UT-Battelle, LLC
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file */
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_sm2.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
/* debug
|
||||
#include "opal/sys/timer.h"
|
||||
|
||||
extern uint64_t timers[7];
|
||||
end debug */
|
||||
|
||||
/**
|
||||
* Shared memory blocking allreduce.
|
||||
*/
|
||||
|
||||
static
|
||||
int mca_coll_sm2_fanout(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
|
||||
int comm_size,process_shift,my_node_index;
|
||||
int my_rank;
|
||||
int count_processed,count_this_stripe;
|
||||
int my_fanout_parent;
|
||||
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
|
||||
long long tag;
|
||||
volatile char * my_data_pointer;
|
||||
volatile char * parent_data_pointer;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
tree_node_t *my_fanout_read_tree;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
|
||||
/* compute process shift */
|
||||
my_rank=ompi_comm_rank(comm);
|
||||
comm_size=ompi_comm_size(comm);
|
||||
process_shift=root;
|
||||
my_node_index=my_rank-root;
|
||||
/* wrap around */
|
||||
if(0 > my_node_index ) {
|
||||
my_node_index+=comm_size;
|
||||
}
|
||||
|
||||
|
||||
/* get size of data needed - same layout as user data, so that
|
||||
* we can apply the reduction routines directly on these buffers
|
||||
*/
|
||||
rc=ompi_datatype_type_extent(dtype, &dt_extent);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
message_extent=dt_extent*count;
|
||||
|
||||
/* length of control and data regions */
|
||||
ctl_size=sm_module->ctl_memory_per_proc_per_segment;
|
||||
len_data_buffer=sm_module->data_memory_per_proc_per_segment;
|
||||
|
||||
/* number of data type copies that the scratch buffer can hold */
|
||||
n_dts_per_buffer=((int) len_data_buffer)/dt_extent;
|
||||
if ( 0 == n_dts_per_buffer ) {
|
||||
rc=OMPI_ERROR;
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* compute number of stripes needed to process this collective */
|
||||
n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
|
||||
|
||||
/* get my node for the reduction tree */
|
||||
my_fanout_read_tree=&(sm_module->fanout_read_tree[my_node_index]);
|
||||
my_fanout_parent=my_fanout_read_tree->parent_rank+process_shift;
|
||||
if( comm_size <= my_fanout_parent ){
|
||||
my_fanout_parent-=comm_size;
|
||||
}
|
||||
|
||||
count_processed=0;
|
||||
|
||||
/* get a pointer to the shared-memory working buffer */
|
||||
/* NOTE: starting with a rather synchronous approach */
|
||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||
|
||||
/* get unique tag for this stripe - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
|
||||
/* get number of elements to process in this stripe */
|
||||
count_this_stripe=n_dts_per_buffer;
|
||||
if( count_processed + count_this_stripe > count )
|
||||
count_this_stripe=count-count_processed;
|
||||
|
||||
/* offset to data segment */
|
||||
my_ctl_pointer=sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
my_data_pointer=sm_buffer_desc->proc_memory[my_rank].data_segment;
|
||||
|
||||
/*
|
||||
* Fan out from root - let the memory copies at each
|
||||
* stage help reduce memory contention.
|
||||
*/
|
||||
if( ROOT_NODE == my_fanout_read_tree->my_node_type ) {
|
||||
|
||||
/* copy data to user supplied buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,
|
||||
(char *)((char *)buf+dt_extent*count_processed));
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* I am the root - so copy signal children, and then
|
||||
* start reading
|
||||
*/
|
||||
MB();
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
} else if( LEAF_NODE == my_fanout_read_tree->my_node_type ) {
|
||||
|
||||
parent_data_pointer=
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].data_segment;
|
||||
parent_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||
|
||||
/*
|
||||
* wait on Parent to signal that data is ready
|
||||
*/
|
||||
while( parent_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* copy data to user supplied buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)buf+dt_extent*count_processed,
|
||||
(char *)parent_data_pointer);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* interior nodes */
|
||||
|
||||
parent_data_pointer=
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].data_segment;
|
||||
parent_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||
|
||||
/*
|
||||
* wait on Parent to signal that data is ready
|
||||
*/
|
||||
while( parent_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* copy the data to my shared buffer, for access by children */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,(char *)parent_data_pointer);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* set memory barrier to make sure data is in main memory before
 * the completion flags are set.
|
||||
*/
|
||||
MB();
|
||||
|
||||
/* signal children that they may read the result data */
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* copy data to user supplied buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)buf+dt_extent*count_processed,
|
||||
(char *)my_data_pointer);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* "free" the shared-memory working buffer */
|
||||
rc=free_sm2_shared_buffer(sm_module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* update the count of elements processed */
|
||||
count_processed+=count_this_stripe;
|
||||
}
|
||||
|
||||
/* return */
|
||||
return rc;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared memory blocking broadcast.
|
||||
*/
|
||||
int mca_coll_sm2_bcast_intra(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc;
|
||||
|
||||
rc= mca_coll_sm2_fanout(buf, count, dtype, root, comm, module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
@ -1,208 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Most of the description of the data layout is in the
|
||||
* coll_sm_module.c file.
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_sm2.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
|
||||
|
||||
/*
|
||||
* Public string showing the coll ompi_sm V2 component version number
|
||||
*/
|
||||
const char *mca_coll_sm2_component_version_string =
|
||||
"Open MPI sm-V2 collective MCA component version " OMPI_VERSION;
|
||||
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int sm2_open(void);
|
||||
static int sm2_close(void);
|
||||
|
||||
static inline int mca_coll_sm2_param_register_int(
|
||||
const char* param_name, int default_value)
|
||||
{
|
||||
int id = mca_base_param_register_int("coll","sm2",param_name,NULL,default_value);
|
||||
int param_value = default_value;
|
||||
mca_base_param_lookup_int(id,¶m_value);
|
||||
return param_value;
|
||||
}
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
mca_coll_sm2_component_t mca_coll_sm2_component = {
|
||||
|
||||
/* First, fill in the super */
|
||||
|
||||
{
|
||||
/* First, the mca_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
MCA_COLL_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
|
||||
"sm-v2",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
|
||||
sm2_open,
|
||||
sm2_close,
|
||||
},
|
||||
{
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
mca_coll_sm2_init_query,
|
||||
mca_coll_sm2_comm_query,
|
||||
},
|
||||
|
||||
/* sm-component specific information */
|
||||
|
||||
/* (default) priority */
|
||||
0,
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* Open the component
|
||||
*/
|
||||
static int sm2_open(void)
|
||||
{
|
||||
|
||||
/* local variables */
|
||||
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
|
||||
|
||||
/* set component priority */
|
||||
cs->sm2_priority=
|
||||
mca_coll_sm2_param_register_int("sm2_priority",90);
|
||||
|
||||
/* set control region size (bytes), per proc */
|
||||
cs->sm2_ctl_size_per_proc=
|
||||
mca_coll_sm2_param_register_int("sm2_ctl_size_per_proc",2*sizeof(long long));
|
||||
|
||||
    /* initialize control region allocated */
|
||||
cs->sm2_ctl_size_allocated=0;
|
||||
|
||||
/* set control region alignment (bytes) */
|
||||
cs->sm2_ctl_alignment=
|
||||
mca_coll_sm2_param_register_int("sm2_ctl_alignment",getpagesize());
|
||||
|
||||
/* Min data Segment size (bytes) - per proc */
|
||||
cs->sm2_data_seg_size=
|
||||
mca_coll_sm2_param_register_int("sm2_data_seg_size",32768);
|
||||
|
||||
/* Max data Segment size (bytes) - per proc */
|
||||
cs->sm2_max_data_seg_size=
|
||||
mca_coll_sm2_param_register_int("sm2_max_data_seg_size",20*getpagesize());
|
||||
|
||||
    /* initialize data region allocated */
|
||||
cs->sm2_data_size_allocated=0;
|
||||
|
||||
/* Data region alignment (bytes) - per proc */
|
||||
cs->sm2_data_alignment=
|
||||
mca_coll_sm2_param_register_int("sm2_data_alignment",opal_cache_line_size);
|
||||
|
||||
/* Number of memory banks */
|
||||
cs->sm2_num_mem_banks=
|
||||
mca_coll_sm2_param_register_int("sm2_num_mem_banks",2);
|
||||
|
||||
/* Number of regions per memory bank */
|
||||
cs->sm2_num_regions_per_bank=
|
||||
mca_coll_sm2_param_register_int("sm2_num_regions_per_bank",8);
|
||||
|
||||
/* Order of buffer management Barrier Tree */
|
||||
cs->order_barrier_tree=
|
||||
mca_coll_sm2_param_register_int("order_barrier_tree",2);
|
||||
|
||||
/* Order of reduction Tree */
|
||||
cs->order_reduction_tree=
|
||||
mca_coll_sm2_param_register_int("order_reduction_tree",2);
|
||||
|
||||
/* Order of fan-out read Tree */
|
||||
cs->order_fanout_read_tree=
|
||||
mca_coll_sm2_param_register_int("order_fanout_read_tree",4);
|
||||
|
||||
/* number of polling loops to allow pending resources to
|
||||
* complete their work
|
||||
*/
|
||||
cs->n_poll_loops=
|
||||
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
||||
|
||||
/* Size of message for switching between short and long protocol.
|
||||
* This should probably be the segment size for several algorithms,
|
||||
* though not all.
|
||||
*/
|
||||
cs->short_message_size=
|
||||
mca_coll_sm2_param_register_int("short_message_size",32768);
|
||||
|
||||
/* collective ops to use */
|
||||
cs->force_barrier=
|
||||
mca_coll_sm2_param_register_int("force_barrier",(-1));
|
||||
cs->force_reduce=
|
||||
mca_coll_sm2_param_register_int("force_reduce",(-1));
|
||||
cs->force_allreduce=
|
||||
mca_coll_sm2_param_register_int("force_allreduce",(-1));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Close the component
|
||||
*/
|
||||
static int sm2_close(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* query to see if the component is available for use, and can
|
||||
* satisfy the thread and progress requirements
|
||||
*/
|
||||
int mca_coll_sm2_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
    /* at this stage there is no reason to disqualify this component */
|
||||
|
||||
/* done */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
The diff for this file is not shown because of its large size.
@ -1,852 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 UT-Battelle, LLC
|
||||
* Copyright (c) 2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file */
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_sm2.h"
|
||||
#include "ompi/op/op.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
/* debug
|
||||
#include "opal/sys/timer.h"
|
||||
|
||||
extern uint64_t timers[7];
|
||||
end debug */
|
||||
|
||||
/**
|
||||
 * Shared memory blocking reduce (fan-in algorithm).
|
||||
*/
|
||||
int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
|
||||
int my_rank, comm_size, child_rank, child, n_children;
|
||||
int count_processed,count_this_stripe;
|
||||
int process_shift,my_node_index;
|
||||
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
|
||||
long long tag;
|
||||
volatile char * my_data_pointer;
|
||||
volatile char * child_data_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * child_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
tree_node_t *my_reduction_node;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
|
||||
/* debug
|
||||
last_root=root;
|
||||
end debug */
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* compute process shift */
|
||||
my_rank=ompi_comm_rank(comm);
|
||||
comm_size=ompi_comm_size(comm);
|
||||
process_shift=root;
|
||||
my_node_index=my_rank-root;
|
||||
/* wrap around */
|
||||
if(0 > my_node_index ) {
|
||||
my_node_index+=comm_size;
|
||||
}
|
||||
|
||||
/* get size of data needed - same layout as user data, so that
|
||||
     * we can apply the reduction routines directly on these buffers
|
||||
*/
|
||||
rc=ompi_datatype_type_extent(dtype, &dt_extent);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
message_extent=dt_extent*count;
|
||||
|
||||
    /* length of control and data regions */
|
||||
ctl_size=sm_module->ctl_memory_per_proc_per_segment;
|
||||
len_data_buffer=sm_module->data_memory_per_proc_per_segment;
|
||||
|
||||
    /* number of datatype copies that the scratch buffer can hold */
|
||||
n_dts_per_buffer=((int) len_data_buffer)/dt_extent;
|
||||
if ( 0 == n_dts_per_buffer ) {
|
||||
rc=OMPI_ERROR;
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* compute number of stripes needed to process this collective */
|
||||
n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
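    /* Illustration (hypothetical numbers): with count = 1000 elements and
     * n_dts_per_buffer = 300, the ceiling division above gives
     * (1000 + 299) / 300 = 4 stripes - three full stripes of 300 elements
     * and a final partial stripe of 100 elements.
     */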
|
||||
|
||||
/* get my node for the reduction tree */
|
||||
my_reduction_node=&(sm_module->reduction_tree[my_node_index]);
|
||||
n_children=my_reduction_node->n_children;
|
||||
/* debug
|
||||
node_type=my_reduction_node->my_node_type;
|
||||
end debug */
|
||||
|
||||
if( 1 == n_data_segments ) {
|
||||
/* single data segment */
|
||||
|
||||
/* get unique tag for this stripe - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
/* debug
|
||||
assert(tag);
|
||||
end debug */
|
||||
|
||||
/* get a pointer to the shared-memory working buffer */
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
/* debug
|
||||
free_buff_free_index=tag;
|
||||
end debug */
|
||||
|
||||
/* get number of elements to process in this stripe */
|
||||
count_this_stripe=count;
|
||||
|
||||
/* offset to data segment */
|
||||
my_ctl_pointer=sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
my_data_pointer=sm_buffer_desc->proc_memory[my_rank].data_segment;
|
||||
|
||||
/***************************
|
||||
* Fan into root phase
|
||||
***************************/
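        /* Sketch of the single-segment fan-in (as implemented below): a leaf
         * copies sbuf into its shared data segment and publishes the tag; an
         * interior node copies sbuf into its own segment, waits for each
         * child's tag, reduces the child data into its segment, then publishes
         * the tag; the root reduces its children's segments directly into rbuf.
         */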
|
||||
|
||||
if( ROOT_NODE == my_reduction_node->my_node_type ) {
|
||||
/*
|
||||
* copy local data from source buffer to result buffer
|
||||
*/
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)rbuf,
|
||||
(char *)sbuf);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait on children, and apply op to their data
|
||||
*/
|
||||
for( child=0 ; child < n_children ; child++ ) {
|
||||
child_rank=my_reduction_node->children_ranks[child];
|
||||
child_rank+=process_shift;
|
||||
/* wrap around */
|
||||
if( comm_size <= child_rank ){
|
||||
child_rank-=comm_size;
|
||||
}
|
||||
|
||||
child_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].control_region;
|
||||
child_data_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 2 count %d root %d child_rank %d \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
end debug */
|
||||
/* wait until child flag is set */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* apply collective operation */
|
||||
ompi_op_reduce(op,(void *)child_data_pointer,
|
||||
(void *)rbuf, count_this_stripe,dtype);
|
||||
|
||||
} /* end child loop */
|
||||
|
||||
} else if( INTERIOR_NODE == my_reduction_node->my_node_type ) {
|
||||
|
||||
/* copy segment into shared buffer - ompi_op_reduce
|
||||
         * provides only 2 buffers, so can't add from two
|
||||
* into a third buffer.
|
||||
*/
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,
|
||||
(char *)sbuf);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait on children, and apply op to their data
|
||||
*/
|
||||
for( child=0 ; child < n_children ; child++ ) {
|
||||
child_rank=my_reduction_node->children_ranks[child];
|
||||
child_rank+=process_shift;
|
||||
/* wrap around */
|
||||
if( comm_size <= child_rank ){
|
||||
child_rank-=comm_size;
|
||||
}
|
||||
|
||||
child_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].control_region;
|
||||
child_data_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* wait until child flag is set */
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 3 count %d root %d child_rank \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
end debug */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* apply collective operation */
|
||||
ompi_op_reduce(op,(void *)child_data_pointer,
|
||||
(void *)my_data_pointer, count_this_stripe,dtype);
|
||||
|
||||
} /* end child loop */
|
||||
|
||||
        /* set memory barrier to make sure data is in main memory before
         * the completion flags are set.
|
||||
*/
|
||||
MB();
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
|
||||
} else {
|
||||
/* leaf node */
|
||||
/* copy segment into shared buffer - later on will optimize to
|
||||
* eliminate extra copies.
|
||||
*/
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,
|
||||
(char *)sbuf);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
        /* set memory barrier to make sure data is in main memory before
         * the completion flags are set.
|
||||
*/
|
||||
MB();
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
}
|
||||
|
||||
/* "free" the shared-memory working buffer */
|
||||
rc=free_sm2_shared_buffer(sm_module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
} else {
|
||||
count_processed=0;
|
||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||
|
||||
/* get unique tag for this stripe - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
|
||||
/* get a pointer to the shared-memory working buffer */
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
|
||||
/* get number of elements to process in this stripe */
|
||||
count_this_stripe=n_dts_per_buffer;
|
||||
if( count_processed + count_this_stripe > count )
|
||||
count_this_stripe=count-count_processed;
|
||||
|
||||
/* offset to data segment */
|
||||
my_ctl_pointer=sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
my_data_pointer=sm_buffer_desc->proc_memory[my_rank].data_segment;
|
||||
|
||||
/***************************
|
||||
* Fan into root phase
|
||||
***************************/
|
||||
|
||||
if( LEAF_NODE != my_reduction_node->my_node_type ) {
|
||||
/* copy segment into shared buffer - ompi_op_reduce
|
||||
             * provides only 2 buffers, so can't add from two
|
||||
* into a third buffer.
|
||||
*/
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,
|
||||
(char *)((char *)sbuf+dt_extent*count_processed));
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait on children, and apply op to their data
|
||||
*/
|
||||
for( child=0 ; child < n_children ; child++ ) {
|
||||
child_rank=my_reduction_node->children_ranks[child];
|
||||
child_rank+=process_shift;
|
||||
/* wrap around */
|
||||
if( comm_size <= child_rank ){
|
||||
child_rank-=comm_size;
|
||||
}
|
||||
|
||||
child_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].control_region;
|
||||
child_data_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* wait until child flag is set */
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 1 count %d root %d child_rank %d \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
end debug */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* apply collective operation */
|
||||
ompi_op_reduce(op,(void *)child_data_pointer,
|
||||
(void *)my_data_pointer, count_this_stripe,dtype);
|
||||
|
||||
} /* end child loop */
|
||||
|
||||
            /* set memory barrier to make sure data is in main memory before
             * the completion flags are set.
|
||||
*/
|
||||
MB();
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* copy data to destination */
|
||||
if( ROOT_NODE == my_reduction_node->my_node_type ) {
|
||||
/* copy data to user supplied buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)rbuf+dt_extent*count_processed,
|
||||
(char *)my_data_pointer);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
/* leaf node */
|
||||
/* copy segment into shared buffer - later on will optimize to
|
||||
* eliminate extra copies.
|
||||
*/
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_data_pointer,
|
||||
(char *)((char *)sbuf+dt_extent*count_processed));
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
            /* set memory barrier to make sure data is in main memory before
             * the completion flags are set.
|
||||
*/
|
||||
MB();
|
||||
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
}
|
||||
|
||||
/* "free" the shared-memory working buffer */
|
||||
rc=free_sm2_shared_buffer(sm_module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* update the count of elements processed */
|
||||
count_processed+=count_this_stripe;
|
||||
}
|
||||
}
|
||||
|
||||
/* return */
|
||||
return rc;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
||||
/**
|
||||
* Shared memory blocking reduce.
|
||||
*/
|
||||
int mca_coll_sm2_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf,
|
||||
int count, struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
    /* local variables */
|
||||
int i,rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
|
||||
int pair_rank,exchange,extra_rank,n_proc_data,tmp;
|
||||
int starting_proc;
|
||||
int n_elements_per_proc, n_residual_elements;
|
||||
int cnt_offset,n_copy;
|
||||
pair_exchange_node_t *my_exchange_node;
|
||||
int my_rank,comm_size,count_processed,count_this_stripe;
|
||||
int count_this_exchange;
|
||||
int done_copy_tag,ok_to_copy_tag;
|
||||
size_t len_data_buffer;
|
||||
ptrdiff_t dt_extent;
|
||||
long long tag, base_tag;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
volatile char * extra_rank_write_data_pointer;
|
||||
volatile char * my_extra_write_pointer;
|
||||
volatile char * partner_base_pointer;
|
||||
volatile char * my_pointer;
|
||||
volatile char * my_base_pointer;
|
||||
volatile char * partner_pointer;
|
||||
volatile char * source_pointer;
|
||||
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||
partner_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||
extra_ctl_pointer;
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||
source_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* get size of data needed - same layout as user data, so that
|
||||
     * we can apply the reduction routines directly on these buffers
|
||||
*/
|
||||
rc=ompi_datatype_type_extent(dtype, &dt_extent);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
    /* length of control and data regions */
|
||||
len_data_buffer=sm_module->data_memory_per_proc_per_segment;
|
||||
|
||||
    /* number of datatype copies that the scratch buffer can hold */
|
||||
n_dts_per_buffer=((int) len_data_buffer)/dt_extent;
|
||||
if ( 0 == n_dts_per_buffer ) {
|
||||
rc=OMPI_ERROR;
|
||||
goto Error;
|
||||
}
|
||||
|
||||
len_data_buffer=n_dts_per_buffer*dt_extent;
|
||||
|
||||
/* compute number of stripes needed to process this collective */
|
||||
n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
|
||||
|
||||
/* get my node for the reduction tree */
|
||||
my_exchange_node=&(sm_module->recursive_doubling_tree);
|
||||
my_rank=ompi_comm_rank(comm);
|
||||
comm_size=ompi_comm_size(comm);
|
||||
|
||||
/* get access to shared memory working buffer */
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
my_ctl_pointer=sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||
my_base_pointer=sm_buffer_desc->proc_memory[my_rank].data_segment;
|
||||
|
||||
count_processed=0;
|
||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||
/* get number of elements to process in this stripe */
|
||||
/* debug
|
||||
t2=opal_timer_base_get_cycles();
|
||||
end debug */
|
||||
count_this_stripe=n_dts_per_buffer;
|
||||
if( count_processed + count_this_stripe > count )
|
||||
count_this_stripe=count-count_processed;
|
||||
|
||||
/* compute the number of elements "owned" by each process */
|
||||
n_elements_per_proc=(count_this_stripe/my_exchange_node->n_largest_pow_2);
|
||||
n_residual_elements=count_this_stripe-
|
||||
n_elements_per_proc*my_exchange_node->n_largest_pow_2;
|
||||
for(i=0 ; i < my_exchange_node->n_largest_pow_2 ; i++ ) {
|
||||
sm_module->scratch_space[i]=n_elements_per_proc;
|
||||
if( i < n_residual_elements) {
|
||||
sm_module->scratch_space[i]++;
|
||||
}
|
||||
}
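        /* Illustration (hypothetical numbers): with count_this_stripe = 10
         * and n_largest_pow_2 = 4, each process owns 10/4 = 2 elements with
         * a remainder of 2, so scratch_space becomes {3, 3, 2, 2} and the
         * owned ranges are [0,3), [3,6), [6,8) and [8,10).
         */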
|
||||
|
||||
/* get unique set of tags for this stripe.
|
||||
* Assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
base_tag=sm_module->collective_tag;
|
||||
/* log_2 tags for recursive doubling, one for the non-power of 2
|
||||
* initial send, 1 for first copy into shared memory, and
|
||||
* one for completing the copyout.
|
||||
*/
|
||||
sm_module->collective_tag+=(my_exchange_node->log_2+3);
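        /* Illustration (hypothetical size): with 8 exchanging processes,
         * log_2 = 3 and each stripe advances the tag by 3 + 3 = 6:
         * base_tag for the extra-rank pre-exchange, base_tag+1 through
         * base_tag+3 for the recursive-doubling steps, base_tag+4 as the
         * ok-to-copy tag and base_tag+5 as the done-copy tag.
         */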
|
||||
|
||||
|
||||
/* copy data into the write buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
|
||||
(char *)my_base_pointer,
|
||||
(char *)((char *)sbuf+dt_extent*count_processed));
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
/* debug
|
||||
{ int *int_tmp=(int *)my_base_pointer;
|
||||
int i;
|
||||
fprintf(stderr," my rank %d data in tmp :: ",my_rank);
|
||||
for (i=0 ; i < count_this_stripe ; i++ ) {
|
||||
fprintf(stderr," %d ",int_tmp[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
end debug */
|
||||
|
||||
/* debug
|
||||
t3=opal_timer_base_get_cycles();
|
||||
timers[1]+=(t3-t2);
|
||||
end debug */
|
||||
|
||||
/* copy data in from the "extra" source, if need be */
|
||||
tag=base_tag;
|
||||
if(0 < my_exchange_node->n_extra_sources) {
|
||||
int n_my_count;
|
||||
|
||||
if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
|
||||
|
||||
/* signal to partner that I am ready */
|
||||
MB();
|
||||
/*
|
||||
* Signal extra node that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* figure out my portion of the reduction */
|
||||
n_my_count=count_this_stripe/2;
|
||||
|
||||
extra_rank=my_exchange_node->rank_extra_source;
|
||||
extra_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||
extra_rank_write_data_pointer=
|
||||
sm_buffer_desc->proc_memory[extra_rank].data_segment;
|
||||
|
||||
            /* wait until remote data is ready */
|
||||
while( extra_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* apply collective operation to first half of the data */
|
||||
if( 0 < n_my_count ) {
|
||||
ompi_op_reduce(op,(void *)extra_rank_write_data_pointer,
|
||||
(void *)my_base_pointer, n_my_count,dtype);
|
||||
}
|
||||
|
||||
|
||||
/* wait for my partner to finish reducing the data */
|
||||
tag=base_tag+1;
|
||||
while( extra_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
|
||||
/* read my partner's data */
|
||||
|
||||
            /* adjust read and write pointers */
|
||||
extra_rank_write_data_pointer+=(n_my_count*dt_extent);
|
||||
|
||||
if( 0 < (count_this_stripe-n_my_count) ) {
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype,
|
||||
count_this_stripe-n_my_count,
|
||||
(char *)(my_base_pointer+n_my_count*dt_extent),
|
||||
(char *)extra_rank_write_data_pointer);
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* now we are ready for the power of 2 portion of the
|
||||
* algorithm
|
||||
*/
|
||||
|
||||
} else {
|
||||
|
||||
            /* set memory barrier to make sure data is in main memory before
             * the completion flags are set.
|
||||
*/
|
||||
|
||||
MB();
|
||||
|
||||
/*
|
||||
* Signal extra node that data is ready
|
||||
*/
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/* figure out my portion of the reduction */
|
||||
n_my_count=count_this_stripe-(count_this_stripe/2);
|
||||
|
||||
/* get the pointer to the partners data that needs to be reduced */
|
||||
extra_rank=my_exchange_node->rank_extra_source;
|
||||
extra_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||
extra_rank_write_data_pointer=
|
||||
sm_buffer_desc->proc_memory[extra_rank].data_segment;
|
||||
/* offset into my half of the data */
|
||||
extra_rank_write_data_pointer+=
|
||||
((count_this_stripe/2)*dt_extent);
|
||||
my_extra_write_pointer=my_base_pointer+
|
||||
((count_this_stripe/2)*dt_extent);
|
||||
|
||||
            /* wait until remote data is ready */
|
||||
while( extra_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* apply collective operation to second half of the data */
|
||||
if( 0 < n_my_count ) {
|
||||
ompi_op_reduce(op,(void *)extra_rank_write_data_pointer,
|
||||
(void *)my_extra_write_pointer, n_my_count,dtype);
|
||||
}
|
||||
|
||||
/* signal that I am done, so my partner can read my data */
|
||||
MB();
|
||||
tag=base_tag+1;
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
}
|
||||
}
|
||||
MB();
|
||||
|
||||
/*
|
||||
* reduce-scatter
|
||||
*/
|
||||
/*
|
||||
* Signal parent that data is ready
|
||||
*/
|
||||
tag=base_tag+1;
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
/*
|
||||
* loop over data exchanges
|
||||
*/
|
||||
        /* set the number of procs whose data I will manipulate - this starts
         * at the number of procs in the exchange, so a divide by two at each
         * iteration will give the right number of procs for the given iteration
|
||||
*/
|
||||
/* debug
|
||||
{ int *int_tmp=(int *)my_base_pointer;
|
||||
int i;
|
||||
fprintf(stderr," GGG my rank %d data in tmp :: ",my_rank);
|
||||
for (i=0 ; i < count_this_stripe ; i++ ) {
|
||||
fprintf(stderr," %d ",int_tmp[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
end debug */
|
||||
n_proc_data=my_exchange_node->n_largest_pow_2;
|
||||
starting_proc=0;
|
||||
for(exchange=my_exchange_node->n_exchanges-1;exchange>=0;exchange--) {
|
||||
|
||||
/* is the remote data read */
|
||||
pair_rank=my_exchange_node->rank_exchanges[exchange];
|
||||
|
||||
partner_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[pair_rank].control_region;
|
||||
partner_base_pointer=
|
||||
sm_buffer_desc->proc_memory[pair_rank].data_segment;
|
||||
|
||||
            /* wait until remote data is ready */
|
||||
while( partner_ctl_pointer->flag < tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
/* figure out the base address to use : the lower rank gets
|
||||
* the upper data, with the higher rank getting the lower half
|
||||
* of the current chunk */
|
||||
n_proc_data=n_proc_data/2;
|
||||
if(pair_rank < my_rank ) {
|
||||
starting_proc+=n_proc_data;
|
||||
}
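            /* Illustration (continuing the 10-element, 4-process example
             * above): the halving of n_proc_data together with the
             * starting_proc shift walks each rank down to the single
             * per-process block it owns at the end, so rank 0 ends up owning
             * elements [0,3), rank 1 [3,6), rank 2 [6,8) and rank 3 [8,10).
             */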
|
||||
|
||||
            /* figure out my starting pointer */
|
||||
tmp=0;
|
||||
for(i=0 ; i < starting_proc ; i++ ) {
|
||||
tmp+=sm_module->scratch_space[i];
|
||||
}
|
||||
my_pointer=my_base_pointer+tmp*dt_extent;
|
||||
            /* figure out partner's starting pointer */
|
||||
partner_pointer=partner_base_pointer+tmp*dt_extent;
|
||||
|
||||
/* figure out how much to read */
|
||||
tmp=0;
|
||||
for(i=starting_proc ; i < starting_proc+n_proc_data ; i++ ) {
|
||||
tmp+=sm_module->scratch_space[i];
|
||||
}
|
||||
count_this_exchange=tmp;
|
||||
|
||||
/* reduce data into my write buffer */
|
||||
/* apply collective operation */
|
||||
ompi_op_reduce(op,(void *)partner_pointer,
|
||||
(void *)my_pointer, count_this_exchange,dtype);
|
||||
/* debug
|
||||
{ int *int_tmp=(int *)my_pointer;
|
||||
int i;
|
||||
fprintf(stderr," result my rank %d data in tmp :: ",my_rank);
|
||||
for (i=0 ; i < count_this_exchange ; i++ ) {
|
||||
fprintf(stderr," %d ",int_tmp[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
int_tmp=(int *)partner_pointer;
|
||||
fprintf(stderr," partner data my rank %d data in tmp :: ",my_rank);
|
||||
for (i=0 ; i < count_this_exchange ; i++ ) {
|
||||
fprintf(stderr," %d ",int_tmp[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
end debug */
|
||||
|
||||
/* signal that I am done reading my peer's data */
|
||||
tag++;
|
||||
MB();
|
||||
my_ctl_pointer->flag=tag;
|
||||
|
||||
|
||||
} /* end exchange loop */
|
||||
|
||||
/* debug
|
||||
t8=opal_timer_base_get_cycles();
|
||||
end debug */
|
||||
/* copy data out to final destination. Could do some sort of
|
||||
         * recursive doubling in the sm, then copy to process-private memory,
|
||||
* which reduces memory contention. However, this also almost
|
||||
* doubles the number of copies.
|
||||
*/
|
||||
ok_to_copy_tag=base_tag+1+my_exchange_node->log_2;
|
||||
|
||||
/* only root reads the results */
|
||||
if( root == my_rank) {
|
||||
            /* read from the result buffers directly to the final destination */
|
||||
cnt_offset=0;
|
||||
for(n_copy=0 ; n_copy < my_exchange_node->n_largest_pow_2 ; n_copy++ ) {
|
||||
|
||||
if( 0 >= sm_module->scratch_space[n_copy] )
|
||||
continue;
|
||||
|
||||
source_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[n_copy].control_region;
|
||||
source_pointer=
|
||||
sm_buffer_desc->proc_memory[n_copy].data_segment;
|
||||
|
||||
                /* wait until remote data is ready */
|
||||
while( source_ctl_pointer->flag < ok_to_copy_tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
/* copy data into the destination buffer */
|
||||
rc=ompi_datatype_copy_content_same_ddt(dtype,
|
||||
sm_module->scratch_space[n_copy],
|
||||
(char *)((char *)rbuf+
|
||||
dt_extent*(count_processed+cnt_offset)),
|
||||
(char *)((char *)source_pointer+
|
||||
dt_extent*cnt_offset));
|
||||
if( 0 != rc ) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
cnt_offset+=sm_module->scratch_space[n_copy];
|
||||
}
|
||||
}
|
||||
done_copy_tag=base_tag+2+my_exchange_node->log_2;
|
||||
my_ctl_pointer->flag=done_copy_tag;
|
||||
|
||||
/* wait for all to read the data, before re-using this buffer */
|
||||
if( stripe_number < (n_data_segments-1) ) {
|
||||
for(n_copy=0 ; n_copy < comm_size ; n_copy++ ) {
|
||||
source_ctl_pointer=
|
||||
sm_buffer_desc->proc_memory[n_copy].control_region;
|
||||
while( source_ctl_pointer-> flag < done_copy_tag ) {
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* update the count of elements processed */
|
||||
count_processed+=count_this_stripe;
|
||||
}
|
||||
/* return */
|
||||
return rc;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared memory blocking reduce.
|
||||
*/
|
||||
int mca_coll_sm2_reduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
ptrdiff_t dt_extent;
|
||||
size_t len_data_buffer;
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* get size of data needed - same layout as user data, so that
|
||||
* we can apply the reudction routines directly on these buffers
|
||||
*/
|
||||
rc=ompi_datatype_type_extent(dtype, &dt_extent);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
len_data_buffer=count*dt_extent;
|
||||
|
||||
if( len_data_buffer <= sm_module->short_message_size) {
|
||||
rc=sm_module->reduce_functions[SHORT_DATA_FN_REDUCE]
|
||||
(sbuf, rbuf, count, dtype, op, root, comm, module);
|
||||
}
|
||||
else {
|
||||
rc=sm_module->reduce_functions[LONG_DATA_FN_REDUCE]
|
||||
(sbuf, rbuf, count, dtype, op, root, comm, module);
|
||||
}
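    /* Illustration (hypothetical message): with the default
     * short_message_size of 32768 bytes registered by the component open
     * function, reducing 4096 MPI_DOUBLE elements (4096 * 8 = 32768 bytes)
     * still selects SHORT_DATA_FN_REDUCE, while 4097 elements would select
     * LONG_DATA_FN_REDUCE.
     */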
|
||||
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
#if 0
|
||||
rc= mca_coll_sm2_reduce_intra_fanin(sbuf, rbuf, count,
|
||||
dtype, op, root, comm, module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
rc= mca_coll_sm2_reduce_intra_reducescatter_gather(sbuf, rbuf, count,
|
||||
dtype, op, root, comm, module);
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
goto Error;
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
Error:
|
||||
return rc;
|
||||
}
|
@ -1,314 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Most of the description of the data layout is in the
|
||||
* coll_sm_module.c file.
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_sm2.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
|
||||
|
||||
/* setup a multinomial tree - for each node in the tree
 * this returns its parent and its children */
|
||||
|
||||
int setup_multinomial_tree(int tree_order, int num_nodes,
|
||||
tree_node_t *tree_nodes)
|
||||
{
|
||||
/* local variables */
|
||||
int i,result;
|
||||
int cnt, parent_cnt,n_nodes_in_this_level,node_index;
|
||||
int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
|
||||
int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
|
||||
int n_lvls_in_tree;
|
||||
|
||||
/* sanity check */
|
||||
if( 1 >= tree_order ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
|
||||
/* figure out number of levels in the tree */
|
||||
|
||||
n_lvls_in_tree=0;
|
||||
result=num_nodes;
|
||||
/* cnt - number of ranks in given level */
|
||||
cnt=1;
|
||||
    /* parent_cnt - cumulative count of ranks */
|
||||
parent_cnt=0;
|
||||
while( 0 < result ) {
|
||||
result-=cnt;
|
||||
cnt*=tree_order;
|
||||
n_lvls_in_tree++;
|
||||
};
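    /* Illustration (hypothetical size): for tree_order = 2 and
     * num_nodes = 6 the loop counts levels of size 1, 2 and 4 until the
     * running total covers all nodes, giving n_lvls_in_tree = 3.
     */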
|
||||
|
||||
/* loop over tree levels */
|
||||
n_nodes_in_this_level=1;
|
||||
node_index=-1;
|
||||
n_cum_nodes=0;
|
||||
for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
|
||||
|
||||
/* loop over nodes in current level */
|
||||
for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
|
||||
/* get node index */
|
||||
node_index++;
|
||||
|
||||
/* break if reach group size */
|
||||
if( node_index == num_nodes) {
|
||||
break;
|
||||
}
|
||||
|
||||
tree_nodes[node_index].my_rank=node_index;
|
||||
tree_nodes[node_index].children_ranks=NULL;
|
||||
|
||||
/*
|
||||
* Parents
|
||||
*/
|
||||
if( 0 == current_level ) {
|
||||
tree_nodes[node_index].n_parents=0;
|
||||
/* get parent index */
|
||||
tree_nodes[node_index].parent_rank=-1;
|
||||
} else {
|
||||
tree_nodes[node_index].n_parents=1;
|
||||
/* get parent index */
|
||||
n_nodes_prev_level=n_nodes_in_this_level/tree_order;
|
||||
if( current_level == n_lvls_in_tree -1 ) {
|
||||
/* load balance the lowest level */
|
||||
parent_rank=node-
|
||||
(node/n_nodes_prev_level)*n_nodes_prev_level;
|
||||
parent_rank=n_cum_nodes-n_nodes_prev_level+
|
||||
parent_rank;
|
||||
tree_nodes[node_index].parent_rank=parent_rank;
|
||||
} else {
|
||||
tree_nodes[node_index].parent_rank=
|
||||
(n_cum_nodes-n_nodes_prev_level)+node/tree_order;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Children
|
||||
*/
|
||||
|
||||
/* get number of children */
|
||||
if( (n_lvls_in_tree-1) == current_level ) {
|
||||
                /* leaves have no children */
|
||||
tree_nodes[node_index].n_children=0;
|
||||
tree_nodes[node_index].children_ranks=NULL;
|
||||
} else {
|
||||
/* take into account last level being incomplete */
|
||||
if( (n_lvls_in_tree-2) == current_level ) {
|
||||
/* last level is load balanced */
|
||||
n_nodes_in_last_level=num_nodes-
|
||||
(n_cum_nodes+n_nodes_in_this_level);
|
||||
n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
|
||||
n_in_partial_stipe=n_nodes_in_last_level-
|
||||
n_full_stripes*n_nodes_in_this_level;
|
||||
n_children=n_full_stripes;
|
||||
if( n_full_stripes < tree_order ) {
|
||||
if( node <= n_in_partial_stipe-1 ) {
|
||||
n_children++;
|
||||
}
|
||||
}
|
||||
tree_nodes[node_index].n_children=n_children;
|
||||
if( 0 < n_children ) {
|
||||
tree_nodes[node_index].children_ranks=(int *)
|
||||
malloc(sizeof(int)*n_children);
|
||||
if( NULL == tree_nodes[node_index].children_ranks) {
|
||||
goto Error;
|
||||
}
|
||||
} else {
|
||||
tree_nodes[node_index].children_ranks=NULL;
|
||||
}
|
||||
/* fill in list */
|
||||
for( rank=0 ; rank < n_children ; rank++ ) {
|
||||
tree_nodes[node_index].children_ranks[rank]=
|
||||
node+rank*n_nodes_in_this_level;
|
||||
tree_nodes[node_index].children_ranks[rank]+=
|
||||
(n_cum_nodes+n_nodes_in_this_level);
|
||||
}
|
||||
} else {
|
||||
n_children=tree_order;
|
||||
tree_nodes[node_index].n_children=tree_order;
|
||||
tree_nodes[node_index].children_ranks=(int *)
|
||||
malloc(sizeof(int)*n_children);
|
||||
if( NULL == tree_nodes[node_index].children_ranks) {
|
||||
goto Error;
|
||||
}
|
||||
for( rank=0 ; rank < n_children ; rank++ ) {
|
||||
tree_nodes[node_index].children_ranks[rank]=
|
||||
rank+tree_order*node;
|
||||
tree_nodes[node_index].children_ranks[rank]+=
|
||||
(n_cum_nodes+n_nodes_in_this_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* end node loop */
|
||||
|
||||
/* update helper counters */
|
||||
n_cum_nodes+=n_nodes_in_this_level;
|
||||
n_nodes_in_this_level*=tree_order;
|
||||
}
|
||||
|
||||
/* set node type */
|
||||
for(i=0 ; i < num_nodes ; i++ ) {
|
||||
if( 0 == tree_nodes[i].n_parents ) {
|
||||
tree_nodes[i].my_node_type=ROOT_NODE;
|
||||
} else if ( 0 == tree_nodes[i].n_children ) {
|
||||
tree_nodes[i].my_node_type=LEAF_NODE;
|
||||
} else {
|
||||
tree_nodes[i].my_node_type=INTERIOR_NODE;
|
||||
}
|
||||
}
|
||||
|
||||
/* successful return */
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
Error:
|
||||
/* free allocated memory */
|
||||
for( i=0 ; i < num_nodes ; i++ ) {
|
||||
if( NULL != tree_nodes[i].children_ranks ) {
|
||||
free(tree_nodes[i].children_ranks);
|
||||
}
|
||||
}
|
||||
|
||||
/* error return */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* setup recursive doubling tree node */
|
||||
|
||||
int setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
|
||||
pair_exchange_node_t *exchange_node)
|
||||
{
|
||||
/* local variables */
|
||||
int i,tmp,cnt,result,tree_order,n_extra_nodes;
|
||||
int n_exchanges;
|
||||
|
||||
/* figure out number of levels in the tree */
|
||||
|
||||
n_exchanges=0;
|
||||
result=num_nodes;
|
||||
tree_order=2;
|
||||
/* cnt - number of ranks in given level */
|
||||
cnt=1;
|
||||
while( num_nodes > cnt ) {
|
||||
cnt*=tree_order;
|
||||
n_exchanges++;
|
||||
};
|
||||
|
||||
/* figure out the largest power of 2 that is less than or equal to
|
||||
* num_nodes */
|
||||
if( cnt > num_nodes) {
|
||||
cnt/=tree_order;
|
||||
n_exchanges--;
|
||||
}
|
||||
exchange_node->log_2=n_exchanges;
|
||||
|
||||
tmp=1;
|
||||
for(i=0 ; i < n_exchanges ; i++ ) {
|
||||
tmp*=2;
|
||||
}
|
||||
exchange_node->n_largest_pow_2=tmp;
|
||||
|
||||
/* set node characteristics - node that is not within the largest
|
||||
     * power of 2 will just send its data to a node that will participate
|
||||
* in the recursive doubling, and get the result back at the end.
|
||||
*/
|
||||
if( node_rank+1 > cnt ) {
|
||||
exchange_node->node_type=EXTRA_NODE;
|
||||
} else {
|
||||
exchange_node->node_type=EXCHANGE_NODE;
|
||||
}
|
||||
|
||||
/* set the initial and final data exchanges - those that are not
|
||||
* part of the recursive doubling.
|
||||
*/
|
||||
n_extra_nodes=num_nodes-cnt;
|
||||
|
||||
if ( EXCHANGE_NODE == exchange_node->node_type ) {
|
||||
|
||||
if( node_rank < n_extra_nodes ) {
|
||||
exchange_node->n_extra_sources=1;
|
||||
exchange_node->rank_extra_source=cnt+node_rank;
|
||||
} else {
|
||||
exchange_node->n_extra_sources=0;
|
||||
exchange_node->rank_extra_source=-1;
|
||||
}
|
||||
|
||||
} else {
|
||||
exchange_node->n_extra_sources=1;
|
||||
exchange_node->rank_extra_source=node_rank-cnt;
|
||||
}
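    /* Illustration (hypothetical size): with num_nodes = 6 the largest
     * power of two is 4, so ranks 0-3 are EXCHANGE_NODEs and ranks 4-5 are
     * EXTRA_NODEs; rank 4 pairs with rank 0 and rank 5 with rank 1, while
     * ranks 2 and 3 have no extra source.
     */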
|
||||
|
||||
/* set the exchange pattern */
|
||||
if( EXCHANGE_NODE == exchange_node->node_type ) {
|
||||
|
||||
exchange_node->n_exchanges=n_exchanges;
|
||||
exchange_node->rank_exchanges=(int *) malloc
|
||||
(n_exchanges*sizeof(int));
|
||||
if( NULL == exchange_node->rank_exchanges ) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
/* fill in exchange partners */
|
||||
result=1;
|
||||
tmp=node_rank;
|
||||
for( i=0 ; i < n_exchanges ; i++ ) {
|
||||
if(tmp & 1 ) {
|
||||
exchange_node->rank_exchanges[i]=
|
||||
node_rank-result;
|
||||
} else {
|
||||
exchange_node->rank_exchanges[i]=
|
||||
node_rank+result;
|
||||
}
|
||||
result*=2;
|
||||
tmp/=2;
|
||||
}
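        /* Illustration (hypothetical ranks): for node_rank = 5 in an
         * 8-process exchange (n_exchanges = 3) the loop above yields
         * partners 4, 7 and 1, i.e. node_rank XOR 1, XOR 2 and XOR 4 -
         * the usual recursive-doubling exchange pattern.
         */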
|
||||
|
||||
} else {
|
||||
|
||||
exchange_node->n_exchanges=0;
|
||||
exchange_node->rank_exchanges=NULL;
|
||||
|
||||
}
|
||||
|
||||
/* set the number of tags needed per stripe - this must be the
|
||||
* same across all procs in the communicator.
|
||||
*/
|
||||
exchange_node->n_tags=2*n_exchanges+1;
|
||||
|
||||
/* successful return */
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
Error:
|
||||
|
||||
/* error return */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -1,34 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open MPI's Shared memory
# collective component.
#
[tree-degree-larger-than-control]
The specified shared memory collective tree degree
(coll_sm_tree_degree = %d) is too large.  It must be less than or
equal to the control size (coll_sm_control_size = %d).

Automatically adjusting the tree degree to be equal to the control
size and continuing...
[tree-degree-larger-than-255]
The specified shared memory collective tree degree
(coll_sm_tree_degree = %d) is too large.  It must be less than or
equal to 255.

Automatically adjusting the tree degree to be 255 and continuing...