diff --git a/ompi/mca/coll/sm2/.ompi_ignore b/ompi/mca/coll/sm2/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ompi/mca/coll/sm2/.ompi_unignore b/ompi/mca/coll/sm2/.ompi_unignore new file mode 100644 index 0000000000..fe165a9688 --- /dev/null +++ b/ompi/mca/coll/sm2/.ompi_unignore @@ -0,0 +1,2 @@ +rg6 +rlgraham diff --git a/ompi/mca/coll/sm2/Makefile.am b/ompi/mca/coll/sm2/Makefile.am new file mode 100644 index 0000000000..f1fdf154f9 --- /dev/null +++ b/ompi/mca/coll/sm2/Makefile.am @@ -0,0 +1,49 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +not_used_yet = + +sources = \ + coll_sm2.h \ + coll_sm2_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +component_noinst = +component_install = +if OMPI_BUILD_coll_sm2_DSO +component_install += mca_coll_sm2.la +else +component_noinst += libmca_coll_sm2.la +endif + +# See ompi/mca/btl/sm/Makefile.am for an explanation of +# libmca_common_sm.la. 
+ +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_coll_sm2_la_SOURCES = $(sources) +mca_coll_sm2_la_LDFLAGS = -module -avoid-version +mca_coll_sm2_la_LIBADD = \ + $(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_coll_sm2_la_SOURCES =$(sources) +libmca_coll_sm2_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/sm2/coll_sm2.h b/ompi/mca/coll/sm2/coll_sm2.h new file mode 100644 index 0000000000..f7552ae55d --- /dev/null +++ b/ompi/mca/coll/sm2/coll_sm2.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file */ + +#ifndef MCA_COLL_SM2_EXPORT_H +#define MCA_COLL_SM2_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/mca/mca.h" +#include "orte/mca/ns/ns_types.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/common/sm/common_sm_mmap.h" + +BEGIN_C_DECLS + +#ifdef HAVE_SCHED_YIELD +# include <sched.h> +# define SPIN sched_yield() +#elif defined(__WINDOWS__) +# define SPIN SwitchToThread() +#else /* no yield primitive available: SPIN expands to nothing */ +# define SPIN +#endif + +/* + * Memory Management + * - All memory allocation will be done on a per-communicator basis + * - The two banks of memory will be used + * - Each bank of memory will have M buffers + * - These buffers will be used in a circular buffer order + * - Each buffer will be contiguous in virtual memory, and will have page-aligned + * regions belonging to each process in the communicator + * - The memory associated with each process will have a control region, and + * a data region. + * - First touch will be used to enforce memory locality, and thus relies on + * processor affinity to be set. + * - A non-blocking collective will be issued when all buffers in a bank have + * been used. This will be completed before this bank is re-used. + */ + + /** + * Structure to hold the sm coll component. First it holds the + * base coll component, and then holds a bunch of + * sm-coll-component-specific stuff (e.g., current MCA param + * values). 
+ */ + struct mca_coll_sm2_component_t { + /** Base coll component */ + mca_coll_base_component_1_1_0_t super; + + /** MCA parameter: Priority of this component */ + int sm_priority; + }; + + /** + * Convenience typedef + */ + typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t; + + + struct mca_coll_sm2_module_t { + /* base structure */ + mca_coll_base_module_1_1_0_t super; + }; + + typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t; + OBJ_CLASS_DECLARATION(mca_coll_sm2_module_t); + + + /** + * Global component instance + */ + OMPI_MODULE_DECLSPEC extern mca_coll_sm2_component_t mca_coll_sm2_component; + + /* + * coll module functions (named to match the initializer of mca_coll_sm2_component) + */ + + int mca_coll_sm2_init_query(bool enable_progress_threads, + bool enable_mpi_threads); + + struct mca_coll_base_module_1_1_0_t * + mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority); + + +/** + * Macro to setup flag usage + */ +#define FLAG_SETUP(flag_num, flag, data) \ + (flag) = (mca_coll_sm_in_use_flag_t*) \ + (((char *) (data)->mcb_in_use_flags) + \ + ((flag_num) * mca_coll_sm_component.sm_control_size)) + +/** + * Macro to wait for the in-use flag to become idle (used by the root) + */ +#define FLAG_WAIT_FOR_IDLE(flag) \ + while (0 != (flag)->mcsiuf_num_procs_using) SPIN + +/** + * Macro to wait for a flag to indicate that it's ready for this + * operation (used by non-root processes to know when FLAG_RETAIN() has + * been called) + */ +#define FLAG_WAIT_FOR_OP(flag, op) \ + while ((op) != (flag)->mcsiuf_operation_count) SPIN + +/** + * Macro to set an in-use flag with relevant data to claim it (expands to a single statement) + */ +#define FLAG_RETAIN(flag, num_procs, op_count) \ + do { (flag)->mcsiuf_num_procs_using = (num_procs); \ + (flag)->mcsiuf_operation_count = (op_count); } while (0) + +/** + * Macro to release an in-use flag from this process + */ +#define FLAG_RELEASE(flag) \ + opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) + +/** + * Macro to copy a single segment in from a user buffer to a shared + * segment + */ 
+#define COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data) \ + do { (iov).iov_base = \ + (index)->mcbmi_data + \ + ((rank) * mca_coll_sm_component.sm_fragment_size); \ + (max_data) = (iov).iov_len = mca_coll_sm_component.sm_fragment_size; \ + ompi_convertor_pack(&(convertor), &(iov), &mca_coll_sm_iov_size, \ + &(max_data) ); } while (0) + +/** + * Macro to copy a single segment out from a shared segment to a user + * buffer + */ +#define COPY_FRAGMENT_OUT(convertor, src_rank, index, iov, max_data) \ + do { (iov).iov_base = (((char*) (index)->mcbmi_data) + \ + ((src_rank) * mca_coll_sm_component.sm_fragment_size)); \ + ompi_convertor_unpack(&(convertor), &(iov), &mca_coll_sm_iov_size, \ + &(max_data) ); } while (0) + +/** + * Macro to memcpy a fragment between one shared segment and another + */ +#define COPY_FRAGMENT_BETWEEN(src_rank, dest_rank, index, len) \ + memcpy(((index)->mcbmi_data + \ + ((dest_rank) * mca_coll_sm_component.sm_fragment_size)), \ + ((index)->mcbmi_data + \ + ((src_rank) * \ + mca_coll_sm_component.sm_fragment_size)), \ + (len)) + +/** + * Macro to tell children that a segment is ready (normalize + * the child's ID based on the shift used to calculate the "me" node + * in the tree). Used in fan out operations. Uses the names i, root and size from the calling scope. + */ +#define PARENT_NOTIFY_CHILDREN(children, num_children, index, value) \ + do { \ + for (i = 0; i < (num_children); ++i) { \ + *((size_t*) \ + (((char*) (index)->mcbmi_control) + \ + (mca_coll_sm_component.sm_control_size * \ + (((children)[i]->mcstn_id + root) % size)))) = (value); \ + } \ + } while (0) + +/** + * Macro for children to wait for parent notification (use real rank). + * Save the value passed and then reset it when done. Used in fan out + * operations. 
+ */ +#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \ + do { \ + size_t volatile *ptr = ((size_t volatile *) \ + (((char*) (index)->mcbmi_control) + \ + ((rank) * mca_coll_sm_component.sm_control_size))); \ + while (0 == *ptr) SPIN; \ + (value) = *ptr; \ + *ptr = 0; \ + } while (0) + +/** + * Macro for children to tell parent that the data is ready in their + * segment. Used for fan in operations. + */ +#define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \ + ((size_t volatile *) \ + (((char*) (index)->mcbmi_control) + \ + (mca_coll_sm_component.sm_control_size * \ + (parent_rank))))[(child_rank)] = (value) + +/** + * Macro for parent to wait for a specific child to tell it that the + * data is in the child's segment. Save the value when done. Used + * for fan in operations. + */ +#define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \ + do { \ + size_t volatile *ptr = ((size_t volatile *) \ + (((char*) (index)->mcbmi_control) + \ + (mca_coll_sm_component.sm_control_size * \ + (parent_rank)))) + (child_rank); \ + while (0 == *ptr) SPIN; \ + (value) = *ptr; \ + *ptr = 0; \ + } while (0) + +END_C_DECLS + +#endif /* MCA_COLL_SM2_EXPORT_H */ diff --git a/ompi/mca/coll/sm2/coll_sm2_component.c b/ompi/mca/coll/sm2/coll_sm2_component.c new file mode 100644 index 0000000000..d5738d111a --- /dev/null +++ b/ompi/mca/coll/sm2/coll_sm2_component.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Most of the description of the data layout is in the + * coll_sm_module.c file. + */ + +#include "ompi_config.h" + +#include "ompi/constants.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/coll.h" +#include "opal/util/show_help.h" +#include "coll_sm2.h" + + +/* + * Public string showing the coll ompi_sm V2 component version number + */ +const char *mca_coll_sm2_component_version_string = + "Open MPI sm-V2 collective MCA component version " OMPI_VERSION; + + +/* + * Local functions + */ + +static int sm2_open(void); +static int sm2_close(void); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +mca_coll_sm2_component_t mca_coll_sm2_component = { + + /* First, fill in the super (mca_coll_base_component_1_1_0_t) */ + + { + /* First, the mca_component_t struct containing meta + information about the component itself */ + + { + /* Indicate that we are a coll v1.1.0 component (which + also implies a specific MCA version) */ + + MCA_COLL_BASE_VERSION_1_1_0, + + /* Component name and version */ + + "sm-v2", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + + /* Component open and close functions */ + + sm2_open, + sm2_close, + }, + + /* Next the MCA v1.1.0 component meta data */ + + { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + + /* Initialization / querying functions */ + + mca_coll_sm2_init_query, + mca_coll_sm2_comm_query, + }, + + /* sm-component specific information */ + + /* (default) priority */ + /* JMS temporarily lowered until we can get more testing */ + 0, + +}; + + +/* + * Open the component: register the "priority" MCA parameter; always returns OMPI_SUCCESS + */ +static int sm2_open(void) +{ + mca_base_component_t *c = &mca_coll_sm2_component.super.collm_version; + mca_coll_sm2_component_t *cs = &mca_coll_sm2_component; + + /* The current sm_priority value is passed as the default and receives the result */ + 
 mca_base_param_reg_int(c, "priority", + "Priority of the sm coll component", + false, false, + cs->sm_priority, + &cs->sm_priority); + + return OMPI_SUCCESS; +} + + +/* + * Close the component (nothing to release); always returns OMPI_SUCCESS + */ +static int sm2_close(void) +{ + return OMPI_SUCCESS; +} + +/* Constructor and destructor registered for mca_coll_sm2_module_t by OBJ_CLASS_INSTANCE below */ +static void +mca_coll_sm2_module_construct(mca_coll_sm2_module_t *module) +{ + /* mca_coll_sm2_module_t currently holds only the base module, so + there is no sm2-specific state to initialize */ +} + +static void +mca_coll_sm2_module_destruct(mca_coll_sm2_module_t *module) +{ /* no sm2-specific resources to release */ +} + + +OBJ_CLASS_INSTANCE(mca_coll_sm2_module_t, + mca_coll_base_module_1_1_0_t, + mca_coll_sm2_module_construct, + mca_coll_sm2_module_destruct); diff --git a/ompi/mca/coll/sm2/configure.params b/ompi/mca/coll/sm2/configure.params new file mode 100644 index 0000000000..e35a45dd44 --- /dev/null +++ b/ompi/mca/coll/sm2/configure.params @@ -0,0 +1,22 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES=Makefile diff --git a/ompi/mca/coll/sm2/help-coll-sm.txt b/ompi/mca/coll/sm2/help-coll-sm.txt new file mode 100644 index 0000000000..0ed7946b86 --- /dev/null +++ b/ompi/mca/coll/sm2/help-coll-sm.txt @@ -0,0 +1,34 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI's Shared memory +# collective component. +# +[tree-degree-larger-than-control] +The specified shared memory collective tree degree +(coll_sm_tree_degree = %d) is too large. It must be less than or +equal to the control size (coll_sm_control_size = %d). + +Automatically adjusting the tree degree to be equal to the control +size and continuing... +[tree-degree-larger-than-255] +The specified shared memory collective tree degree +(coll_sm_tree_degree = %d) is too large. It must be less than or +equal to 255. + +Automatically adjusting the tree degree to be 255 and continuing...