From f41e4149fac338183d589261bec9d43ad5a033c1 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 15 Jul 2005 20:01:35 +0000 Subject: [PATCH] - Add new mpool base function: lookup by module name. This allows multiple components to share a single mpool module (e.g., the ptl/btl and coll sm components). - Re-tool the ptl, btl, and coll sm components to first look for the target mpool module, and if they don't find it, to create it. - coll sm component now correctly identifies when it is supposed to run or not (i.e., if all the processes in the communicator are on the same host). Now we just need to fill in some algorithms. :-) This commit was SVN r6530. --- ompi/mca/btl/sm/.ompi_unignore | 1 + ompi/mca/btl/sm/btl_sm.c | 6 +- ompi/mca/coll/sm/.ompi_unignore | 1 + ompi/mca/coll/sm/coll_sm.h | 53 +++++++++----- ompi/mca/coll/sm/coll_sm_component.c | 92 +++++++++++++++++-------- ompi/mca/coll/sm/coll_sm_module.c | 38 +++++----- ompi/mca/mpool/base/base.h | 1 + ompi/mca/mpool/base/mpool_base_lookup.c | 24 ++++++- ompi/mca/pml/ob1/.ompi_unignore | 1 + ompi/mca/ptl/sm/src/ptl_sm.c | 8 ++- 10 files changed, 156 insertions(+), 69 deletions(-) diff --git a/ompi/mca/btl/sm/.ompi_unignore b/ompi/mca/btl/sm/.ompi_unignore index a50292c94c..31ba84f34b 100644 --- a/ompi/mca/btl/sm/.ompi_unignore +++ b/ompi/mca/btl/sm/.ompi_unignore @@ -3,3 +3,4 @@ twoodall gshipman bosilca bwbarre +jsquyres diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c index ef5c53740e..836bb87f3d 100644 --- a/ompi/mca/btl/sm/btl_sm.c +++ b/ompi/mca/btl/sm/btl_sm.c @@ -199,7 +199,11 @@ int mca_btl_sm_add_procs_same_base_addr( /* lookup shared memory pool */ if(NULL == mca_btl_sm_component.sm_mpool) { mca_btl_sm_component.sm_mpool = - mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name,btl,NULL); + mca_mpool_base_module_lookup(mca_btl_sm_component.sm_mpool_name); + if (NULL == mca_btl_sm_component.sm_mpool) { + mca_btl_sm_component.sm_mpool = + 
mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name,btl,NULL); + } /* Sanity check to ensure that we found it */ if (NULL == mca_btl_sm_component.sm_mpool) { diff --git a/ompi/mca/coll/sm/.ompi_unignore b/ompi/mca/coll/sm/.ompi_unignore index 204634653e..c9ac09ce98 100644 --- a/ompi/mca/coll/sm/.ompi_unignore +++ b/ompi/mca/coll/sm/.ompi_unignore @@ -1 +1,2 @@ jsquyres@mauve.lanl.gov +jsquyres diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index dadfb41edf..1c0f6a2d1a 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -13,6 +13,9 @@ * * $HEADER$ */ +/** + * @file + */ #ifndef MCA_COLL_SM_EXPORT_H #define MCA_COLL_SM_EXPORT_H @@ -20,8 +23,9 @@ #include "ompi_config.h" #include "mpi.h" -#include "mca/mca.h" -#include "mca/coll/coll.h" +#include "opal/mca/mca.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/mpool/mpool.h" #if defined(c_plusplus) || defined(__cplusplus) extern "C" { @@ -29,14 +33,33 @@ extern "C" { #define PUB(foo) mca_coll_sm##foo - /* Structure for sm collective module, per communicator. The - structure mainly stores memory pointers to the specific - poritions in the shared memory area. Each shared memory area is - reserved for special functions. The shared memory is split - between two types of areas. One is control section that stores - shared flags used during synchronization, while other section - is purely used to pass messages from one process to other. */ + /** + * Structure to hold the sm coll component. First it holds the + * base coll component, and then holds a bunch of + * sm-coll-component-specific stuff (e.g., current MCA param + * values). 
+ */ + typedef struct mca_coll_sm_component_t { + /** Base coll component */ + mca_coll_base_component_1_0_0_t super; + /** Priority of this component */ + int sm_priority; + /** Name of the mpool that this component will look for */ + char *sm_mpool_name; + /** Mpool that will be used */ + mca_mpool_base_module_t *sm_mpool; + } mca_coll_sm_component_t; + + /** + * Structure for sm collective module, per communicator. The + * structure mainly stores memory pointers to the specific + * portions in the shared memory area. Each shared memory area is + * reserved for special functions. The shared memory is split + * between two types of areas. One is control section that stores + * shared flags used during synchronization, while other section + * is purely used to pass messages from one process to other. + */ typedef struct mca_coll_base_module_comm_t { /* JMS fill in here */ @@ -45,12 +68,10 @@ extern "C" { } mca_coll_base_module_comm_t; - /* - * Globally exported variables + /** + * Global component instance */ - - extern const mca_coll_base_component_1_0_0_t mca_coll_sm_component; - extern int mca_coll_sm_param_priority; + extern mca_coll_sm_component_t mca_coll_sm_component; /* @@ -157,7 +178,3 @@ extern "C" { #endif #endif /* MCA_COLL_SM_EXPORT_H */ - - - - diff --git a/ompi/mca/coll/sm/coll_sm_component.c b/ompi/mca/coll/sm/coll_sm_component.c index 84e2942679..9a65817b70 100644 --- a/ompi/mca/coll/sm/coll_sm_component.c +++ b/ompi/mca/coll/sm/coll_sm_component.c @@ -39,6 +39,7 @@ const char *mca_coll_sm_component_version_string = */ static int sm_open(void); +static int sm_close(void); /* @@ -46,43 +47,56 @@ static int sm_open(void); * and pointers to our public functions in it */ -const mca_coll_base_component_1_0_0_t mca_coll_sm_component = { +mca_coll_sm_component_t mca_coll_sm_component = { - /* First, the mca_component_t struct containing meta information - about the component itself */ + /* First, fill in the super (mca_coll_base_component_1_0_0_t) */ { - 
/* Indicate that we are a coll v1.0.0 component (which also - implies a specific MCA version) */ + /* First, the mca_component_t struct containing meta + information about the component itself */ + + { + /* Indicate that we are a coll v1.0.0 component (which + also implies a specific MCA version) */ - MCA_COLL_BASE_VERSION_1_0_0, + MCA_COLL_BASE_VERSION_1_0_0, - /* Component name and version */ + /* Component name and version */ - "sm", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, + "sm", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, - /* Component open and close functions */ + /* Component open and close functions */ - sm_open, - NULL + sm_open, + sm_close, + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + + true + }, + + /* Initialization / querying functions */ + + mca_coll_sm_init_query, + mca_coll_sm_comm_query, + mca_coll_sm_comm_unquery, }, - /* Next the MCA v1.0.0 component meta data */ + /* sm-component specific information */ - { - /* Whether the component is checkpointable or not */ + /* priority */ + 75, - true - }, - - /* Initialization / querying functions */ - - mca_coll_sm_init_query, - mca_coll_sm_comm_query, - mca_coll_sm_comm_unquery, + /* mpool name and instance */ + "sm", + NULL }; @@ -91,11 +105,33 @@ const mca_coll_base_component_1_0_0_t mca_coll_sm_component = { */ static int sm_open(void) { + int p, ival; + char *sval; + /* If we want to be selected (i.e., all procs on one node), then we should have a high priority */ - - mca_coll_sm_param_priority = - mca_base_param_register_int("coll", "sm", "priority", NULL, 75); + + p = mca_base_param_register_int("coll", "sm", "priority", NULL, 75); + mca_base_param_lookup_int(p, &ival); + mca_coll_sm_component.sm_priority = ival; + + p = mca_base_param_register_string("coll", "sm", "mpool", NULL, "sm"); + mca_base_param_lookup_string(p, &sval); + mca_coll_sm_component.sm_mpool_name = sval; + 
+ return OMPI_SUCCESS; +} + + +/* + * Close the component + */ +static int sm_close(void) +{ + if (NULL != mca_coll_sm_component.sm_mpool_name) { + free(mca_coll_sm_component.sm_mpool_name); + mca_coll_sm_component.sm_mpool_name = NULL; + } return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index d0079d75f5..ba82e49de8 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -19,9 +19,11 @@ #include #include "mpi.h" -#include "communicator/communicator.h" -#include "mca/coll/coll.h" -#include "mca/coll/base/base.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/base/base.h" #include "coll_sm.h" @@ -55,8 +57,6 @@ static const mca_coll_base_module_1_0_0_t module = { NULL }; -int mca_coll_sm_param_priority = -1; - /* * Initial query function that is invoked during MPI_INIT, allowing @@ -92,11 +92,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority, /* Get our priority */ - if (OMPI_SUCCESS != - mca_base_param_lookup_int(mca_coll_sm_param_priority, - priority)) { - return NULL; - } + *priority = mca_coll_sm_component.sm_priority; /* We only want to run if all the processes in the communicator are on the same node */ @@ -108,15 +104,23 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority, } } - /* Can we get an mpool allocation? */ + /* Can we get an mpool allocation? See if there was one created + already. If not, try to make one. */ - /* JMS ... 
*/ + mca_coll_sm_component.sm_mpool = + mca_mpool_base_module_lookup(mca_coll_sm_component.sm_mpool_name); + if (NULL == mca_coll_sm_component.sm_mpool) { + mca_coll_sm_component.sm_mpool = + mca_mpool_base_module_create(mca_coll_sm_component.sm_mpool_name, + NULL, NULL); + if (NULL == mca_coll_sm_component.sm_mpool) { + return NULL; + } + } + + /* All is good -- return a module */ -#if 1 - return NULL; -#else return &module; -#endif } @@ -154,5 +158,3 @@ int mca_coll_sm_module_finalize(struct ompi_communicator_t *comm) return OMPI_SUCCESS; } - - diff --git a/ompi/mca/mpool/base/base.h b/ompi/mca/mpool/base/base.h index 0afe9344a8..4a1b227083 100644 --- a/ompi/mca/mpool/base/base.h +++ b/ompi/mca/mpool/base/base.h @@ -106,6 +106,7 @@ OMPI_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_create( const char* name, void* user_data, struct mca_mpool_base_resources_t* mpool_resources); +OMPI_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name); /* * Globals diff --git a/ompi/mca/mpool/base/mpool_base_lookup.c b/ompi/mca/mpool/base/mpool_base_lookup.c index 847c28a6a0..a3f85eee70 100644 --- a/ompi/mca/mpool/base/mpool_base_lookup.c +++ b/ompi/mca/mpool/base/mpool_base_lookup.c @@ -62,13 +62,14 @@ mca_mpool_base_module_t* mca_mpool_base_module_create( (mca_base_component_list_item_t *) item; component = (mca_mpool_base_component_t *) cli->cli_component; - if(strcmp(component->mpool_version.mca_component_name, name) == 0) { + if(0 == strcmp(component->mpool_version.mca_component_name, name)) { break; } } - if(NULL == component) + if (NULL == component) { return NULL; + } module = component->mpool_init(resources); sm = OBJ_NEW(mca_mpool_base_selected_module_t); sm->mpool_component = component; @@ -78,3 +79,22 @@ mca_mpool_base_module_t* mca_mpool_base_module_create( opal_list_append(&mca_mpool_base_modules, (opal_list_item_t*) sm); return module; } + + +mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name) +{ + 
opal_list_item_t* item; + + for (item = opal_list_get_first(&mca_mpool_base_modules); + item != opal_list_get_end(&mca_mpool_base_modules); + item = opal_list_get_next(item)) { + mca_mpool_base_selected_module_t *mli = + (mca_mpool_base_selected_module_t *) item; + if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name, + name)) { + return mli->mpool_module; + } + } + + return NULL; +} diff --git a/ompi/mca/pml/ob1/.ompi_unignore b/ompi/mca/pml/ob1/.ompi_unignore index 1d69b49f2f..87de2e4eba 100644 --- a/ompi/mca/pml/ob1/.ompi_unignore +++ b/ompi/mca/pml/ob1/.ompi_unignore @@ -5,3 +5,4 @@ bosilca brbarret bbarrett bwbarre +jsquyres diff --git a/ompi/mca/ptl/sm/src/ptl_sm.c b/ompi/mca/ptl/sm/src/ptl_sm.c index 0552d0c32b..88175b857e 100644 --- a/ompi/mca/ptl/sm/src/ptl_sm.c +++ b/ompi/mca/ptl/sm/src/ptl_sm.c @@ -203,8 +203,12 @@ int mca_ptl_sm_add_procs_same_base_addr( /* lookup shared memory pool */ if(NULL == mca_ptl_sm_component.sm_mpool) { mca_ptl_sm_component.sm_mpool = - mca_mpool_base_module_create(mca_ptl_sm_component.sm_mpool_name,NULL,NULL); - + mca_mpool_base_module_lookup(mca_ptl_sm_component.sm_mpool_name); + if (NULL == mca_ptl_sm_component.sm_mpool) { + mca_ptl_sm_component.sm_mpool = + mca_mpool_base_module_create(mca_ptl_sm_component.sm_mpool_name,NULL,NULL); + } + /* Sanity check to ensure that we found it */ if (NULL == mca_ptl_sm_component.sm_mpool) { return_code = OMPI_ERR_OUT_OF_RESOURCE;