1
1
Adds coll_hcoll_np mca parameter similar to that of fca component (defaults to 32). Those who use hcoll be aware that from now on the communicators less than 32 procs will run w/o hcoll by default. - Resolves fallback issue in case libhcoll runs out of allowed contexts. The solution is moving hcoll_context_create from comm_enable to comm_query. Shortly, comm_enable should never return OMPI_ERROR in the coll component with highest priority (hcoll). Otherwise the ompi coll_base_select will unselect the coll funtion pointers and module references leaving the communicator w/o coll pointer. This will cause the fail. Same behavior can be reproduced even with tuned if one would hardcore some "return OMPI_ERROR" into it's module_enable funtion. - Additionally, removed all the dead code under #if 0; removed unused variables (path for library, active_modules list) and classes (module list wrapper)

Fixed by Val, Reviewed by Devendar/Josh/Miked

cmr=v1.7.4:reviewer=ompi-rm1.7

This commit was SVN r30341.
Этот коммит содержится в:
Mike Dubman 2014-01-21 12:19:47 +00:00
родитель 37343574e0
Коммит b8550a55a7
3 изменённых файлов: 114 добавлений и 203 удалений

Просмотреть файл

@ -54,21 +54,22 @@ struct mca_coll_hcoll_component_t {
/** MCA parameter: Verbose level of this component */
int hcoll_verbose;
/** MCA parameter: Path to libfca.so */
char* hcoll_lib_path;
/** MCA parameter: Enable FCA */
int hcoll_enable;
/** MCA parameter: Minimal number of processes in the communicator
for the corresponding hcoll context to be created */
int hcoll_np;
/** Whether or not hcoll_init was ever called */
bool libhcoll_initialized;
/** MCA parameter: ON/OFF user defined datatype through HCOLL */
int hcoll_datatype_fallback;
/* FCA global stuff */
void *hcoll_lib_handle; /* FCA dynamic library */
mca_coll_hcoll_ops_t hcoll_ops;
ompi_free_list_t requests;
opal_list_t active_modules;
volatile uint32_t progress_lock;
};
typedef struct mca_coll_hcoll_component_t mca_coll_hcoll_component_t;
@ -126,13 +127,6 @@ OBJ_CLASS_DECLARATION(mca_coll_hcoll_module_t);
typedef struct mca_coll_hcoll_module_list_item_wrapper_t{
opal_list_item_t super;
mca_coll_hcoll_module_t *module;
} mca_coll_hcoll_module_list_item_wrapper_t;
OBJ_CLASS_DECLARATION(mca_coll_hcoll_module_list_item_wrapper_t);
/* API functions */
int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads);

Просмотреть файл

@ -56,10 +56,9 @@ mca_coll_hcoll_component_t mca_coll_hcoll_component = {
mca_coll_hcoll_init_query,
mca_coll_hcoll_comm_query,
},
90,
0,
"",
1
90, /* priority */
0, /* verbose level */
1 /* hcoll_enable */
};
@ -74,11 +73,6 @@ int mca_coll_hcoll_get_lib(void)
return OMPI_SUCCESS;
}
static void mca_coll_hcoll_close_lib(void)
{
}
/*
* * Local flags
* */
@ -96,7 +90,6 @@ enum {
};
/*
* utility routine for string parameter registration
*/
@ -131,7 +124,6 @@ static int reg_string(const char* param_name,
}
/*
* Utility routine for integer parameter registration
*/
@ -177,42 +169,43 @@ static int hcoll_register(void)
ret = OMPI_SUCCESS;
#define CHECK(expr) do { \
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp; \
} while (0)
#define CHECK(expr) do { \
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp; \
} while (0)
CHECK(reg_int("priority",NULL,
"Priority of the hcol coll component",
90,
&mca_coll_hcoll_component.hcoll_priority,
0));
"Priority of the hcol coll component",
90,
&mca_coll_hcoll_component.hcoll_priority,
0));
CHECK(reg_int("verbose", NULL,
"Verbose level of the hcol coll component",
0,
&mca_coll_hcoll_component.hcoll_verbose,
0));
"Verbose level of the hcol coll component",
0,
&mca_coll_hcoll_component.hcoll_verbose,
0));
CHECK(reg_int("enable",NULL,
"[1|0|] Enable/Disable HCOL",
1 /*enable by default*/,
&mca_coll_hcoll_component.hcoll_enable,
0));
"[1|0|] Enable/Disable HCOL",
1 /*enable by default*/,
&mca_coll_hcoll_component.hcoll_enable,
0));
CHECK(reg_int("np",NULL,
"Minimal number of processes in the communicator"
" for the corresponding hcoll context to be created (default: 32)",
2 /*enable by default*/,
&mca_coll_hcoll_component.hcoll_np,
0));
CHECK(reg_int("datatype_fallback",NULL,
"[1|0|] Enable/Disable user defined dattypes fallback",
1 /*enable by default*/,
&mca_coll_hcoll_component.hcoll_datatype_fallback,
0));
"[1|0|] Enable/Disable user defined dattypes fallback",
1 /*enable by default*/,
&mca_coll_hcoll_component.hcoll_datatype_fallback,
0));
CHECK(reg_string("library_path", NULL,
"HCOL /path/to/libhcol.so",
""COLL_HCOLL_HOME"/libhcol.so",
&mca_coll_hcoll_component.hcoll_lib_path,
0));
return ret;
}
@ -220,23 +213,25 @@ static int hcoll_register(void)
static int hcoll_open(void)
{
int rc;
mca_coll_hcoll_component_t *cm;
cm = &mca_coll_hcoll_component;
mca_coll_hcoll_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_coll_hcoll_output, mca_coll_hcoll_component.hcoll_verbose);
opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose);
hcoll_rte_fns_setup();
OBJ_CONSTRUCT(&mca_coll_hcoll_component.active_modules, opal_list_t);
mca_coll_hcoll_component.progress_lock = -1;
cm->libhcoll_initialized = false;
return OMPI_SUCCESS;
}
static int hcoll_close(void)
{
int rc;
mca_coll_hcoll_component_t *cm;
cm = &mca_coll_hcoll_component;
if (false == mca_coll_hcoll_component.hcoll_enable) {
if (false == cm->libhcoll_initialized) {
return OMPI_SUCCESS;
}
@ -244,8 +239,6 @@ static int hcoll_close(void)
rc = hcoll_finalize();
opal_progress_unregister(mca_coll_hcoll_progress);
OBJ_DESTRUCT(&mca_coll_hcoll_component.active_modules);
memset(&mca_coll_hcoll_component.active_modules,0,sizeof(mca_coll_hcoll_component.active_modules));
if (HCOLL_SUCCESS != rc){
HCOL_VERBOSE(1,"Hcol library finalize failed");
return OMPI_ERROR;

Просмотреть файл

@ -18,7 +18,6 @@ int hcoll_comm_attr_keyval;
*/
int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads)
{
return OMPI_SUCCESS;
}
@ -50,73 +49,45 @@ static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_modul
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
{
opal_list_item_t *item, *item_next;
opal_list_t *am;
mca_coll_hcoll_module_t *module;
ompi_communicator_t *comm;
int context_destroyed;
am = &mca_coll_hcoll_component.active_modules;
if (hcoll_module->comm == &ompi_mpi_comm_world.comm){
#if 0
/* If we get here then we are detroying MPI_COMM_WORLD now. So,
* it is safe to destory all the other communicators and corresponding
* hcoll contexts that could still be on the "active_modules" list.
*/
item = opal_list_get_first(am);
while (item != opal_list_get_end(am)){
item_next = opal_list_get_next(item);
module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
comm = module->comm;
context_destroyed = 0;
while(!context_destroyed){
hcoll_destroy_context(module->hcoll_context,
(rte_grp_handle_t)comm,
&context_destroyed);
}
module->hcoll_context = NULL;
OBJ_RELEASE(comm);
opal_list_remove_item(am,item);
OBJ_RELEASE(item);
item = item_next;
}
/* Now destory the comm_world hcoll context as well */
context_destroyed = 0;
while(!context_destroyed){
hcoll_destroy_context(hcoll_module->hcoll_context,
(rte_grp_handle_t)hcoll_module->comm,
&context_destroyed);
}
#endif
if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, &hcoll_comm_attr_keyval, 0)) {
HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed");
}
}
OBJ_RELEASE(hcoll_module->previous_barrier_module);
OBJ_RELEASE(hcoll_module->previous_bcast_module);
OBJ_RELEASE(hcoll_module->previous_reduce_module);
OBJ_RELEASE(hcoll_module->previous_allreduce_module);
OBJ_RELEASE(hcoll_module->previous_allgather_module);
OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
OBJ_RELEASE(hcoll_module->previous_gather_module);
OBJ_RELEASE(hcoll_module->previous_gatherv_module);
OBJ_RELEASE(hcoll_module->previous_alltoall_module);
OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
OBJ_RELEASE(hcoll_module->previous_ibcast_module);
OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
OBJ_RELEASE(hcoll_module->previous_iallgather_module);
context_destroyed = 0;
hcoll_destroy_context(hcoll_module->hcoll_context,
(rte_grp_handle_t)hcoll_module->comm,
&context_destroyed);
assert(context_destroyed);
/* If the hcoll_context is null then we are destroying the hcoll_module
that didn't initialized fallback colls/modules.
Then just clear and return. Otherwise release module pointers and
destroy hcoll context*/
if (hcoll_module->hcoll_context != NULL){
OBJ_RELEASE(hcoll_module->previous_barrier_module);
OBJ_RELEASE(hcoll_module->previous_bcast_module);
OBJ_RELEASE(hcoll_module->previous_reduce_module);
OBJ_RELEASE(hcoll_module->previous_allreduce_module);
OBJ_RELEASE(hcoll_module->previous_allgather_module);
OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
OBJ_RELEASE(hcoll_module->previous_gather_module);
OBJ_RELEASE(hcoll_module->previous_gatherv_module);
OBJ_RELEASE(hcoll_module->previous_alltoall_module);
OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
OBJ_RELEASE(hcoll_module->previous_ibcast_module);
OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
OBJ_RELEASE(hcoll_module->previous_iallgather_module);
context_destroyed = 0;
hcoll_destroy_context(hcoll_module->hcoll_context,
(rte_grp_handle_t)hcoll_module->comm,
&context_destroyed);
assert(context_destroyed);
}
mca_coll_hcoll_module_clear(hcoll_module);
}
@ -130,9 +101,10 @@ static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module
} while(0)
static int __save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
{
ompi_communicator_t *comm = hcoll_module->comm;
ompi_communicator_t *comm;
comm = hcoll_module->comm;
HCOL_SAVE_PREV_COLL_API(barrier);
HCOL_SAVE_PREV_COLL_API(bcast);
@ -154,13 +126,16 @@ static int __save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
return OMPI_SUCCESS;
}
/*
** Communicator free callback
*/
int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra)
{
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) attr_val;
mca_coll_hcoll_module_t *hcoll_module;
hcoll_module = (mca_coll_hcoll_module_t*) attr_val;
hcoll_group_destroy_notify(hcoll_module->hcoll_context);
return OMPI_SUCCESS;
@ -172,36 +147,14 @@ int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extr
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm)
{
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) module;
int ret;
hcoll_module->comm = comm;
if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)){
HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
if (OMPI_SUCCESS != mca_coll_hcoll_save_coll_handlers((mca_coll_hcoll_module_t *)module)){
HCOL_ERROR("coll_hcol: mca_coll_hcoll_save_coll_handlers failed");
return OMPI_ERROR;
}
hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
hcoll_module->hcoll_context =
hcoll_create_context((rte_grp_handle_t)comm);
if (NULL == hcoll_module->hcoll_context){
HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
return OMPI_ERROR;
}
#if 0
if (comm != &ompi_mpi_comm_world.comm){
mca_coll_hcoll_module_list_item_wrapper_t *mw =
OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t);
mw->module = hcoll_module;
OBJ_RETAIN(hcoll_module->comm);
opal_list_append(&mca_coll_hcoll_component.active_modules,
(opal_list_item_t*)mw);
}
#endif
ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)hcoll_module, false);
ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)module, false);
if (OMPI_SUCCESS != ret) {
HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed");
return OMPI_ERROR;
@ -212,55 +165,11 @@ static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
int mca_coll_hcoll_progress(void)
{
opal_list_item_t *item, *item_next;
opal_list_t *am;
mca_coll_hcoll_module_t *module;
ompi_communicator_t *comm;
int context_destroyed;
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,1);
am = &mca_coll_hcoll_component.active_modules;
if (mca_coll_hcoll_component.progress_lock){
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
(*hcoll_progress_fn)();
return OMPI_SUCCESS;
}
if (ompi_mpi_finalized){
hcoll_rte_p2p_disabled_notify();
}
#if 0
item = opal_list_get_first(am);
while (item != opal_list_get_end(am)){
item_next = opal_list_get_next(item);
module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
comm = module->comm;
if (((opal_object_t*)comm)->obj_reference_count == 1){
/* Ok, if we are here then nobody owns a communicator pointed with comm except
* for coll_hcoll. Hence, it is safe to remove the hcoll context firstly and
* call release on the communicator.
*
* The call to hcoll_destroy_context is not blocking. The last parameter on the return
* indicates whether the context has been destroyd (1) or not (0). In the latter
* case one should call destroy again after some progressing
*/
context_destroyed = 0;
hcoll_destroy_context(module->hcoll_context,
(rte_grp_handle_t)comm,
&context_destroyed);
if (context_destroyed){
module->hcoll_context = NULL;
OBJ_RELEASE(comm);
opal_list_remove_item(am,item);
OBJ_RELEASE(item);
}
}
item = item_next;
}
#endif
(*hcoll_progress_fn)();
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
return OMPI_SUCCESS;
}
@ -275,56 +184,75 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
mca_coll_base_module_t *module;
mca_coll_hcoll_module_t *hcoll_module;
static bool libhcoll_initialized = false;
ompi_attribute_fn_ptr_union_t del_fn;
ompi_attribute_fn_ptr_union_t copy_fn;
mca_coll_hcoll_component_t *cm;
int err;
int rc;
cm = &mca_coll_hcoll_component;
*priority = 0;
module = NULL;
if (!mca_coll_hcoll_component.hcoll_enable){
if (!cm->hcoll_enable){
goto exit;
}
if (!libhcoll_initialized)
if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
|| ompi_comm_size(comm) < 2){
goto exit;
}
if (!cm->libhcoll_initialized)
{
/* libhcoll should be initialized here since current implmentation of
mxm bcol in libhcoll needs world_group fully functional during init
world_group, i.e. ompi_comm_world, is not ready at hcoll component open
call */
opal_progress_register(mca_coll_hcoll_progress);
hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
HCOL_VERBOSE(10,"Calling hcoll_init();");
rc = hcoll_init();
if (HCOLL_SUCCESS != rc){
mca_coll_hcoll_component.hcoll_enable = 0;
cm->hcoll_enable = 0;
opal_progress_unregister(hcoll_progress_fn);
HCOL_VERBOSE(0,"Hcol library init failed");
HCOL_ERROR("Hcol library init failed");
return NULL;
}
copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
if (OMPI_SUCCESS != err) {
HCOL_VERBOSE(0,"Hcol comm keyval create failed");
HCOL_ERROR("Hcol comm keyval create failed");
return NULL;
}
libhcoll_initialized = true;
cm->libhcoll_initialized = true;
}
hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
if (!hcoll_module){
goto exit;
}
if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
hcoll_module->comm = comm;
HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
(void*)comm,comm->c_contextid,ompi_comm_size(comm));
hcoll_module->hcoll_context =
hcoll_create_context((rte_grp_handle_t)comm);
if (NULL == hcoll_module->hcoll_context){
HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
OBJ_RELEASE(hcoll_module);
goto exit;
}
hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
@ -337,7 +265,7 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
hcoll_module->super.coll_gather = hcoll_collectives.coll_gather ? mca_coll_hcoll_gather : NULL;
*priority = mca_coll_hcoll_component.hcoll_priority;
*priority = cm->hcoll_priority;
module = &hcoll_module->super;
exit:
@ -352,7 +280,3 @@ OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_list_item_wrapper_t,
opal_list_item_t,
NULL,NULL);