HCOLL: many fixes
Adds coll_hcoll_np mca parameter similar to that of fca component (defaults to 32). Those who use hcoll be aware that from now on the communicators less than 32 procs will run w/o hcoll by default. - Resolves fallback issue in case libhcoll runs out of allowed contexts. The solution is moving hcoll_context_create from comm_enable to comm_query. Shortly, comm_enable should never return OMPI_ERROR in the coll component with highest priority (hcoll). Otherwise the ompi coll_base_select will unselect the coll funtion pointers and module references leaving the communicator w/o coll pointer. This will cause the fail. Same behavior can be reproduced even with tuned if one would hardcore some "return OMPI_ERROR" into it's module_enable funtion. - Additionally, removed all the dead code under #if 0; removed unused variables (path for library, active_modules list) and classes (module list wrapper) Fixed by Val, Reviewed by Devendar/Josh/Miked cmr=v1.7.4:reviewer=ompi-rm1.7 This commit was SVN r30341.
Этот коммит содержится в:
родитель
37343574e0
Коммит
b8550a55a7
@ -54,21 +54,22 @@ struct mca_coll_hcoll_component_t {
|
||||
/** MCA parameter: Verbose level of this component */
|
||||
int hcoll_verbose;
|
||||
|
||||
/** MCA parameter: Path to libfca.so */
|
||||
char* hcoll_lib_path;
|
||||
|
||||
/** MCA parameter: Enable FCA */
|
||||
int hcoll_enable;
|
||||
|
||||
/** MCA parameter: Minimal number of processes in the communicator
|
||||
for the corresponding hcoll context to be created */
|
||||
int hcoll_np;
|
||||
|
||||
/** Whether or not hcoll_init was ever called */
|
||||
bool libhcoll_initialized;
|
||||
|
||||
/** MCA parameter: ON/OFF user defined datatype through HCOLL */
|
||||
int hcoll_datatype_fallback;
|
||||
|
||||
/* FCA global stuff */
|
||||
void *hcoll_lib_handle; /* FCA dynamic library */
|
||||
mca_coll_hcoll_ops_t hcoll_ops;
|
||||
ompi_free_list_t requests;
|
||||
opal_list_t active_modules;
|
||||
volatile uint32_t progress_lock;
|
||||
};
|
||||
typedef struct mca_coll_hcoll_component_t mca_coll_hcoll_component_t;
|
||||
|
||||
@ -126,13 +127,6 @@ OBJ_CLASS_DECLARATION(mca_coll_hcoll_module_t);
|
||||
|
||||
|
||||
|
||||
typedef struct mca_coll_hcoll_module_list_item_wrapper_t{
|
||||
opal_list_item_t super;
|
||||
mca_coll_hcoll_module_t *module;
|
||||
} mca_coll_hcoll_module_list_item_wrapper_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_coll_hcoll_module_list_item_wrapper_t);
|
||||
|
||||
|
||||
/* API functions */
|
||||
int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads);
|
||||
|
@ -56,10 +56,9 @@ mca_coll_hcoll_component_t mca_coll_hcoll_component = {
|
||||
mca_coll_hcoll_init_query,
|
||||
mca_coll_hcoll_comm_query,
|
||||
},
|
||||
90,
|
||||
0,
|
||||
"",
|
||||
1
|
||||
90, /* priority */
|
||||
0, /* verbose level */
|
||||
1 /* hcoll_enable */
|
||||
};
|
||||
|
||||
|
||||
@ -74,11 +73,6 @@ int mca_coll_hcoll_get_lib(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_coll_hcoll_close_lib(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* * Local flags
|
||||
* */
|
||||
@ -96,7 +90,6 @@ enum {
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* utility routine for string parameter registration
|
||||
*/
|
||||
@ -131,7 +124,6 @@ static int reg_string(const char* param_name,
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Utility routine for integer parameter registration
|
||||
*/
|
||||
@ -177,42 +169,43 @@ static int hcoll_register(void)
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
|
||||
#define CHECK(expr) do { \
|
||||
tmp = (expr); \
|
||||
if (OMPI_SUCCESS != tmp) ret = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define CHECK(expr) do { \
|
||||
tmp = (expr); \
|
||||
if (OMPI_SUCCESS != tmp) ret = tmp; \
|
||||
} while (0)
|
||||
|
||||
|
||||
CHECK(reg_int("priority",NULL,
|
||||
"Priority of the hcol coll component",
|
||||
90,
|
||||
&mca_coll_hcoll_component.hcoll_priority,
|
||||
0));
|
||||
"Priority of the hcol coll component",
|
||||
90,
|
||||
&mca_coll_hcoll_component.hcoll_priority,
|
||||
0));
|
||||
|
||||
CHECK(reg_int("verbose", NULL,
|
||||
"Verbose level of the hcol coll component",
|
||||
0,
|
||||
&mca_coll_hcoll_component.hcoll_verbose,
|
||||
0));
|
||||
"Verbose level of the hcol coll component",
|
||||
0,
|
||||
&mca_coll_hcoll_component.hcoll_verbose,
|
||||
0));
|
||||
|
||||
CHECK(reg_int("enable",NULL,
|
||||
"[1|0|] Enable/Disable HCOL",
|
||||
1 /*enable by default*/,
|
||||
&mca_coll_hcoll_component.hcoll_enable,
|
||||
0));
|
||||
"[1|0|] Enable/Disable HCOL",
|
||||
1 /*enable by default*/,
|
||||
&mca_coll_hcoll_component.hcoll_enable,
|
||||
0));
|
||||
|
||||
CHECK(reg_int("np",NULL,
|
||||
"Minimal number of processes in the communicator"
|
||||
" for the corresponding hcoll context to be created (default: 32)",
|
||||
2 /*enable by default*/,
|
||||
&mca_coll_hcoll_component.hcoll_np,
|
||||
0));
|
||||
|
||||
CHECK(reg_int("datatype_fallback",NULL,
|
||||
"[1|0|] Enable/Disable user defined dattypes fallback",
|
||||
1 /*enable by default*/,
|
||||
&mca_coll_hcoll_component.hcoll_datatype_fallback,
|
||||
0));
|
||||
"[1|0|] Enable/Disable user defined dattypes fallback",
|
||||
1 /*enable by default*/,
|
||||
&mca_coll_hcoll_component.hcoll_datatype_fallback,
|
||||
0));
|
||||
|
||||
CHECK(reg_string("library_path", NULL,
|
||||
"HCOL /path/to/libhcol.so",
|
||||
""COLL_HCOLL_HOME"/libhcol.so",
|
||||
&mca_coll_hcoll_component.hcoll_lib_path,
|
||||
0));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -220,23 +213,25 @@ static int hcoll_register(void)
|
||||
static int hcoll_open(void)
|
||||
{
|
||||
int rc;
|
||||
mca_coll_hcoll_component_t *cm;
|
||||
cm = &mca_coll_hcoll_component;
|
||||
|
||||
mca_coll_hcoll_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_coll_hcoll_output, mca_coll_hcoll_component.hcoll_verbose);
|
||||
opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose);
|
||||
|
||||
hcoll_rte_fns_setup();
|
||||
|
||||
OBJ_CONSTRUCT(&mca_coll_hcoll_component.active_modules, opal_list_t);
|
||||
|
||||
mca_coll_hcoll_component.progress_lock = -1;
|
||||
cm->libhcoll_initialized = false;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int hcoll_close(void)
|
||||
{
|
||||
int rc;
|
||||
mca_coll_hcoll_component_t *cm;
|
||||
cm = &mca_coll_hcoll_component;
|
||||
|
||||
if (false == mca_coll_hcoll_component.hcoll_enable) {
|
||||
if (false == cm->libhcoll_initialized) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -244,8 +239,6 @@ static int hcoll_close(void)
|
||||
rc = hcoll_finalize();
|
||||
|
||||
opal_progress_unregister(mca_coll_hcoll_progress);
|
||||
OBJ_DESTRUCT(&mca_coll_hcoll_component.active_modules);
|
||||
memset(&mca_coll_hcoll_component.active_modules,0,sizeof(mca_coll_hcoll_component.active_modules));
|
||||
if (HCOLL_SUCCESS != rc){
|
||||
HCOL_VERBOSE(1,"Hcol library finalize failed");
|
||||
return OMPI_ERROR;
|
||||
|
@ -18,7 +18,6 @@ int hcoll_comm_attr_keyval;
|
||||
*/
|
||||
int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads)
|
||||
{
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -50,73 +49,45 @@ static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_modul
|
||||
|
||||
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
|
||||
{
|
||||
opal_list_item_t *item, *item_next;
|
||||
opal_list_t *am;
|
||||
mca_coll_hcoll_module_t *module;
|
||||
ompi_communicator_t *comm;
|
||||
int context_destroyed;
|
||||
|
||||
am = &mca_coll_hcoll_component.active_modules;
|
||||
|
||||
if (hcoll_module->comm == &ompi_mpi_comm_world.comm){
|
||||
#if 0
|
||||
/* If we get here then we are detroying MPI_COMM_WORLD now. So,
|
||||
* it is safe to destory all the other communicators and corresponding
|
||||
* hcoll contexts that could still be on the "active_modules" list.
|
||||
*/
|
||||
item = opal_list_get_first(am);
|
||||
while (item != opal_list_get_end(am)){
|
||||
item_next = opal_list_get_next(item);
|
||||
module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
|
||||
comm = module->comm;
|
||||
context_destroyed = 0;
|
||||
while(!context_destroyed){
|
||||
hcoll_destroy_context(module->hcoll_context,
|
||||
(rte_grp_handle_t)comm,
|
||||
&context_destroyed);
|
||||
}
|
||||
module->hcoll_context = NULL;
|
||||
OBJ_RELEASE(comm);
|
||||
opal_list_remove_item(am,item);
|
||||
OBJ_RELEASE(item);
|
||||
item = item_next;
|
||||
}
|
||||
|
||||
/* Now destory the comm_world hcoll context as well */
|
||||
context_destroyed = 0;
|
||||
while(!context_destroyed){
|
||||
hcoll_destroy_context(hcoll_module->hcoll_context,
|
||||
(rte_grp_handle_t)hcoll_module->comm,
|
||||
&context_destroyed);
|
||||
}
|
||||
#endif
|
||||
if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, &hcoll_comm_attr_keyval, 0)) {
|
||||
HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed");
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_RELEASE(hcoll_module->previous_barrier_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_bcast_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_reduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allreduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allgather_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_gather_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_gatherv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoall_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_ibcast_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_iallgather_module);
|
||||
context_destroyed = 0;
|
||||
hcoll_destroy_context(hcoll_module->hcoll_context,
|
||||
(rte_grp_handle_t)hcoll_module->comm,
|
||||
&context_destroyed);
|
||||
assert(context_destroyed);
|
||||
/* If the hcoll_context is null then we are destroying the hcoll_module
|
||||
that didn't initialized fallback colls/modules.
|
||||
Then just clear and return. Otherwise release module pointers and
|
||||
destroy hcoll context*/
|
||||
|
||||
if (hcoll_module->hcoll_context != NULL){
|
||||
OBJ_RELEASE(hcoll_module->previous_barrier_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_bcast_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_reduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allreduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allgather_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_gather_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_gatherv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoall_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_ibcast_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
|
||||
OBJ_RELEASE(hcoll_module->previous_iallgather_module);
|
||||
|
||||
context_destroyed = 0;
|
||||
hcoll_destroy_context(hcoll_module->hcoll_context,
|
||||
(rte_grp_handle_t)hcoll_module->comm,
|
||||
&context_destroyed);
|
||||
assert(context_destroyed);
|
||||
}
|
||||
mca_coll_hcoll_module_clear(hcoll_module);
|
||||
}
|
||||
|
||||
@ -130,9 +101,10 @@ static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module
|
||||
} while(0)
|
||||
|
||||
|
||||
static int __save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
|
||||
static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
|
||||
{
|
||||
ompi_communicator_t *comm = hcoll_module->comm;
|
||||
ompi_communicator_t *comm;
|
||||
comm = hcoll_module->comm;
|
||||
|
||||
HCOL_SAVE_PREV_COLL_API(barrier);
|
||||
HCOL_SAVE_PREV_COLL_API(bcast);
|
||||
@ -154,13 +126,16 @@ static int __save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** Communicator free callback
|
||||
*/
|
||||
int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra)
|
||||
{
|
||||
|
||||
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) attr_val;
|
||||
mca_coll_hcoll_module_t *hcoll_module;
|
||||
hcoll_module = (mca_coll_hcoll_module_t*) attr_val;
|
||||
|
||||
hcoll_group_destroy_notify(hcoll_module->hcoll_context);
|
||||
return OMPI_SUCCESS;
|
||||
@ -172,36 +147,14 @@ int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extr
|
||||
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) module;
|
||||
int ret;
|
||||
hcoll_module->comm = comm;
|
||||
if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)){
|
||||
HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
|
||||
|
||||
if (OMPI_SUCCESS != mca_coll_hcoll_save_coll_handlers((mca_coll_hcoll_module_t *)module)){
|
||||
HCOL_ERROR("coll_hcol: mca_coll_hcoll_save_coll_handlers failed");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
|
||||
|
||||
|
||||
hcoll_module->hcoll_context =
|
||||
hcoll_create_context((rte_grp_handle_t)comm);
|
||||
if (NULL == hcoll_module->hcoll_context){
|
||||
HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (comm != &ompi_mpi_comm_world.comm){
|
||||
mca_coll_hcoll_module_list_item_wrapper_t *mw =
|
||||
OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t);
|
||||
mw->module = hcoll_module;
|
||||
OBJ_RETAIN(hcoll_module->comm);
|
||||
opal_list_append(&mca_coll_hcoll_component.active_modules,
|
||||
(opal_list_item_t*)mw);
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)hcoll_module, false);
|
||||
ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)module, false);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed");
|
||||
return OMPI_ERROR;
|
||||
@ -212,55 +165,11 @@ static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
|
||||
|
||||
int mca_coll_hcoll_progress(void)
|
||||
{
|
||||
opal_list_item_t *item, *item_next;
|
||||
opal_list_t *am;
|
||||
mca_coll_hcoll_module_t *module;
|
||||
ompi_communicator_t *comm;
|
||||
int context_destroyed;
|
||||
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,1);
|
||||
|
||||
am = &mca_coll_hcoll_component.active_modules;
|
||||
|
||||
if (mca_coll_hcoll_component.progress_lock){
|
||||
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
|
||||
(*hcoll_progress_fn)();
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
if (ompi_mpi_finalized){
|
||||
hcoll_rte_p2p_disabled_notify();
|
||||
}
|
||||
#if 0
|
||||
item = opal_list_get_first(am);
|
||||
while (item != opal_list_get_end(am)){
|
||||
item_next = opal_list_get_next(item);
|
||||
module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
|
||||
comm = module->comm;
|
||||
if (((opal_object_t*)comm)->obj_reference_count == 1){
|
||||
/* Ok, if we are here then nobody owns a communicator pointed with comm except
|
||||
* for coll_hcoll. Hence, it is safe to remove the hcoll context firstly and
|
||||
* call release on the communicator.
|
||||
*
|
||||
* The call to hcoll_destroy_context is not blocking. The last parameter on the return
|
||||
* indicates whether the context has been destroyd (1) or not (0). In the latter
|
||||
* case one should call destroy again after some progressing
|
||||
*/
|
||||
context_destroyed = 0;
|
||||
hcoll_destroy_context(module->hcoll_context,
|
||||
(rte_grp_handle_t)comm,
|
||||
&context_destroyed);
|
||||
if (context_destroyed){
|
||||
module->hcoll_context = NULL;
|
||||
OBJ_RELEASE(comm);
|
||||
opal_list_remove_item(am,item);
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
}
|
||||
item = item_next;
|
||||
}
|
||||
#endif
|
||||
|
||||
(*hcoll_progress_fn)();
|
||||
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -275,56 +184,75 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
{
|
||||
mca_coll_base_module_t *module;
|
||||
mca_coll_hcoll_module_t *hcoll_module;
|
||||
static bool libhcoll_initialized = false;
|
||||
ompi_attribute_fn_ptr_union_t del_fn;
|
||||
ompi_attribute_fn_ptr_union_t copy_fn;
|
||||
mca_coll_hcoll_component_t *cm;
|
||||
int err;
|
||||
int rc;
|
||||
|
||||
cm = &mca_coll_hcoll_component;
|
||||
*priority = 0;
|
||||
module = NULL;
|
||||
|
||||
if (!mca_coll_hcoll_component.hcoll_enable){
|
||||
if (!cm->hcoll_enable){
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (!libhcoll_initialized)
|
||||
if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
|
||||
|| ompi_comm_size(comm) < 2){
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
if (!cm->libhcoll_initialized)
|
||||
{
|
||||
/* libhcoll should be initialized here since current implmentation of
|
||||
mxm bcol in libhcoll needs world_group fully functional during init
|
||||
world_group, i.e. ompi_comm_world, is not ready at hcoll component open
|
||||
call */
|
||||
opal_progress_register(mca_coll_hcoll_progress);
|
||||
|
||||
hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
|
||||
|
||||
HCOL_VERBOSE(10,"Calling hcoll_init();");
|
||||
rc = hcoll_init();
|
||||
|
||||
if (HCOLL_SUCCESS != rc){
|
||||
mca_coll_hcoll_component.hcoll_enable = 0;
|
||||
cm->hcoll_enable = 0;
|
||||
opal_progress_unregister(hcoll_progress_fn);
|
||||
HCOL_VERBOSE(0,"Hcol library init failed");
|
||||
HCOL_ERROR("Hcol library init failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
|
||||
del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
|
||||
del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
|
||||
err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
HCOL_VERBOSE(0,"Hcol comm keyval create failed");
|
||||
HCOL_ERROR("Hcol comm keyval create failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
libhcoll_initialized = true;
|
||||
cm->libhcoll_initialized = true;
|
||||
}
|
||||
hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
|
||||
if (!hcoll_module){
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
|
||||
hcoll_module->comm = comm;
|
||||
|
||||
HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
|
||||
(void*)comm,comm->c_contextid,ompi_comm_size(comm));
|
||||
|
||||
hcoll_module->hcoll_context =
|
||||
hcoll_create_context((rte_grp_handle_t)comm);
|
||||
|
||||
if (NULL == hcoll_module->hcoll_context){
|
||||
HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
|
||||
OBJ_RELEASE(hcoll_module);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
|
||||
hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
|
||||
hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
|
||||
@ -337,7 +265,7 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
|
||||
hcoll_module->super.coll_gather = hcoll_collectives.coll_gather ? mca_coll_hcoll_gather : NULL;
|
||||
|
||||
*priority = mca_coll_hcoll_component.hcoll_priority;
|
||||
*priority = cm->hcoll_priority;
|
||||
module = &hcoll_module->super;
|
||||
|
||||
exit:
|
||||
@ -352,7 +280,3 @@ OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
|
||||
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_list_item_wrapper_t,
|
||||
opal_list_item_t,
|
||||
NULL,NULL);
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user