Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
/**
 * Copyright (c) 2011 Mellanox Technologies. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
2013-12-04 00:21:40 +04:00
|
|
|
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "coll_hcoll.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initial query function that is invoked during MPI_INIT, allowing
|
|
|
|
* this module to indicate what level of thread support it provides.
|
|
|
|
*/
|
|
|
|
int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads)
|
|
|
|
{
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Reset every cached collective entry point on the module.
 *
 * Clears the hcoll context, the saved "previous" collective function
 * pointers, AND the corresponding provider-module pointers.  Clearing the
 * *_module fields is the fix here: OBJ_NEW() does not zero the object body,
 * so without it the destructor would OBJ_RELEASE() uninitialized pointers
 * whenever a module is destroyed before mca_coll_hcoll_module_enable() ran.
 */
static void mca_coll_hcoll_module_clear(mca_coll_hcoll_module_t *hcoll_module)
{
    hcoll_module->comm          = NULL;
    hcoll_module->hcoll_context = NULL;

    /* Fallback collective function pointers (blocking). */
    hcoll_module->previous_barrier        = NULL;
    hcoll_module->previous_bcast          = NULL;
    hcoll_module->previous_reduce         = NULL;
    hcoll_module->previous_allreduce      = NULL;
    hcoll_module->previous_allgather      = NULL;
    hcoll_module->previous_allgatherv     = NULL;
    hcoll_module->previous_gather         = NULL;
    hcoll_module->previous_gatherv        = NULL;
    hcoll_module->previous_alltoall       = NULL;
    hcoll_module->previous_alltoallv      = NULL;
    hcoll_module->previous_alltoallw      = NULL;
    hcoll_module->previous_reduce_scatter = NULL;

    /* Fallback collective function pointers (non-blocking). */
    hcoll_module->previous_ibarrier   = NULL;
    hcoll_module->previous_ibcast     = NULL;
    hcoll_module->previous_iallreduce = NULL;
    hcoll_module->previous_iallgather = NULL;

    /* Fallback provider modules (released in the destructor). */
    hcoll_module->previous_barrier_module        = NULL;
    hcoll_module->previous_bcast_module          = NULL;
    hcoll_module->previous_reduce_module         = NULL;
    hcoll_module->previous_allreduce_module      = NULL;
    hcoll_module->previous_allgather_module      = NULL;
    hcoll_module->previous_allgatherv_module     = NULL;
    hcoll_module->previous_gather_module         = NULL;
    hcoll_module->previous_gatherv_module        = NULL;
    hcoll_module->previous_alltoall_module       = NULL;
    hcoll_module->previous_alltoallv_module      = NULL;
    hcoll_module->previous_alltoallw_module      = NULL;
    hcoll_module->previous_reduce_scatter_module = NULL;
    hcoll_module->previous_ibarrier_module       = NULL;
    hcoll_module->previous_ibcast_module         = NULL;
    hcoll_module->previous_iallreduce_module     = NULL;
    hcoll_module->previous_iallgather_module     = NULL;
}
|
|
|
|
|
|
|
|
/* Constructor: reset the cached collective handlers via the shared
 * clear helper. */
static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_module)
{
    mca_coll_hcoll_module_clear(hcoll_module);
}
|
|
|
|
|
|
|
|
/*
 * Destructor for an hcoll module.
 *
 * When the module being destroyed belongs to MPI_COMM_WORLD, all other
 * communicators still tracked on the component's active_modules list are
 * torn down first (their hcoll contexts destroyed and their communicator
 * references dropped), and then comm_world's own context is destroyed.
 * Finally the retained fallback modules are released.
 *
 * Fix over the original: each OBJ_RELEASE of a previous_*_module pointer is
 * guarded against NULL.  If mca_coll_hcoll_module_enable() failed partway
 * through __save_coll_handlers(), the remaining slots were never filled and
 * the unconditional releases would touch invalid pointers.
 */
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
{
    opal_list_item_t *item, *item_next;
    opal_list_t *am;
    mca_coll_hcoll_module_t *module;
    ompi_communicator_t *comm;
    int context_destroyed;

    am = &mca_coll_hcoll_component.active_modules;

    if (hcoll_module->comm == &ompi_mpi_comm_world.comm) {
        /* If we get here then we are destroying MPI_COMM_WORLD now.  So it
         * is safe to destroy all the other communicators and corresponding
         * hcoll contexts that could still be on the "active_modules" list. */
        item = opal_list_get_first(am);
        while (item != opal_list_get_end(am)) {
            item_next = opal_list_get_next(item);
            module = ((mca_coll_hcoll_module_list_item_wrapper_t *) item)->module;
            comm = module->comm;

            /* hcoll_destroy_context() is non-blocking: the out parameter
             * reports whether the context is actually gone; poll until it is. */
            context_destroyed = 0;
            while (!context_destroyed) {
                hcoll_destroy_context(module->hcoll_context,
                                      (rte_grp_handle_t) comm,
                                      &context_destroyed);
            }
            module->hcoll_context = NULL;
            OBJ_RELEASE(comm);

            opal_list_remove_item(am, item);
            OBJ_RELEASE(item);
            item = item_next;
        }

        /* Now destroy the comm_world hcoll context as well. */
        context_destroyed = 0;
        while (!context_destroyed) {
            hcoll_destroy_context(hcoll_module->hcoll_context,
                                  (rte_grp_handle_t) hcoll_module->comm,
                                  &context_destroyed);
        }
    }

    /* Release the references retained by HCOL_SAVE_PREV_COLL_API, skipping
     * slots that were never filled. */
#define HCOLL_RELEASE_PREV(__api)                                         \
    do {                                                                  \
        if (NULL != hcoll_module->previous_ ## __api ## _module) {        \
            OBJ_RELEASE(hcoll_module->previous_ ## __api ## _module);     \
        }                                                                 \
    } while (0)

    HCOLL_RELEASE_PREV(barrier);
    HCOLL_RELEASE_PREV(bcast);
    HCOLL_RELEASE_PREV(reduce);
    HCOLL_RELEASE_PREV(allreduce);
    HCOLL_RELEASE_PREV(allgather);
    HCOLL_RELEASE_PREV(allgatherv);
    HCOLL_RELEASE_PREV(gather);
    HCOLL_RELEASE_PREV(gatherv);
    HCOLL_RELEASE_PREV(alltoall);
    HCOLL_RELEASE_PREV(alltoallv);
    HCOLL_RELEASE_PREV(alltoallw);
    HCOLL_RELEASE_PREV(reduce_scatter);
    HCOLL_RELEASE_PREV(ibarrier);
    HCOLL_RELEASE_PREV(ibcast);
    HCOLL_RELEASE_PREV(iallreduce);
    HCOLL_RELEASE_PREV(iallgather);
#undef HCOLL_RELEASE_PREV

    mca_coll_hcoll_module_clear(hcoll_module);
}
|
|
|
|
|
|
|
|
/*
 * Save the communicator's current (fallback) implementation of collective
 * __api into the hcoll module, retaining a reference on the module that
 * provides it.  Expands in a scope where `comm` and `hcoll_module` are in
 * scope; returns OMPI_ERROR from the enclosing function when the handler
 * is unavailable.
 *
 * Fix over the original: the availability check now runs *before* anything
 * is stored, so a failing API leaves no stale handler/module pointers
 * recorded on the hcoll module.
 */
#define HCOL_SAVE_PREV_COLL_API(__api) do {                                            \
        if (!comm->c_coll.coll_ ## __api || !comm->c_coll.coll_ ## __api ## _module) { \
            return OMPI_ERROR;                                                         \
        }                                                                              \
        hcoll_module->previous_ ## __api = comm->c_coll.coll_ ## __api;                \
        hcoll_module->previous_ ## __api ## _module =                                  \
            comm->c_coll.coll_ ## __api ## _module;                                    \
        OBJ_RETAIN(hcoll_module->previous_ ## __api ## _module);                       \
    } while(0)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Record the communicator's current collective implementations so hcoll can
 * fall back to them, retaining a reference on each provider module (see
 * HCOL_SAVE_PREV_COLL_API).  Returns OMPI_ERROR as soon as any required
 * handler is missing, OMPI_SUCCESS otherwise.
 *
 * Note: the macro expands against the local names `comm` and `hcoll_module`.
 */
static int __save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module)
{
    ompi_communicator_t *comm = hcoll_module->comm;

    /* Blocking collectives. */
    HCOL_SAVE_PREV_COLL_API(barrier);
    HCOL_SAVE_PREV_COLL_API(bcast);
    HCOL_SAVE_PREV_COLL_API(reduce);
    HCOL_SAVE_PREV_COLL_API(allreduce);
    HCOL_SAVE_PREV_COLL_API(allgather);
    HCOL_SAVE_PREV_COLL_API(allgatherv);
    HCOL_SAVE_PREV_COLL_API(gather);
    HCOL_SAVE_PREV_COLL_API(gatherv);
    HCOL_SAVE_PREV_COLL_API(alltoall);
    HCOL_SAVE_PREV_COLL_API(alltoallv);
    HCOL_SAVE_PREV_COLL_API(alltoallw);
    HCOL_SAVE_PREV_COLL_API(reduce_scatter);

    /* Non-blocking collectives. */
    HCOL_SAVE_PREV_COLL_API(ibarrier);
    HCOL_SAVE_PREV_COLL_API(ibcast);
    HCOL_SAVE_PREV_COLL_API(iallreduce);
    HCOL_SAVE_PREV_COLL_API(iallgather);

    return OMPI_SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize module on the communicator
|
|
|
|
*/
|
|
|
|
/*
 * Initialize the hcoll module on a communicator.
 *
 * Saves the communicator's existing collective handlers as fallbacks,
 * configures hcoll's tag range, creates the per-communicator hcoll context,
 * and (for communicators other than MPI_COMM_WORLD) records the module on
 * the component's active_modules list so the context can be torn down at
 * finalize.
 *
 * Fix over the original: the OBJ_NEW() result for the list wrapper is
 * checked before use instead of being dereferenced unconditionally.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR on any failure.
 */
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
                                        struct ompi_communicator_t *comm)
{
    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t *) module;

    hcoll_module->comm = comm;

    if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)) {
        HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
        return OMPI_ERROR;
    }

    /* Reserve a tag range for hcoll's internal point-to-point traffic.
     * NOTE(review): the -100 offset presumably keeps hcoll tags out of the
     * application's tag space below pml_max_tag — confirm against the
     * hcoll runtime API docs. */
    hcoll_set_runtime_tag_offset(-100, mca_pml.pml_max_tag);

    hcoll_module->hcoll_context = hcoll_create_context((rte_grp_handle_t) comm);
    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1, "hcoll_create_context returned NULL");
        return OMPI_ERROR;
    }

    if (comm != &ompi_mpi_comm_world.comm) {
        /* Track this module so mca_coll_hcoll_progress()/the comm_world
         * destructor can destroy its context later; the communicator is
         * retained until that teardown. */
        mca_coll_hcoll_module_list_item_wrapper_t *mw =
            OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t);
        if (NULL == mw) {
            /* OBJ_NEW can fail on out-of-memory. */
            return OMPI_ERROR;
        }
        mw->module = hcoll_module;
        OBJ_RETAIN(hcoll_module->comm);
        opal_list_append(&mca_coll_hcoll_component.active_modules,
                         (opal_list_item_t *) mw);
    }

    return OMPI_SUCCESS;
}
|
|
|
|
|
2013-12-04 00:21:40 +04:00
|
|
|
int mca_coll_hcoll_progress(void)
|
|
|
|
{
|
|
|
|
opal_list_item_t *item, *item_next;
|
2013-12-23 10:57:12 +04:00
|
|
|
opal_list_t *am;
|
2013-12-04 00:21:40 +04:00
|
|
|
mca_coll_hcoll_module_t *module;
|
|
|
|
ompi_communicator_t *comm;
|
2013-12-23 10:57:12 +04:00
|
|
|
int context_destroyed;
|
2013-12-04 00:21:40 +04:00
|
|
|
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,1);
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
|
2013-12-23 10:57:12 +04:00
|
|
|
am = &mca_coll_hcoll_component.active_modules;
|
|
|
|
|
2013-12-04 00:21:40 +04:00
|
|
|
if (mca_coll_hcoll_component.progress_lock){
|
|
|
|
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
|
|
|
|
(*hcoll_progress_fn)();
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
if (ompi_mpi_finalized){
|
|
|
|
hcoll_rte_p2p_disabled_notify();
|
2013-12-23 10:57:12 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
item = opal_list_get_first(am);
|
|
|
|
while (item != opal_list_get_end(am)){
|
|
|
|
item_next = opal_list_get_next(item);
|
|
|
|
module = ((mca_coll_hcoll_module_list_item_wrapper_t *)item)->module;
|
|
|
|
comm = module->comm;
|
|
|
|
if (((opal_object_t*)comm)->obj_reference_count == 1){
|
|
|
|
/* Ok, if we are here then nobody owns a communicator pointed with comm except
|
|
|
|
* for coll_hcoll. Hence, it is safe to remove the hcoll context firstly and
|
|
|
|
* call release on the communicator.
|
|
|
|
*
|
|
|
|
* The call to hcoll_destroy_context is not blocking. The last parameter on the return
|
|
|
|
* indicates whether the context has been destroyd (1) or not (0). In the latter
|
|
|
|
* case one should call destroy again after some progressing
|
|
|
|
*/
|
|
|
|
context_destroyed = 0;
|
|
|
|
hcoll_destroy_context(module->hcoll_context,
|
|
|
|
(rte_grp_handle_t)comm,
|
|
|
|
&context_destroyed);
|
|
|
|
if (context_destroyed){
|
2013-12-04 00:21:40 +04:00
|
|
|
module->hcoll_context = NULL;
|
|
|
|
OBJ_RELEASE(comm);
|
|
|
|
opal_list_remove_item(am,item);
|
|
|
|
OBJ_RELEASE(item);
|
|
|
|
}
|
|
|
|
}
|
2013-12-23 10:57:12 +04:00
|
|
|
item = item_next;
|
2013-12-04 00:21:40 +04:00
|
|
|
}
|
2013-12-23 10:57:12 +04:00
|
|
|
|
2014-01-08 14:55:25 +04:00
|
|
|
if (!ompi_mpi_finalized) {
|
|
|
|
(*hcoll_progress_fn)();
|
|
|
|
}
|
2013-12-04 00:21:40 +04:00
|
|
|
OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Invoked when there's a new communicator that has been created.
|
|
|
|
* Look at the communicator and decide which set of functions and
|
|
|
|
* priority we want to return.
|
|
|
|
*/
|
|
|
|
mca_coll_base_module_t *
|
|
|
|
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|
|
|
{
|
|
|
|
mca_coll_base_module_t *module;
|
|
|
|
mca_coll_hcoll_module_t *hcoll_module;
|
2013-09-18 16:22:02 +04:00
|
|
|
static bool libhcoll_initialized = false;
|
2013-12-23 10:57:12 +04:00
|
|
|
int rc;
|
|
|
|
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
*priority = 0;
|
|
|
|
module = NULL;
|
|
|
|
|
|
|
|
if (!mca_coll_hcoll_component.hcoll_enable){
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
2013-09-18 16:22:02 +04:00
|
|
|
if (!libhcoll_initialized)
|
|
|
|
{
|
|
|
|
/* libhcoll should be initialized here since current implmentation of
|
|
|
|
mxm bcol in libhcoll needs world_group fully functional during init
|
|
|
|
world_group, i.e. ompi_comm_world, is not ready at hcoll component open
|
|
|
|
call */
|
2013-12-04 00:21:40 +04:00
|
|
|
opal_progress_register(mca_coll_hcoll_progress);
|
2013-12-23 10:57:12 +04:00
|
|
|
rc = hcoll_init();
|
2013-09-18 16:22:02 +04:00
|
|
|
|
|
|
|
if (HCOLL_SUCCESS != rc){
|
|
|
|
mca_coll_hcoll_component.hcoll_enable = 0;
|
|
|
|
opal_progress_unregister(hcoll_progress_fn);
|
|
|
|
HCOL_VERBOSE(0,"Hcol library init failed");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
libhcoll_initialized = true;
|
|
|
|
}
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
|
|
|
|
if (!hcoll_module){
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
|
|
|
|
hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
|
|
|
|
hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
|
|
|
|
hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
|
|
|
|
hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
|
|
|
|
hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */ NULL;
|
2013-09-25 20:14:59 +04:00
|
|
|
hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
|
|
|
|
hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
|
|
|
|
hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
|
|
|
|
hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
|
2013-12-04 00:21:40 +04:00
|
|
|
hcoll_module->super.coll_gather = hcoll_collectives.coll_gather ? mca_coll_hcoll_gather : NULL;
|
Per the details and schedule outlined in the attached RFC, Mellanox Technologies would like to CMR the new 'coll/hcoll' component. This component enables Mellanox Technologies' latest HPC middleware offering - 'Hcoll'. 'Hcoll' is a high-performance, standalone collectives library with support for truly asynchronous, non-blocking, hierarchical collectives via hardware offload on supporting Mellanox HCAs (ConnectX-3 and above.) To build the component, libhcoll must first be installed on your system, then you must configure OMPI with the configure flag: '--with-hcoll=/path/to/libhcoll'. Subsequent to installing, you may select the 'coll/hcoll' component at runtime as you would any other coll component, e.g. '-mca coll hcoll,tuned,libnbc'. This has been reviewed by Josh Ladd and should be added to cmr:v1.7:reviewer=jladd
This commit was SVN r28694.
2013-07-02 02:39:43 +04:00
|
|
|
|
|
|
|
*priority = mca_coll_hcoll_component.hcoll_priority;
|
|
|
|
module = &hcoll_module->super;
|
|
|
|
|
|
|
|
exit:
|
|
|
|
return module;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* OPAL class instance for the hcoll collective module. */
OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
                   mca_coll_base_module_t,
                   mca_coll_hcoll_module_construct,
                   mca_coll_hcoll_module_destruct);

/* List-item wrapper used to keep active hcoll modules on the component's
 * active_modules list; no construct/destruct needed. */
OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_list_item_wrapper_t,
                   opal_list_item_t,
                   NULL, NULL);
|
|
|
|
|