HCOLL: register memory release hook to avoid races
Fixed by Devender, reviewed by Miked. cmr=v1.8.2:reviewer=ompi-rm1.8. This commit was SVN r31809.
Этот коммит содержится в:
родитель
747ad77a97
Коммит
cadc1485ff
@ -14,6 +14,7 @@
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
@ -29,6 +30,9 @@
|
||||
|
||||
|
||||
#include "coll_hcoll_debug.h"
|
||||
/*
 * Fallback definition: HCOLL_VERSION is defined here only when nothing
 * included earlier has already defined it.
 *
 * Packs a (major, minor) pair into a single integer by shifting each part
 * by HCOLL_MAJOR_BIT / HCOLL_MINOR_BIT and OR-ing the results, so the value
 * can be compared in preprocessor conditionals against HCOLL_API.
 * NOTE(review): HCOLL_MAJOR_BIT and HCOLL_MINOR_BIT are not defined in the
 * visible source; presumably they come from the hcoll headers -- confirm.
 */
#ifndef HCOLL_VERSION
|
||||
#define HCOLL_VERSION(major, minor) (((major)<<HCOLL_MAJOR_BIT)|((minor)<<HCOLL_MINOR_BIT))
|
||||
#endif
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
@ -64,11 +68,13 @@ struct mca_coll_hcoll_component_t {
|
||||
/** Whether or not hcoll_init was ever called */
|
||||
bool libhcoll_initialized;
|
||||
|
||||
bool using_mem_hooks;
|
||||
|
||||
/** MCA parameter: ON/OFF user defined datatype through HCOLL */
|
||||
int hcoll_datatype_fallback;
|
||||
|
||||
/** r/o MCA parameter: libhcoll runtime version */
|
||||
char* runtime_version;
|
||||
const char* runtime_version;
|
||||
|
||||
/** r/o MCA parameter: libhcoll compiletime version */
|
||||
char* compiletime_version;
|
||||
@ -203,6 +209,7 @@ int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count,
|
||||
ompi_request_t** request,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hcoll_progress(void);
|
||||
void mca_coll_hcoll_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc);
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -241,6 +241,20 @@ static int hcoll_open(void)
|
||||
hcoll_rte_fns_setup();
|
||||
|
||||
cm->libhcoll_initialized = false;
|
||||
|
||||
/* Register memory hooks */
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
|
||||
opal_mem_hooks_support_level()))
|
||||
{
|
||||
setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0);
|
||||
HCOL_VERBOSE(1, "Enabling on-demand memory mapping");
|
||||
cm->using_mem_hooks = 1;
|
||||
} else {
|
||||
HCOL_VERBOSE(1, "Disabling on-demand memory mapping");
|
||||
cm->using_mem_hooks = 0;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -254,6 +268,10 @@ static int hcoll_close(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (cm->using_mem_hooks) {
|
||||
opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb);
|
||||
}
|
||||
|
||||
HCOL_VERBOSE(5,"HCOLL FINALIZE");
|
||||
rc = hcoll_finalize();
|
||||
|
||||
|
@ -47,6 +47,14 @@ static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_modul
|
||||
mca_coll_hcoll_module_clear(hcoll_module);
|
||||
}
|
||||
|
||||
/*
 * Memory-release callback for the HCOLL component.
 *
 * This function is passed to opal_mem_hooks_register_release() (with NULL
 * as the callback data) in mca_coll_hcoll_comm_query, and is removed again
 * with opal_mem_hooks_unregister_release() in hcoll_close.
 *
 * All four arguments are forwarded unchanged to hcoll_mem_unmap():
 *   buf        - start of the memory region
 *   length     - size of the region
 *   cbdata     - opaque callback data supplied at registration
 *   from_alloc - flag supplied by the memory hooks framework
 *                (NOTE(review): exact meaning is not visible here; see the
 *                opal memoryhooks documentation)
 *
 * Returns nothing.
 */
void mca_coll_hcoll_mem_release_cb(void *buf, size_t length,
|
||||
void *cbdata, bool from_alloc)
|
||||
{
|
||||
/* hcoll_mem_unmap() is only called when HCOLL_API is strictly greater than
 * HCOLL_VERSION(3,0); otherwise the callback body is empty and this
 * function does nothing. */
#if HCOLL_API > HCOLL_VERSION(3,0)
|
||||
hcoll_mem_unmap(buf, length, cbdata, from_alloc);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
|
||||
{
|
||||
mca_coll_hcoll_module_t *module;
|
||||
@ -225,6 +233,12 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
HCOL_VERBOSE(10,"Calling hcoll_init();");
|
||||
rc = hcoll_init();
|
||||
|
||||
if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) {
|
||||
opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL);
|
||||
} else {
|
||||
cm->using_mem_hooks = 0;
|
||||
}
|
||||
|
||||
if (HCOLL_SUCCESS != rc){
|
||||
cm->hcoll_enable = 0;
|
||||
opal_progress_unregister(hcoll_progress_fn);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user