1
1

Merge pull request #7824 from hoopoepg/topic/ucx-test-external-events-v4.1

COMMON/UCX: improved missing events test - v4.1
Этот коммит содержится в:
Brian Barrett, 2020-06-25 08:10:54 -07:00; коммит выполнен через GitHub
родитель a3258afad9 d52b64c488
Коммит 173142bf32
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файла: 31 добавление и 8 удалений

Просмотреть файл

@ -112,7 +112,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
ucp_request_check_status, ucp_put_nb, ucp_get_nb], ucp_request_check_status, ucp_put_nb, ucp_get_nb],
[], [], [], [],
[#include <ucp/api/ucp.h>]) [#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucm_test_events], AC_CHECK_DECLS([ucm_test_events,
ucm_test_external_events],
[], [], [], [],
[#include <ucm/api/ucm.h>]) [#include <ucm/api/ucm.h>])
AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND, AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND,

Просмотреть файл

@ -132,24 +132,46 @@ static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced)
*(int*)fenced = 1; *(int*)fenced = 1;
} }
void opal_common_ucx_mca_proc_added(void)
{
#if HAVE_DECL_UCM_TEST_EVENTS #if HAVE_DECL_UCM_TEST_EVENTS
/*
 * Run the UCM memory-event test for the requested events.
 *
 * events: UCM event flags to test (e.g. UCM_EVENT_VM_UNMAPPED); forwarded
 *         unchanged to the underlying UCM test routine.
 *
 * Returns the status reported by ucm_test_external_events() when that
 * declaration is available (HAVE_DECL_UCM_TEST_EXTERNAL_EVENTS), otherwise
 * the status reported by ucm_test_events().
 */
static ucs_status_t opal_common_ucx_mca_test_external_events(int events)
{
#if HAVE_DECL_UCM_TEST_EXTERNAL_EVENTS
    return ucm_test_external_events(events);
#else
    /* ucm_test_external_events() is not declared: use the generic test. */
    return ucm_test_events(events);
#endif
}
static void opal_common_ucx_mca_test_events(void)
{
static int warned = 0; static int warned = 0;
static char *mem_hooks_suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 " const char *suggestion;
"to mpirun/oshrun command line to resolve this issue.";
ucs_status_t status; ucs_status_t status;
if (!warned) { if (!warned) {
status = ucm_test_events(UCM_EVENT_VM_UNMAPPED); if (opal_common_ucx.opal_mem_hooks) {
suggestion = "Please check OPAL memory events infrastructure.";
status = opal_common_ucx_mca_test_external_events(UCM_EVENT_VM_UNMAPPED);
} else {
suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 "
"to mpirun/oshrun command line to resolve this issue.";
status = ucm_test_events(UCM_EVENT_VM_UNMAPPED);
}
if (status != UCS_OK) { if (status != UCS_OK) {
MCA_COMMON_UCX_WARN("UCX is unable to handle VM_UNMAP event. " MCA_COMMON_UCX_WARN("UCX is unable to handle VM_UNMAP event. "
"This may cause performance degradation or data " "This may cause performance degradation or data "
"corruption. %s", "corruption. %s", suggestion);
opal_common_ucx.opal_mem_hooks ? "" : mem_hooks_suggestion);
warned = 1; warned = 1;
} }
} }
}
#endif
void opal_common_ucx_mca_proc_added(void)
{
#if HAVE_DECL_UCM_TEST_EVENTS
opal_common_ucx_mca_test_events();
#endif #endif
} }