MCA/COMMON/UCX: added parameter for UCX/opal progress
- added a parameter to set how many UCX progress calls are made per opal_progress call
- minor refactoring of the request wait routines

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
родитель
d57ae62dee
Коммит
63e7ba6843
@ -12,6 +12,7 @@
|
||||
#include "ompi/mca/osc/osc.h"
|
||||
#include "ompi/mca/osc/base/base.h"
|
||||
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
||||
#include "opal/mca/common/ucx/common_ucx.h"
|
||||
|
||||
#include "osc_ucx.h"
|
||||
#include "osc_ucx_request.h"
|
||||
@ -179,6 +180,7 @@ static int component_init(bool enable_progress_threads, bool enable_mpi_threads)
|
||||
goto error;
|
||||
}
|
||||
|
||||
opal_common_ucx_mca_register();
|
||||
return ret;
|
||||
error:
|
||||
if (progress_registered) opal_progress_unregister(progress_callback);
|
||||
|
@ -53,15 +53,15 @@ lib@OPAL_LIB_PREFIX@mca_common_ucx_la_SOURCES = \
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_la_LDFLAGS = \
|
||||
-version-info $(libmca_opal_common_ucx_so_version) \
|
||||
$(common_ucx_LDFLAGS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_noinst_la_LDFLAGS = \
|
||||
$(common_ucx_LDFLAGS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_la_LIBADD = \
|
||||
$(common_ucx_LIBS) \
|
||||
$(OMPI_TOP_BUILDDIR)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_noinst_la_LIBADD = \
|
||||
$(common_ucx_LIBS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_noinst_la_SOURCES = \
|
||||
$(headers) $(sources)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_noinst_la_LDFLAGS = \
|
||||
$(common_ucx_LDFLAGS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ucx_noinst_la_LIBADD = \
|
||||
$(common_ucx_LIBS)
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
|
@ -10,25 +10,29 @@
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "common_ucx.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
static void opal_common_ucp_send_cb(void *request, ucs_status_t status)
|
||||
int opal_common_ucx_progress_iterations = 100;
|
||||
|
||||
OPAL_DECLSPEC void opal_common_ucx_mca_register(void)
|
||||
{
|
||||
static int registered = 0;
|
||||
|
||||
if (registered) {
|
||||
/* process once */
|
||||
return;
|
||||
}
|
||||
|
||||
registered = 1;
|
||||
mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations",
|
||||
"Set number of calls of internal UCX progress calls per opal_progress call",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&opal_common_ucx_progress_iterations);
|
||||
}
|
||||
|
||||
ucs_status_t opal_common_ucx_ep_flush(ucp_ep_h ep, ucp_worker_h worker)
|
||||
void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status)
|
||||
{
|
||||
ucs_status_ptr_t status;
|
||||
|
||||
status = ucp_ep_flush_nb(ep, 0, opal_common_ucp_send_cb);
|
||||
return opal_common_ucx_wait_request(status, worker);
|
||||
}
|
||||
|
||||
/*
 * Flush a UCP worker and wait for the flush to finish.
 *
 * Starts the flush with ucp_worker_flush_nb(), using
 * opal_common_ucp_send_cb as the completion callback, and returns whatever
 * opal_common_ucx_wait_request() reports for the resulting handle.
 */
ucs_status_t opal_common_ucx_worker_flush(ucp_worker_h worker)
{
    ucs_status_ptr_t flush_req = ucp_worker_flush_nb(worker, 0, opal_common_ucp_send_cb);

    return opal_common_ucx_wait_request(flush_req, worker);
}
|
||||
|
@ -23,8 +23,10 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OPAL_DECLSPEC ucs_status_t opal_common_ucx_ep_flush(ucp_ep_h ep, ucp_worker_h worker);
|
||||
OPAL_DECLSPEC ucs_status_t opal_common_ucx_worker_flush(ucp_worker_h worker);
|
||||
extern int opal_common_ucx_progress_iterations;
|
||||
|
||||
OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
|
||||
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
|
||||
|
||||
static inline
|
||||
ucs_status_t opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker)
|
||||
@ -33,28 +35,44 @@ ucs_status_t opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h
|
||||
int i;
|
||||
|
||||
/* check for request completed or failed */
|
||||
if (UCS_OK == request) {
|
||||
if (OPAL_LIKELY(UCS_OK == request)) {
|
||||
return UCS_OK;
|
||||
} else if (UCS_PTR_IS_ERR(request)) {
|
||||
} else if (OPAL_UNLIKELY(UCS_PTR_IS_ERR(request))) {
|
||||
return UCS_PTR_STATUS(request);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
/* call UCX progress */
|
||||
for (i = 0; i < 100; i++) {
|
||||
for (i = 0; i < opal_common_ucx_progress_iterations; i++) {
|
||||
if (UCS_INPROGRESS != (status = ucp_request_check_status(request))) {
|
||||
ucp_request_free(request);
|
||||
return status;
|
||||
}
|
||||
ucp_worker_progress(worker);
|
||||
}
|
||||
/* call OPAL progress on every 100 call to UCX progress */
|
||||
/* call OPAL progress on every opal_common_ucx_progress_iterations
|
||||
* calls to UCX progress */
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
ucs_status_t opal_common_ucx_ep_flush(ucp_ep_h ep, ucp_worker_h worker)
|
||||
{
|
||||
ucs_status_ptr_t status;
|
||||
|
||||
status = ucp_ep_flush_nb(ep, 0, opal_common_ucx_empty_complete_cb);
|
||||
return opal_common_ucx_wait_request(status, worker);
|
||||
}
|
||||
|
||||
static inline
|
||||
ucs_status_t opal_common_ucx_worker_flush(ucp_worker_h worker)
|
||||
{
|
||||
ucs_status_ptr_t status;
|
||||
|
||||
status = ucp_worker_flush_nb(worker, 0, opal_common_ucx_empty_complete_cb);
|
||||
return opal_common_ucx_wait_request(status, worker);
|
||||
}
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -61,14 +61,6 @@ struct mca_atomic_ucx_module_t {
|
||||
typedef struct mca_atomic_ucx_module_t mca_atomic_ucx_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_atomic_ucx_module_t);
|
||||
|
||||
|
||||
void mca_atomic_ucx_complete_cb(void *request, ucs_status_t status);
|
||||
|
||||
static inline
|
||||
ucs_status_t mca_atomic_ucx_wait_request(ucs_status_ptr_t request)
|
||||
{
|
||||
return opal_common_ucx_wait_request(request, mca_spml_self->ucp_worker);
|
||||
}
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_ATOMIC_UCX_H */
|
||||
|
@ -41,15 +41,17 @@ int mca_atomic_ucx_cswap_inner(void *target,
|
||||
if (NULL == cond) {
|
||||
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
|
||||
UCP_ATOMIC_FETCH_OP_SWAP, val, prev, nlong,
|
||||
rva, ucx_mkey->rkey, mca_atomic_ucx_complete_cb);
|
||||
status = mca_atomic_ucx_wait_request(status_ptr);
|
||||
rva, ucx_mkey->rkey,
|
||||
opal_common_ucx_empty_complete_cb);
|
||||
status = opal_common_ucx_wait_request(status_ptr, mca_spml_self->ucp_worker);
|
||||
}
|
||||
else {
|
||||
cmp = (4 == nlong) ? *(uint32_t*)cond : *(uint64_t*)cond;
|
||||
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
|
||||
UCP_ATOMIC_FETCH_OP_CSWAP, cmp, &val, nlong,
|
||||
rva, ucx_mkey->rkey, mca_atomic_ucx_complete_cb);
|
||||
status = mca_atomic_ucx_wait_request(status_ptr);
|
||||
rva, ucx_mkey->rkey,
|
||||
opal_common_ucx_empty_complete_cb);
|
||||
status = opal_common_ucx_wait_request(status_ptr, mca_spml_self->ucp_worker);
|
||||
if (UCS_OK == status) {
|
||||
assert(NULL != prev);
|
||||
memcpy(prev, &val, nlong);
|
||||
|
@ -49,8 +49,9 @@ int mca_atomic_ucx_fadd(void *target,
|
||||
else {
|
||||
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
|
||||
UCP_ATOMIC_FETCH_OP_FADD, val, prev, nlong,
|
||||
rva, ucx_mkey->rkey, mca_atomic_ucx_complete_cb);
|
||||
status = mca_atomic_ucx_wait_request(status_ptr);
|
||||
rva, ucx_mkey->rkey,
|
||||
opal_common_ucx_empty_complete_cb);
|
||||
status = opal_common_ucx_wait_request(status_ptr, mca_spml_self->ucp_worker);
|
||||
}
|
||||
|
||||
return ucx_status_to_oshmem(status);
|
||||
|
@ -48,8 +48,3 @@ mca_atomic_ucx_query(int *priority)
|
||||
|
||||
return NULL ;
|
||||
}
|
||||
|
||||
/*
 * Completion callback with an empty body.
 *
 * It is handed to ucp_atomic_fetch_nb() at the atomic call sites; both
 * arguments are ignored.
 */
void mca_atomic_ucx_complete_cb(void *request, ucs_status_t status)
{
    (void)request;
    (void)status;
}
|
||||
|
||||
|
@ -506,6 +506,7 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
|
||||
mkeys[0].va_base = addr;
|
||||
*count = 1;
|
||||
mca_spml_ucx_cache_mkey(&mkeys[0], segno, my_pe);
|
||||
opal_common_ucx_mca_register();
|
||||
return mkeys;
|
||||
|
||||
error_unmap:
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user