
Merge pull request #6605 from brminich/topic/shmem_all2all_put

SPML/UCX: Add shmemx_alltoall_global_nb routine to shmemx.h
This commit is contained in:
Yossi Itigin 2019-05-01 12:00:21 +03:00 committed by GitHub
parents 399b7133ab d4843b1651
commit 5d2200a7d6
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 350 additions and 36 deletions

View file

@@ -20,8 +20,6 @@
#include "osc_ucx.h"
#include "osc_ucx_request.h"
-#define UCX_VERSION(_major, _minor, _build) (((_major) * 100) + (_minor))
#define memcpy_off(_dst, _src, _len, _off) \
memcpy(((char*)(_dst)) + (_off), _src, _len); \
(_off) += (_len);

View file

@@ -39,6 +39,9 @@ BEGIN_C_DECLS
#define MCA_COMMON_UCX_PER_TARGET_OPS_THRESHOLD 1000
#define MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD 1000
#define UCX_VERSION(_major, _minor, _build) (((_major) * 100) + (_minor))
#define _MCA_COMMON_UCX_QUOTE(_x) \
# _x
#define MCA_COMMON_UCX_QUOTE(_x) \

View file

@@ -168,6 +168,21 @@ OSHMEM_DECLSPEC void shmemx_int16_prod_to_all(int16_t *target, const int16_t *so
OSHMEM_DECLSPEC void shmemx_int32_prod_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
OSHMEM_DECLSPEC void shmemx_int64_prod_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
/* shmemx_alltoall_global_nb is a nonblocking collective routine, where each PE
* exchanges size bytes of data with all other PEs in the OpenSHMEM job.
* @param dest A symmetric data object that is large enough to receive
* size bytes of data from each PE in the OpenSHMEM job.
* @param source A symmetric data object that contains size bytes of data
* for each PE in the OpenSHMEM job.
* @param size The number of bytes to be sent to each PE in the job.
* @param counter A symmetric data object to be atomically incremented after
* the target buffer is updated.
*
* @return OSHMEM_SUCCESS or failure status.
*/
OSHMEM_DECLSPEC void shmemx_alltoall_global_nb(void *dest, const void *source, size_t size, long *counter);
/*
* Backward compatibility section
*/
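
For illustration, a minimal caller of the new extension could look like the sketch below (a hypothetical test program; the buffer names and the completion check via shmem_long_wait_until are illustrative assumptions, not part of this change):

#include <shmem.h>
#include <shmemx.h>

int main(void)
{
    shmem_init();
    int npes = shmem_n_pes();
    size_t size = 8;                          /* bytes sent to each PE */

    /* symmetric buffers: source holds one size-byte block per destination PE,
     * dest receives one size-byte block from every PE */
    char *source  = (char *)shmem_malloc(size * npes);
    char *dest    = (char *)shmem_malloc(size * npes);
    long *counter = (long *)shmem_malloc(sizeof(long));

    *counter = 0;
    shmem_barrier_all();                      /* counter reset visible everywhere */

    shmemx_alltoall_global_nb(dest, source, size, counter);

    /* each PE atomically increments counter once its block has landed */
    shmem_long_wait_until(counter, SHMEM_CMP_EQ, npes);

    shmem_free(counter);
    shmem_free(dest);
    shmem_free(source);
    shmem_finalize();
    return 0;
}

Once the counter reaches the number of PEs, every block has arrived and dest is safe to read.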

View file

@@ -93,6 +93,10 @@ OSHMEM_DECLSPEC int mca_spml_base_get_nb(void *dst_addr,
void **handle);
OSHMEM_DECLSPEC void mca_spml_base_memuse_hook(void *addr, size_t length);
OSHMEM_DECLSPEC int mca_spml_base_put_all_nb(void *target, const void *source,
size_t size, long *counter);
/*
* MCA framework
*/

View file

@@ -280,3 +280,9 @@ int mca_spml_base_get_nb(void *dst_addr, size_t size,
void mca_spml_base_memuse_hook(void *addr, size_t length)
{
}
int mca_spml_base_put_all_nb(void *target, const void *source,
size_t size, long *counter)
{
return OSHMEM_ERR_NOT_IMPLEMENTED;
}

View file

@@ -179,6 +179,7 @@ mca_spml_ikrit_t mca_spml_ikrit = {
mca_spml_base_rmkey_free,
mca_spml_base_rmkey_ptr,
mca_spml_base_memuse_hook,
mca_spml_base_put_all_nb,
(void*)&mca_spml_ikrit
},

View file

@@ -314,6 +314,35 @@ typedef int (*mca_spml_base_module_send_fn_t)(void *buf,
int dst,
mca_spml_base_put_mode_t mode);
/**
* The routine transfers the data asynchronously from the source PE to all
* PEs in the OpenSHMEM job. The routine returns immediately. The source and
* target buffers are reusable only after the completion of the routine.
* After the data is transferred to the target buffers, the counter object
* is updated atomically. The counter object can be read either with atomic
* operations such as shmem_atomic_fetch, or with point-to-point synchronization
* routines such as shmem_wait_until and shmem_test.
*
* shmem_quiet may be used to complete the operation, but it is not required for
* progress or completion. In a multithreaded OpenSHMEM program, the user
* (the OpenSHMEM program) should ensure the correct ordering of
* shmemx_alltoall_global_nb calls.
*
* @param dest A symmetric data object that is large enough to receive
* size bytes of data from each PE in the OpenSHMEM job.
* @param source A symmetric data object that contains size bytes of data
* for each PE in the OpenSHMEM job.
* @param size The number of bytes to be sent to each PE in the job.
* @param counter A symmetric data object to be atomically incremented after
* the target buffer is updated.
*
* @return OSHMEM_SUCCESS or failure status.
*/
typedef int (*mca_spml_base_module_put_all_nb_fn_t)(void *dest,
const void *source,
size_t size,
long *counter);
/**
* Assures ordering of delivery of put() requests
*
@@ -381,6 +410,7 @@ struct mca_spml_base_module_1_0_0_t {
mca_spml_base_module_mkey_ptr_fn_t spml_rmkey_ptr;
mca_spml_base_module_memuse_hook_fn_t spml_memuse_hook;
mca_spml_base_module_put_all_nb_fn_t spml_put_all_nb;
void *self;
};
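
The interface comment above also allows polling the counter with an atomic fetch instead of blocking in shmem_wait_until, so the caller can overlap local work with the collective. A small sketch of that pattern (shmem_long_atomic_fetch is assumed to be available under its OpenSHMEM 1.4 name; the helper is hypothetical):

#include <shmem.h>
#include <shmemx.h>

/* Start the non-blocking alltoall and poll its completion counter,
 * doing local work in between. The counter is assumed to be a symmetric
 * long that was reset to 0 before the call. */
static void alltoall_with_overlap(void *dest, const void *source,
                                  size_t size, long *counter)
{
    long npes = (long)shmem_n_pes();

    shmemx_alltoall_global_nb(dest, source, size, counter);
    while (shmem_long_atomic_fetch(counter, shmem_my_pe()) < npes) {
        /* ... useful local computation goes here ... */
    }
}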

View file

@@ -32,6 +32,7 @@
#include "oshmem/proc/proc.h"
#include "oshmem/mca/spml/base/base.h"
#include "oshmem/mca/spml/base/spml_base_putreq.h"
#include "oshmem/mca/atomic/atomic.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/mca/spml/ucx/spml_ucx_component.h"
@@ -67,6 +68,7 @@ mca_spml_ucx_t mca_spml_ucx = {
.spml_rmkey_free = mca_spml_ucx_rmkey_free,
.spml_rmkey_ptr = mca_spml_ucx_rmkey_ptr,
.spml_memuse_hook = mca_spml_ucx_memuse_hook,
.spml_put_all_nb = mca_spml_ucx_put_all_nb,
.self = (void*)&mca_spml_ucx
},
@@ -439,8 +441,8 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
ucx_mkey->mem_h = (ucp_mem_h)mem_seg->context;
}
status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h,
&mkeys[0].u.data, &len);
if (UCS_OK != status) {
goto error_unmap;
}
@@ -477,8 +479,6 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
{
spml_ucx_mkey_t *ucx_mkey;
map_segment_t *mem_seg;
-int segno;
-int my_pe = oshmem_my_proc_id();
MCA_SPML_CALL(quiet(oshmem_ctx_default));
if (!mkeys)
@@ -493,7 +493,7 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
if (OPAL_UNLIKELY(NULL == mem_seg)) {
return OSHMEM_ERROR;
}
if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) {
ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h);
}
@@ -545,17 +545,15 @@ static inline void _ctx_remove(mca_spml_ucx_ctx_array_t *array, mca_spml_ucx_ctx
opal_atomic_wmb ();
}
-int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx_ctx_p)
{
-mca_spml_ucx_ctx_t *ucx_ctx;
ucp_worker_params_t params;
ucp_ep_params_t ep_params;
size_t i, j, nprocs = oshmem_num_procs();
ucs_status_t err;
-int my_pe = oshmem_my_proc_id();
-size_t len;
spml_ucx_mkey_t *ucx_mkey;
sshmem_mkey_t *mkey;
mca_spml_ucx_ctx_t *ucx_ctx;
int rc = OSHMEM_ERROR;
ucx_ctx = malloc(sizeof(mca_spml_ucx_ctx_t));
@@ -580,10 +578,6 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
goto error;
}
-if (mca_spml_ucx.active_array.ctxs_count == 0) {
-opal_progress_register(spml_ucx_ctx_progress);
-}
for (i = 0; i < nprocs; i++) {
ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]);
@@ -609,11 +603,8 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
}
}
-SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
-_ctx_add(&mca_spml_ucx.active_array, ucx_ctx);
-SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);
-(*ctx) = (shmem_ctx_t)ucx_ctx;
*ucx_ctx_p = ucx_ctx;
return OSHMEM_SUCCESS;
error2:
@@ -634,6 +625,33 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
return rc;
}
int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
{
mca_spml_ucx_ctx_t *ucx_ctx;
int rc;
/* Take a lock controlling context creation. AUX context may set specific
* UCX parameters affecting worker creation, which are not needed for
* regular contexts. */
pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex);
rc = mca_spml_ucx_ctx_create_common(options, &ucx_ctx);
pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex);
if (rc != OSHMEM_SUCCESS) {
return rc;
}
if (mca_spml_ucx.active_array.ctxs_count == 0) {
opal_progress_register(spml_ucx_ctx_progress);
}
SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
_ctx_add(&mca_spml_ucx.active_array, ucx_ctx);
SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);
(*ctx) = (shmem_ctx_t)ucx_ctx;
return OSHMEM_SUCCESS;
}
void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx)
{
MCA_SPML_CALL(quiet(ctx));

@@ -748,6 +766,15 @@ int mca_spml_ucx_quiet(shmem_ctx_t ctx)
oshmem_shmem_abort(-1);
return ret;
}
/* If any put_all_nb operations are being executed asynchronously, wait for
* their completion as well. */
if (ctx == oshmem_ctx_default) {
while (mca_spml_ucx.aux_refcnt) {
opal_progress();
}
}
return OSHMEM_SUCCESS;
}

@@ -785,3 +812,99 @@ int mca_spml_ucx_send(void* buf,
return rc;
}
/* this can be called with request==NULL in case of immediate completion */
static void mca_spml_ucx_put_all_complete_cb(void *request, ucs_status_t status)
{
if (mca_spml_ucx.async_progress && (--mca_spml_ucx.aux_refcnt == 0)) {
opal_event_evtimer_del(mca_spml_ucx.tick_event);
opal_progress_unregister(spml_ucx_progress_aux_ctx);
}
if (request != NULL) {
ucp_request_free(request);
}
}
/* Should be called with AUX lock taken */
static int mca_spml_ucx_create_aux_ctx(void)
{
unsigned major = 0;
unsigned minor = 0;
unsigned rel_number = 0;
int rc;
bool rand_dci_supp;
ucp_get_version(&major, &minor, &rel_number);
rand_dci_supp = UCX_VERSION(major, minor, rel_number) >= UCX_VERSION(1, 6, 0);
if (rand_dci_supp) {
pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex);
opal_setenv("UCX_DC_MLX5_TX_POLICY", "rand", 0, &environ);
}
rc = mca_spml_ucx_ctx_create_common(SHMEM_CTX_PRIVATE, &mca_spml_ucx.aux_ctx);
if (rand_dci_supp) {
opal_unsetenv("UCX_DC_MLX5_TX_POLICY", &environ);
pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex);
}
return rc;
}
int mca_spml_ucx_put_all_nb(void *dest, const void *source, size_t size, long *counter)
{
int my_pe = oshmem_my_proc_id();
long val = 1;
int peer, dst_pe, rc;
shmem_ctx_t ctx;
struct timeval tv;
void *request;
mca_spml_ucx_aux_lock();
if (mca_spml_ucx.async_progress) {
if (mca_spml_ucx.aux_ctx == NULL) {
rc = mca_spml_ucx_create_aux_ctx();
if (rc != OMPI_SUCCESS) {
mca_spml_ucx_aux_unlock();
oshmem_shmem_abort(-1);
}
}
if (mca_spml_ucx.aux_refcnt++ == 0) {
tv.tv_sec = 0;
tv.tv_usec = mca_spml_ucx.async_tick;
opal_event_evtimer_add(mca_spml_ucx.tick_event, &tv);
opal_progress_register(spml_ucx_progress_aux_ctx);
}
ctx = (shmem_ctx_t)mca_spml_ucx.aux_ctx;
} else {
ctx = oshmem_ctx_default;
}
for (peer = 0; peer < oshmem_num_procs(); peer++) {
dst_pe = (peer + my_pe) % oshmem_group_all->proc_count;
rc = mca_spml_ucx_put_nb(ctx,
(void*)((uintptr_t)dest + my_pe * size),
size,
(void*)((uintptr_t)source + dst_pe * size),
dst_pe, NULL);
RUNTIME_CHECK_RC(rc);
mca_spml_ucx_fence(ctx);
rc = MCA_ATOMIC_CALL(add(ctx, (void*)counter, val, sizeof(val), dst_pe));
RUNTIME_CHECK_RC(rc);
}
request = ucp_worker_flush_nb(((mca_spml_ucx_ctx_t*)ctx)->ucp_worker, 0,
mca_spml_ucx_put_all_complete_cb);
if (!UCS_PTR_IS_PTR(request)) {
mca_spml_ucx_put_all_complete_cb(NULL, UCS_PTR_STATUS(request));
}
mca_spml_ucx_aux_unlock();
return OSHMEM_SUCCESS;
}
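
To make the addressing concrete: with 4 PEs and size = 8, PE 1 visits destinations in the rotated order 1, 2, 3, 0 (the rotation by my_pe spreads load instead of having every PE start with PE 0). For destination PE 3 it puts its local block at source + 3*8 into dest + 1*8 on PE 3, i.e. into the slot reserved for data coming from PE 1, then fences and atomically increments PE 3's counter. When async_progress is enabled the transfers go through the auxiliary context, and the ucp_worker_flush_nb completion callback drops the aux reference count once all outstanding operations on that worker have completed.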

View file

@@ -94,10 +94,19 @@ struct mca_spml_ucx {
mca_spml_ucx_ctx_array_t idle_array;
int priority; /* component priority */
shmem_internal_mutex_t internal_mutex;
pthread_mutex_t ctx_create_mutex;
/* Fields controlling aux context for put_all_nb SPML routine */
bool async_progress;
int async_tick;
opal_event_base_t *async_event_base;
opal_event_t *tick_event;
mca_spml_ucx_ctx_t *aux_ctx;
pthread_spinlock_t async_lock;
int aux_refcnt;
};
typedef struct mca_spml_ucx mca_spml_ucx_t;
extern mca_spml_ucx_t mca_spml_ucx;
extern int mca_spml_ucx_enable(bool enable);
@@ -117,23 +126,28 @@ extern int mca_spml_ucx_get_nb(shmem_ctx_t ctx,
void **handle);
extern int mca_spml_ucx_put(shmem_ctx_t ctx,
void* dst_addr,
size_t size,
void* src_addr,
int dst);
extern int mca_spml_ucx_put_nb(shmem_ctx_t ctx,
void* dst_addr,
size_t size,
void* src_addr,
int dst,
void **handle);
extern int mca_spml_ucx_recv(void* buf, size_t size, int src);
extern int mca_spml_ucx_send(void* buf,
size_t size,
int dst,
mca_spml_base_put_mode_t mode);
extern int mca_spml_ucx_put_all_nb(void *target,
const void *source,
size_t size,
long *counter);
extern sshmem_mkey_t *mca_spml_ucx_register(void* addr,
size_t size,

@@ -153,6 +167,22 @@ extern int mca_spml_ucx_fence(shmem_ctx_t ctx);
extern int mca_spml_ucx_quiet(shmem_ctx_t ctx);
extern int spml_ucx_default_progress(void);
extern int spml_ucx_ctx_progress(void);
extern int spml_ucx_progress_aux_ctx(void);
void mca_spml_ucx_async_cb(int fd, short event, void *cbdata);
static inline void mca_spml_ucx_aux_lock(void)
{
if (mca_spml_ucx.async_progress) {
pthread_spin_lock(&mca_spml_ucx.async_lock);
}
}
static inline void mca_spml_ucx_aux_unlock(void)
{
if (mca_spml_ucx.async_progress) {
pthread_spin_unlock(&mca_spml_ucx.async_lock);
}
}
static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe)
{

View file

@@ -24,6 +24,7 @@
#include "oshmem/mca/spml/ucx/spml_ucx.h"
#include "opal/util/opal_environ.h"
#include "opal/runtime/opal_progress_threads.h"
static int mca_spml_ucx_component_register(void);
static int mca_spml_ucx_component_open(void);

@@ -90,11 +91,26 @@ static inline void mca_spml_ucx_param_register_string(const char* param_name,
storage);
}
static inline void mca_spml_ucx_param_register_bool(const char* param_name,
bool default_value,
const char *help_msg,
bool *storage)
{
*storage = default_value;
(void) mca_base_component_var_register(&mca_spml_ucx_component.spmlm_version,
param_name,
help_msg,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
storage);
}
static int mca_spml_ucx_component_register(void)
{
mca_spml_ucx_param_register_int("priority", 21,
"[integer] ucx priority",
&mca_spml_ucx.priority);
mca_spml_ucx_param_register_int("num_disconnect", 1,
"How may disconnects go in parallel",

@@ -104,6 +120,14 @@ static int mca_spml_ucx_component_register(void)
"Use non-blocking memory registration for shared heap",
&mca_spml_ucx.heap_reg_nb);
mca_spml_ucx_param_register_bool("async_progress", 0,
"Enable asynchronous progress thread",
&mca_spml_ucx.async_progress);
mca_spml_ucx_param_register_int("async_tick_usec", 3000,
"Asynchronous progress tick granularity (in usec)",
&mca_spml_ucx.async_tick);
opal_common_ucx_mca_var_register(&mca_spml_ucx_component.spmlm_version);
return OSHMEM_SUCCESS;
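
As a usage note, assuming Open MPI's standard MCA naming (component parameters appear as spml_ucx_<param>), the new mode would typically be enabled at launch time, e.g. oshrun --mca spml ucx --mca spml_ucx_async_progress true --mca spml_ucx_async_tick_usec 3000 ./app. With async_progress left at its default of 0, put_all_nb simply runs on the default context and is progressed by the calling thread.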
@@ -124,6 +148,39 @@ int spml_ucx_default_progress(void)
return 1;
}
int spml_ucx_progress_aux_ctx(void)
{
unsigned count;
if (OPAL_UNLIKELY(!mca_spml_ucx.aux_ctx)) {
return 0;
}
if (pthread_spin_trylock(&mca_spml_ucx.async_lock)) {
return 0;
}
count = ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker);
pthread_spin_unlock(&mca_spml_ucx.async_lock);
return count;
}
void mca_spml_ucx_async_cb(int fd, short event, void *cbdata)
{
int count = 0;
if (pthread_spin_trylock(&mca_spml_ucx.async_lock)) {
return;
}
do {
count = ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker);
} while (count);
pthread_spin_unlock(&mca_spml_ucx.async_lock);
}
static int mca_spml_ucx_component_open(void)
{
return OSHMEM_SUCCESS;

@@ -185,6 +242,7 @@ static int spml_ucx_init(void)
sizeof(mca_spml_ucx_ctx_t *));
SHMEM_MUTEX_INIT(mca_spml_ucx.internal_mutex);
pthread_mutex_init(&mca_spml_ucx.ctx_create_mutex, NULL);
wkr_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) {

@@ -207,6 +265,22 @@ static int spml_ucx_init(void)
oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE;
}
if (mca_spml_ucx.async_progress) {
pthread_spin_init(&mca_spml_ucx.async_lock, 0);
mca_spml_ucx.async_event_base = opal_progress_thread_init(NULL);
if (NULL == mca_spml_ucx.async_event_base) {
SPML_UCX_ERROR("failed to init async progress thread");
return OSHMEM_ERROR;
}
mca_spml_ucx.tick_event = opal_event_alloc();
opal_event_set(mca_spml_ucx.async_event_base, mca_spml_ucx.tick_event,
-1, EV_PERSIST, mca_spml_ucx_async_cb, NULL);
}
mca_spml_ucx.aux_ctx = NULL;
mca_spml_ucx.aux_refcnt = 0;
oshmem_ctx_default = (shmem_ctx_t) &mca_spml_ucx_ctx_default;
return OSHMEM_SUCCESS;

@@ -252,8 +326,8 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx)
}
opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(),
mca_spml_ucx.num_disconnect,
ctx->ucp_worker);
free(del_procs);
free(ctx->ucp_peers);
}
@@ -271,6 +345,16 @@ static int mca_spml_ucx_component_fini(void)
if(!mca_spml_ucx.enabled)
return OSHMEM_SUCCESS; /* never selected.. return success.. */
if (mca_spml_ucx.async_progress) {
opal_progress_thread_finalize(NULL);
opal_event_evtimer_del(mca_spml_ucx.tick_event);
if (mca_spml_ucx.aux_ctx != NULL) {
_ctx_cleanup(mca_spml_ucx.aux_ctx);
}
opal_progress_unregister(spml_ucx_progress_aux_ctx);
pthread_spin_destroy(&mca_spml_ucx.async_lock);
}
/* delete context objects from list */
for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) {
_ctx_cleanup(mca_spml_ucx.active_array.ctxs[i]);

@@ -280,6 +364,7 @@ static int mca_spml_ucx_component_fini(void)
_ctx_cleanup(mca_spml_ucx.idle_array.ctxs[i]);
}
ret = opal_common_ucx_mca_pmix_fence_nb(&fenced);
if (OPAL_SUCCESS != ret) {
return ret;

@@ -295,6 +380,10 @@ static int mca_spml_ucx_component_fini(void)
}
ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker);
if (mca_spml_ucx.aux_ctx != NULL) {
ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker);
}
}
/* delete all workers */

@@ -312,12 +401,18 @@ static int mca_spml_ucx_component_fini(void)
ucp_worker_destroy(mca_spml_ucx_ctx_default.ucp_worker);
}
if (mca_spml_ucx.aux_ctx != NULL) {
ucp_worker_destroy(mca_spml_ucx.aux_ctx->ucp_worker);
}
mca_spml_ucx.enabled = false; /* not anymore */
free(mca_spml_ucx.active_array.ctxs);
free(mca_spml_ucx.idle_array.ctxs);
free(mca_spml_ucx.aux_ctx);
SHMEM_MUTEX_DESTROY(mca_spml_ucx.internal_mutex);
pthread_mutex_destroy(&mca_spml_ucx.ctx_create_mutex);
if (mca_spml_ucx.ucp_context) {
ucp_cleanup(mca_spml_ucx.ucp_context);

View file

@@ -226,3 +226,12 @@ SHMEM_TYPE_PUTMEM_NB(_put32, 4, shmem)
SHMEM_TYPE_PUTMEM_NB(_put64, 8, shmem)
SHMEM_TYPE_PUTMEM_NB(_put128, 16, shmem)
SHMEM_TYPE_PUTMEM_NB(_putmem, 1, shmem)
void shmemx_alltoall_global_nb(void *dest,
const void *source,
size_t size,
long *counter)
{
int rc = MCA_SPML_CALL(put_all_nb(dest, source, size, counter));
RUNTIME_CHECK_RC(rc);
}