1
1

opal/common/ucx: add periodical flush and counter to opal directory.

Signed-off-by: Xin Zhao <xinz@mellanox.com>
Этот коммит содержится в:
Xin Zhao 2018-12-04 13:45:32 -08:00 коммит произвёл Artem Polyakov
родитель 1fa7054041
Коммит 33517428a1
3 изменённых файлов: 182 добавлений и 29 удалений

Просмотреть файл

@ -25,6 +25,9 @@ BEGIN_C_DECLS
# define MCA_COMMON_UCX_ASSERT(_x)
#endif
/*
 * Number of operations that may be issued before a periodic flush is
 * started: per target endpoint and per worker (all targets together).
 *
 * The in-flight counters these values are compared against are declared
 * `short` (opal_common_ucx_winfo_t::inflight_ops / global_inflight_ops),
 * so both thresholds must stay at or below SHRT_MAX; larger values can
 * never be reached and the counters would overflow instead of triggering
 * a flush.
 */
#define MCA_COMMON_UCX_PER_TARGET_OPS_THRESHOLD 1000
#define MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD 10000
#define _MCA_COMMON_UCX_QUOTE(_x) \
# _x
#define MCA_COMMON_UCX_QUOTE(_x) \

Просмотреть файл

@ -62,6 +62,9 @@ _winfo_create(opal_common_ucx_wpool_t *wpool)
winfo->endpoints = NULL;
winfo->comm_size = 0;
winfo->released = 0;
winfo->inflight_ops = NULL;
winfo->global_inflight_ops = 0;
winfo->inflight_req = UCS_OK;
WPOOL_DBG_OUT(_dbg_winfo, "winfo = %p, worker = %p\n",
(void*)winfo, (void *)winfo->worker);
@ -76,14 +79,24 @@ exit:
static void
_winfo_reset(opal_common_ucx_winfo_t *winfo)
{
if (winfo->inflight_req != UCS_OK) {
opal_common_ucx_wait_request(winfo->inflight_req, winfo->worker,
"opal_common_ucx_flush");
winfo->inflight_req = UCS_OK;
}
assert(winfo->global_inflight_ops == 0);
if(winfo->comm_size != 0) {
size_t i;
for (i = 0; i < winfo->comm_size; i++) {
if (NULL != winfo->endpoints[i]){
ucp_ep_destroy(winfo->endpoints[i]);
}
assert(winfo->inflight_ops[i] == 0);
}
free(winfo->endpoints);
free(winfo->inflight_ops);
}
winfo->endpoints = NULL;
winfo->comm_size = 0;
@ -372,6 +385,7 @@ _wpool_get_idle(opal_common_ucx_wpool_t *wpool, size_t comm_size)
(void *)wpool, (void *)winfo);
winfo->endpoints = calloc(comm_size, sizeof(ucp_ep_h));
winfo->inflight_ops = calloc(comm_size, sizeof(short));
winfo->comm_size = comm_size;
return winfo;
}
@ -1213,6 +1227,46 @@ opal_common_ucx_tlocal_fetch_spath(opal_common_ucx_wpmem_t *mem, int target)
return OPAL_SUCCESS;
}
/**
 * Flush outstanding UCX operations on one endpoint or on a whole worker.
 *
 * @param ep       Endpoint to flush; used only when scope is
 *                 OPAL_COMMON_UCX_SCOPE_EP.
 * @param worker   Worker to flush for any other scope; also the worker
 *                 progressed while waiting for a blocking flush.
 * @param type     Requested completion semantics (blocking, non-blocking,
 *                 or non-blocking if available).
 * @param scope    Selects endpoint-level or worker-level flush.
 * @param req_ptr  Out: receives the handle of a flush that is left in
 *                 flight. May be NULL, in which case the flush is always
 *                 waited for so the request is not lost.
 *
 * @return OPAL_SUCCESS on success; otherwise the error reported by
 *         opal_common_ucx_wait_request(), or OPAL_ERROR when only blocking
 *         flush is available and it failed or a strictly non-blocking
 *         flush was requested.
 */
OPAL_DECLSPEC int
opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
                      opal_common_ucx_flush_type_t type,
                      opal_common_ucx_flush_scope_t scope,
                      ucs_status_ptr_t *req_ptr)
{
#if HAVE_DECL_UCP_EP_FLUSH_NB
    ucs_status_ptr_t req;

    if (scope == OPAL_COMMON_UCX_SCOPE_EP) {
        req = ucp_ep_flush_nb(ep, 0, opal_common_ucx_empty_complete_cb);
    } else {
        req = ucp_worker_flush_nb(worker, 0, opal_common_ucx_empty_complete_cb);
    }

    /* Compare against the requested type: testing the enum constant itself
     * is always true and would turn every flush into a blocking one. */
    if (type == OPAL_COMMON_UCX_FLUSH_B || NULL == req_ptr) {
        return opal_common_ucx_wait_request(req, worker, "ucp_ep_flush_nb");
    }

    /* Non-blocking: hand the request to the caller, who waits on it later. */
    *req_ptr = req;
    return OPAL_SUCCESS;
#else
    ucs_status_t status = UCS_OK;
    int rc = OPAL_SUCCESS;

    switch (type) {
    case OPAL_COMMON_UCX_FLUSH_NB_PREFERRED:
    case OPAL_COMMON_UCX_FLUSH_B:
        if (scope == OPAL_COMMON_UCX_SCOPE_EP) {
            status = ucp_ep_flush(ep);
        } else {
            status = ucp_worker_flush(worker);
        }
        rc = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
        if (OPAL_SUCCESS == rc && NULL != req_ptr) {
            /* The flush already completed; nothing is left in flight. */
            *req_ptr = UCS_OK;
        }
        break; /* do not fall through into the error case below */
    case OPAL_COMMON_UCX_FLUSH_NB:
    default:
        /* A strictly non-blocking flush cannot be provided here. */
        rc = OPAL_ERROR;
        break;
    }
    return rc;
#endif
}
OPAL_DECLSPEC int
opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
opal_common_ucx_flush_scope_t scope,
@ -1228,37 +1282,36 @@ opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
opal_mutex_lock(&ctx->mutex);
OPAL_LIST_FOREACH(item, &ctx->tls_workers, _ctx_record_list_item_t) {
if ((scope == OPAL_COMMON_UCX_SCOPE_EP) &&
(NULL == item->ptr->endpoints[target])) {
continue;
}
opal_mutex_lock(&item->ptr->mutex);
rc = opal_common_ucx_flush(item->ptr->endpoints[target],
item->ptr->worker, OPAL_COMMON_UCX_FLUSH_B,
scope, NULL);
switch (scope) {
case OPAL_COMMON_UCX_SCOPE_WORKER:
opal_mutex_lock(&item->ptr->mutex);
rc = opal_common_ucx_worker_flush(item->ptr->worker);
if (rc != OPAL_SUCCESS) {
MCA_COMMON_UCX_ERROR("opal_common_ucx_worker_flush failed: %d",
rc);
rc = OPAL_ERROR;
}
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem, "worker = %p\n",
(void *)item->ptr->worker);
opal_mutex_unlock(&item->ptr->mutex);
item->ptr->global_inflight_ops = 0;
memset(item->ptr->inflight_ops, 0, item->ptr->comm_size * sizeof(short));
break;
case OPAL_COMMON_UCX_SCOPE_EP:
if (NULL != item->ptr->endpoints[target] ) {
opal_mutex_lock(&item->ptr->mutex);
rc = opal_common_ucx_ep_flush(item->ptr->endpoints[target],
item->ptr->worker);
if (rc != OPAL_SUCCESS) {
MCA_COMMON_UCX_ERROR("opal_common_ucx_ep_flush failed: %d",
rc);
rc = OPAL_ERROR;
}
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem,
"target = %d, ep = %p worker = %p\n",
(int)target,
(void *)item->ptr->endpoints[target],
(void *)item->ptr->worker);
opal_mutex_unlock(&item->ptr->mutex);
}
item->ptr->global_inflight_ops -= item->ptr->inflight_ops[target];
item->ptr->inflight_ops[target] = 0;
break;
}
opal_mutex_unlock(&item->ptr->mutex);
if (rc != OPAL_SUCCESS) {
MCA_COMMON_UCX_ERROR("opal_common_ucx_flush failed: %d",
rc);
rc = OPAL_ERROR;
}
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem,
"target = %d, ep = %p worker = %p\n",
(int)target,
(void *)item->ptr->endpoints[target],
(void *)item->ptr->worker);
}
opal_mutex_unlock(&ctx->mutex);

Просмотреть файл

@ -7,6 +7,7 @@
#include "common_ucx_int.h"
#include "common_ucx_request.h"
#include <stdint.h>
#include <string.h>
#include <ucp/api/ucp.h>
#include <pthread.h>
@ -84,6 +85,9 @@ typedef struct {
ucp_worker_h worker;
ucp_ep_h *endpoints;
size_t comm_size;
short *inflight_ops;
short global_inflight_ops;
ucs_status_ptr_t inflight_req;
} opal_common_ucx_winfo_t;
typedef struct {
@ -101,6 +105,12 @@ typedef enum {
OPAL_COMMON_UCX_SCOPE_WORKER
} opal_common_ucx_flush_scope_t;
/**
 * Completion semantics requested from opal_common_ucx_flush().
 */
typedef enum {
    /* Non-blocking flush only; the blocking-only fallback path of
     * opal_common_ucx_flush() answers this with OPAL_ERROR. */
    OPAL_COMMON_UCX_FLUSH_NB = 0,
    /* Blocking flush. */
    OPAL_COMMON_UCX_FLUSH_B = 1,
    /* Non-blocking if available; handled like a blocking flush in the
     * blocking-only fallback path. */
    OPAL_COMMON_UCX_FLUSH_NB_PREFERRED = 2
} opal_common_ucx_flush_type_t;
typedef enum {
OPAL_COMMON_UCX_MEM_ALLOCATE_MAP,
OPAL_COMMON_UCX_MEM_MAP
@ -236,6 +246,58 @@ OPAL_DECLSPEC int opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
int target);
OPAL_DECLSPEC int opal_common_ucx_wpmem_fence(opal_common_ucx_wpmem_t *mem);
/**
 * Flush outstanding UCX operations on an endpoint or on a worker.
 *
 * @param ep       Endpoint to flush when scope is OPAL_COMMON_UCX_SCOPE_EP.
 * @param worker   Worker to flush for any other scope; also passed to the
 *                 wait routine when the flush is waited for.
 * @param type     Requested completion semantics, see
 *                 opal_common_ucx_flush_type_t.
 * @param scope    Endpoint-level or worker-level flush.
 * @param req_ptr  Location that may receive the request handle of a flush
 *                 left in flight; callers doing a blocking flush pass NULL.
 *
 * @return OPAL_SUCCESS on success, an OPAL error code otherwise.
 */
OPAL_DECLSPEC int opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
opal_common_ucx_flush_type_t type,
opal_common_ucx_flush_scope_t scope,
ucs_status_ptr_t *req_ptr);
/**
 * Count one newly issued operation towards @p target and, once a threshold
 * is reached, start a flush so that in-flight operations do not pile up.
 *
 * Below the thresholds the function only progresses the worker while an
 * earlier flush request is still recorded in winfo->inflight_req. At or
 * above a threshold it first waits for that earlier request, resets the
 * affected counters, and then issues a new flush whose request handle is
 * stored in winfo->inflight_req.
 *
 * The function takes no lock itself; the callers visible in this file
 * invoke it before releasing winfo->mutex.
 *
 * NOTE(review): the counters are declared `short`; confirm that both
 * thresholds are representable in that type.
 *
 * @param mem     Not used by this function.
 * @param winfo   Worker info holding the counters, endpoints and the
 *                pending flush request.
 * @param target  Index of the target the operation was issued to.
 *
 * @return OPAL_SUCCESS, or the error code of the failed wait/flush call.
 */
static inline int _periodical_flush_nb(opal_common_ucx_wpmem_t *mem,
                                       opal_common_ucx_winfo_t *winfo,
                                       int target) {
    opal_common_ucx_flush_scope_t flush_scope;
    int status = OPAL_SUCCESS;

    winfo->inflight_ops[target]++;
    winfo->global_inflight_ops++;

    /* Common case: no threshold reached yet. */
    if (!(OPAL_UNLIKELY(winfo->inflight_ops[target] >= MCA_COMMON_UCX_PER_TARGET_OPS_THRESHOLD) ||
          OPAL_UNLIKELY(winfo->global_inflight_ops >= MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD))) {
        if (OPAL_UNLIKELY(winfo->inflight_req != UCS_OK)) {
            /* A flush is still recorded as pending: progress the worker
             * until it reports no more work. */
            while (ucp_worker_progress(winfo->worker)) {
                /* keep progressing */
            }
        }
        return status;
    }

    /* Only one flush request is tracked at a time: finish the old one. */
    if (winfo->inflight_req != UCS_OK) {
        status = opal_common_ucx_wait_request(winfo->inflight_req, winfo->worker,
                                              "opal_common_ucx_flush_nb");
        if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
            MCA_COMMON_UCX_VERBOSE(1, "opal_common_ucx_wait_request failed: %d", status);
            return status;
        }
        winfo->inflight_req = UCS_OK;
    }

    if (winfo->global_inflight_ops < MCA_COMMON_UCX_GLOBAL_OPS_THRESHOLD) {
        /* Only this target is over its limit: flush just its endpoint. */
        flush_scope = OPAL_COMMON_UCX_SCOPE_EP;
        winfo->global_inflight_ops -= winfo->inflight_ops[target];
        winfo->inflight_ops[target] = 0;
    } else {
        /* Worker-wide limit reached: flush everything, clear all counters. */
        flush_scope = OPAL_COMMON_UCX_SCOPE_WORKER;
        winfo->global_inflight_ops = 0;
        memset(winfo->inflight_ops, 0,
               winfo->comm_size * sizeof(winfo->inflight_ops[0]));
    }

    status = opal_common_ucx_flush(winfo->endpoints[target], winfo->worker,
                                   OPAL_COMMON_UCX_FLUSH_NB_PREFERRED, flush_scope,
                                   &winfo->inflight_req);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
        MCA_COMMON_UCX_VERBOSE(1, "opal_common_ucx_flush failed: %d", status);
    }
    return status;
}
static inline int
opal_common_ucx_wpmem_putget(opal_common_ucx_wpmem_t *mem, opal_common_ucx_op_t op,
@ -269,7 +331,6 @@ opal_common_ucx_wpmem_putget(opal_common_ucx_wpmem_t *mem, opal_common_ucx_op_t
called_func = "ucp_get_nbi";
break;
}
opal_mutex_unlock(&winfo->mutex);
if (OPAL_UNLIKELY(status != UCS_OK && status != UCS_INPROGRESS)) {
MCA_COMMON_UCX_ERROR("%s failed: %d", called_func, status);
@ -278,6 +339,15 @@ opal_common_ucx_wpmem_putget(opal_common_ucx_wpmem_t *mem, opal_common_ucx_op_t
WPOOL_DBG_OUT(_dbg_mem,"ep = %p, rkey = %p\n",
(void *)ep, (void *)rkey);
}
rc = _periodical_flush_nb(mem, winfo, target);
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc);
return rc;
}
opal_mutex_unlock(&winfo->mutex);
return rc;
}
@ -314,6 +384,13 @@ opal_common_ucx_wpmem_cmpswp(opal_common_ucx_wpmem_t *mem, uint64_t compare,
WPOOL_DBG_OUT(_dbg_mem, "ep = %p, rkey = %p\n",
(void *)ep, (void *)rkey);
}
rc = _periodical_flush_nb(mem, winfo, target);
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc);
return rc;
}
opal_mutex_unlock(&winfo->mutex);
return rc;
@ -349,6 +426,13 @@ opal_common_ucx_wpmem_post(opal_common_ucx_wpmem_t *mem, ucp_atomic_post_op_t op
WPOOL_DBG_OUT(_dbg_mem, "ep = %p, rkey = %p\n",
(void *)ep, (void *)rkey);
}
rc = _periodical_flush_nb(mem, winfo, target);
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc);
return rc;
}
opal_mutex_unlock(&winfo->mutex);
return rc;
}
@ -386,6 +470,13 @@ opal_common_ucx_wpmem_fetch(opal_common_ucx_wpmem_t *mem,
WPOOL_DBG_OUT(_dbg_mem, "ep = %p, rkey = %p\n",
(void *)ep, (void *)rkey);
}
rc = _periodical_flush_nb(mem, winfo, target);
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc);
return rc;
}
opal_mutex_unlock(&winfo->mutex);
return rc;
@ -416,8 +507,6 @@ opal_common_ucx_wpmem_fetch_nb(opal_common_ucx_wpmem_t *mem,
req = opal_common_ucx_atomic_fetch_nb(ep, opcode, value, buffer, len,
rem_addr, rkey, opal_common_ucx_req_completion,
winfo->worker);
opal_mutex_unlock(&winfo->mutex);
if (UCS_PTR_IS_PTR(req)) {
req->ext_req = user_req_ptr;
req->ext_cb = user_req_cb;
@ -427,6 +516,14 @@ opal_common_ucx_wpmem_fetch_nb(opal_common_ucx_wpmem_t *mem,
}
}
rc = _periodical_flush_nb(mem, winfo, target);
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
MCA_COMMON_UCX_VERBOSE(1, "_incr_and_check_inflight_ops failed: %d", rc);
return rc;
}
opal_mutex_unlock(&winfo->mutex);
return rc;
}