opal/common/ucx: add winfo ptr into req
Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
родитель
33517428a1
Коммит
bcb52ecade
@ -16,15 +16,13 @@ headers = \
|
||||
common_ucx.h \
|
||||
common_ucx_int.h \
|
||||
common_ucx_wpool.h \
|
||||
common_ucx_wpool_int.h \
|
||||
common_ucx_request.h
|
||||
common_ucx_wpool_int.h
|
||||
|
||||
# Source files
|
||||
|
||||
sources = \
|
||||
common_ucx.c \
|
||||
common_ucx_wpool.c \
|
||||
common_ucx_request.c
|
||||
common_ucx_wpool.c
|
||||
|
||||
# Help file
|
||||
|
||||
|
@ -15,6 +15,5 @@
|
||||
|
||||
#include "common_ucx_int.h"
|
||||
#include "common_ucx_wpool.h"
|
||||
#include "common_ucx_request.h"
|
||||
|
||||
#endif
|
||||
|
@ -2,7 +2,6 @@
|
||||
#define COMMON_UCX_INT_H
|
||||
|
||||
#include "opal_config.h"
|
||||
#include "common_ucx_request.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,17 +0,0 @@
|
||||
#include "common_ucx_request.h"
|
||||
|
||||
OPAL_DECLSPEC void
|
||||
opal_common_ucx_req_init(void *request) {
|
||||
opal_common_ucx_request_t *req = (opal_common_ucx_request_t *)request;
|
||||
req->ext_req = NULL;
|
||||
req->ext_cb = NULL;
|
||||
}
|
||||
|
||||
OPAL_DECLSPEC void
|
||||
opal_common_ucx_req_completion(void *request, ucs_status_t status) {
|
||||
opal_common_ucx_request_t *req = (opal_common_ucx_request_t *)request;
|
||||
if (req->ext_cb != NULL) {
|
||||
(*req->ext_cb)(req->ext_req);
|
||||
}
|
||||
ucp_request_release(req);
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
#ifndef COMMON_UCX_REQUEST_H
|
||||
#define COMMON_UCX_REQUEST_H
|
||||
|
||||
#include "opal_config.h"
|
||||
#include <ucp/api/ucp.h>
|
||||
|
||||
typedef void (*opal_common_ucx_user_req_handler_t)(void *request);
|
||||
|
||||
typedef struct {
|
||||
void *ext_req;
|
||||
opal_common_ucx_user_req_handler_t ext_cb;
|
||||
} opal_common_ucx_request_t;
|
||||
|
||||
OPAL_DECLSPEC void opal_common_ucx_req_init(void *request);
|
||||
OPAL_DECLSPEC void opal_common_ucx_req_completion(void *request, ucs_status_t status);
|
||||
|
||||
#endif // COMMON_UCX_REQUEST_H
|
@ -80,8 +80,8 @@ static void
|
||||
_winfo_reset(opal_common_ucx_winfo_t *winfo)
|
||||
{
|
||||
if (winfo->inflight_req != UCS_OK) {
|
||||
opal_common_ucx_wait_request(winfo->inflight_req, winfo->worker,
|
||||
"opal_common_ucx_flush");
|
||||
opal_common_ucx_wait_request_mt(winfo->inflight_req,
|
||||
"opal_common_ucx_flush");
|
||||
winfo->inflight_req = UCS_OK;
|
||||
}
|
||||
|
||||
@ -1228,10 +1228,10 @@ opal_common_ucx_tlocal_fetch_spath(opal_common_ucx_wpmem_t *mem, int target)
|
||||
}
|
||||
|
||||
OPAL_DECLSPEC int
|
||||
opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
|
||||
opal_common_ucx_flush_type_t type,
|
||||
opal_common_ucx_flush_scope_t scope,
|
||||
ucs_status_ptr_t *req_ptr)
|
||||
opal_common_ucx_winfo_flush(opal_common_ucx_winfo_t *winfo, int target,
|
||||
opal_common_ucx_flush_type_t type,
|
||||
opal_common_ucx_flush_scope_t scope,
|
||||
ucs_status_ptr_t *req_ptr)
|
||||
{
|
||||
ucs_status_ptr_t req;
|
||||
ucs_status_t status = UCS_OK;
|
||||
@ -1239,12 +1239,14 @@ opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
|
||||
|
||||
#if HAVE_DECL_UCP_EP_FLUSH_NB
|
||||
if (scope == OPAL_COMMON_UCX_SCOPE_EP) {
|
||||
req = ucp_ep_flush_nb(ep, 0, opal_common_ucx_empty_complete_cb);
|
||||
req = ucp_ep_flush_nb(winfo->endpoints[target], 0, opal_common_ucx_empty_complete_cb);
|
||||
} else {
|
||||
req = ucp_worker_flush_nb(worker, 0, opal_common_ucx_empty_complete_cb);
|
||||
req = ucp_worker_flush_nb(winfo->worker, 0, opal_common_ucx_empty_complete_cb);
|
||||
}
|
||||
((opal_common_ucx_request_t *)req)->winfo = winfo;
|
||||
|
||||
if(OPAL_COMMON_UCX_FLUSH_B) {
|
||||
rc = opal_common_ucx_wait_request(req, worker, "ucp_ep_flush_nb");
|
||||
rc = opal_common_ucx_wait_request_mt(req, "ucp_ep_flush_nb");
|
||||
} else {
|
||||
*req_ptr = req;
|
||||
}
|
||||
@ -1254,9 +1256,9 @@ opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
|
||||
case OPAL_COMMON_UCX_FLUSH_NB_PREFERRED:
|
||||
case OPAL_COMMON_UCX_FLUSH_B:
|
||||
if (scope == OPAL_COMMON_UCX_SCOPE_EP) {
|
||||
status = ucp_ep_flush(ep);
|
||||
status = ucp_ep_flush(winfo->endpoints[target]);
|
||||
} else {
|
||||
status = ucp_worker_flush(worker);
|
||||
status = ucp_worker_flush(winfo->worker);
|
||||
}
|
||||
rc = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
|
||||
case OPAL_COMMON_UCX_FLUSH_NB:
|
||||
@ -1287,9 +1289,8 @@ opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
|
||||
continue;
|
||||
}
|
||||
opal_mutex_lock(&item->ptr->mutex);
|
||||
rc = opal_common_ucx_flush(item->ptr->endpoints[target],
|
||||
item->ptr->worker, OPAL_COMMON_UCX_FLUSH_B,
|
||||
scope, NULL);
|
||||
rc = opal_common_ucx_winfo_flush(item->ptr, target, OPAL_COMMON_UCX_FLUSH_B,
|
||||
scope, NULL);
|
||||
switch (scope) {
|
||||
case OPAL_COMMON_UCX_SCOPE_WORKER:
|
||||
item->ptr->global_inflight_ops = 0;
|
||||
@ -1323,3 +1324,20 @@ opal_common_ucx_wpmem_fence(opal_common_ucx_wpmem_t *mem) {
|
||||
/* TODO */
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_DECLSPEC void
|
||||
opal_common_ucx_req_init(void *request) {
|
||||
opal_common_ucx_request_t *req = (opal_common_ucx_request_t *)request;
|
||||
req->ext_req = NULL;
|
||||
req->ext_cb = NULL;
|
||||
req->winfo = NULL;
|
||||
}
|
||||
|
||||
OPAL_DECLSPEC void
|
||||
opal_common_ucx_req_completion(void *request, ucs_status_t status) {
|
||||
opal_common_ucx_request_t *req = (opal_common_ucx_request_t *)request;
|
||||
if (req->ext_cb != NULL) {
|
||||
(*req->ext_cb)(req->ext_req);
|
||||
}
|
||||
ucp_request_release(req);
|
||||
}
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "common_ucx_int.h"
|
||||
#include "common_ucx_request.h"
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -79,7 +78,7 @@ typedef struct {
|
||||
pthread_key_t mem_tls_key;
|
||||
} opal_common_ucx_wpmem_t;
|
||||
|
||||
typedef struct {
|
||||
typedef struct opal_common_ucx_winfo {
|
||||
opal_recursive_mutex_t mutex;
|
||||
volatile int released;
|
||||
ucp_worker_h worker;
|
||||
@ -95,6 +94,14 @@ typedef struct {
|
||||
ucp_rkey_h *rkeys;
|
||||
} opal_common_ucx_tlocal_fast_ptrs_t;
|
||||
|
||||
typedef void (*opal_common_ucx_user_req_handler_t)(void *request);
|
||||
|
||||
typedef struct {
|
||||
void *ext_req;
|
||||
opal_common_ucx_user_req_handler_t ext_cb;
|
||||
opal_common_ucx_winfo_t *winfo;
|
||||
} opal_common_ucx_request_t;
|
||||
|
||||
typedef enum {
|
||||
OPAL_COMMON_UCX_PUT,
|
||||
OPAL_COMMON_UCX_GET
|
||||
@ -198,6 +205,10 @@ OPAL_DECLSPEC int opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, i
|
||||
opal_common_ucx_ctx_t **ctx_ptr);
|
||||
OPAL_DECLSPEC void opal_common_ucx_wpctx_release(opal_common_ucx_ctx_t *ctx);
|
||||
|
||||
/* request init / completion */
|
||||
OPAL_DECLSPEC void opal_common_ucx_req_init(void *request);
|
||||
OPAL_DECLSPEC void opal_common_ucx_req_completion(void *request, ucs_status_t status);
|
||||
|
||||
/* Managing thread local storage */
|
||||
OPAL_DECLSPEC int opal_common_ucx_tlocal_fetch_spath(opal_common_ucx_wpmem_t *mem, int target);
|
||||
static inline int
|
||||
@ -246,10 +257,57 @@ OPAL_DECLSPEC int opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
|
||||
int target);
|
||||
OPAL_DECLSPEC int opal_common_ucx_wpmem_fence(opal_common_ucx_wpmem_t *mem);
|
||||
|
||||
OPAL_DECLSPEC int opal_common_ucx_flush(ucp_ep_h ep, ucp_worker_h worker,
|
||||
opal_common_ucx_flush_type_t type,
|
||||
opal_common_ucx_flush_scope_t scope,
|
||||
ucs_status_ptr_t *req_ptr);
|
||||
OPAL_DECLSPEC int opal_common_ucx_winfo_flush(opal_common_ucx_winfo_t *winfo, int target,
|
||||
opal_common_ucx_flush_type_t type,
|
||||
opal_common_ucx_flush_scope_t scope,
|
||||
ucs_status_ptr_t *req_ptr);
|
||||
|
||||
static inline
|
||||
int opal_common_ucx_wait_request_mt(ucs_status_ptr_t request, const char *msg)
|
||||
{
|
||||
ucs_status_t status;
|
||||
int ctr = 0, ret = 0;
|
||||
opal_common_ucx_winfo_t *winfo;
|
||||
|
||||
/* check for request completed or failed */
|
||||
if (OPAL_LIKELY(UCS_OK == request)) {
|
||||
return OPAL_SUCCESS;
|
||||
} else if (OPAL_UNLIKELY(UCS_PTR_IS_ERR(request))) {
|
||||
MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", msg ? msg : __func__,
|
||||
UCS_PTR_STATUS(request),
|
||||
ucs_status_string(UCS_PTR_STATUS(request)));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
winfo = ((opal_common_ucx_request_t *)request)->winfo;
|
||||
assert(winfo != NULL);
|
||||
|
||||
do {
|
||||
ctr = opal_common_ucx.progress_iterations;
|
||||
opal_mutex_lock(&winfo->mutex);
|
||||
do {
|
||||
ret = ucp_worker_progress(winfo->worker);
|
||||
status = opal_common_ucx_request_status(request);
|
||||
if (status != UCS_INPROGRESS) {
|
||||
ucp_request_free(request);
|
||||
if (OPAL_UNLIKELY(UCS_OK != status)) {
|
||||
MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s",
|
||||
msg ? msg : __func__,
|
||||
UCS_PTR_STATUS(request),
|
||||
ucs_status_string(UCS_PTR_STATUS(request)));
|
||||
opal_mutex_unlock(&winfo->mutex);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ctr--;
|
||||
} while (ctr > 0 && ret > 0 && status == UCS_INPROGRESS);
|
||||
opal_mutex_unlock(&winfo->mutex);
|
||||
opal_progress();
|
||||
} while (status == UCS_INPROGRESS);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int _periodical_flush_nb(opal_common_ucx_wpmem_t *mem,
|
||||
opal_common_ucx_winfo_t *winfo,
|
||||
@ -264,8 +322,8 @@ static inline int _periodical_flush_nb(opal_common_ucx_wpmem_t *mem,
|
||||
opal_common_ucx_flush_scope_t scope;
|
||||
|
||||
if (winfo->inflight_req != UCS_OK) {
|
||||
rc = opal_common_ucx_wait_request(winfo->inflight_req, winfo->worker,
|
||||
"opal_common_ucx_flush_nb");
|
||||
rc = opal_common_ucx_wait_request_mt(winfo->inflight_req,
|
||||
"opal_common_ucx_flush_nb");
|
||||
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
|
||||
MCA_COMMON_UCX_VERBOSE(1, "opal_common_ucx_wait_request failed: %d", rc);
|
||||
return rc;
|
||||
@ -283,13 +341,13 @@ static inline int _periodical_flush_nb(opal_common_ucx_wpmem_t *mem,
|
||||
winfo->inflight_ops[target] = 0;
|
||||
}
|
||||
|
||||
rc = opal_common_ucx_flush(winfo->endpoints[target], winfo->worker,
|
||||
OPAL_COMMON_UCX_FLUSH_NB_PREFERRED, scope,
|
||||
&winfo->inflight_req);
|
||||
rc = opal_common_ucx_winfo_flush(winfo, target, OPAL_COMMON_UCX_FLUSH_NB_PREFERRED,
|
||||
scope, &winfo->inflight_req);
|
||||
if(OPAL_UNLIKELY(OPAL_SUCCESS != rc)){
|
||||
MCA_COMMON_UCX_VERBOSE(1, "opal_common_ucx_flush failed: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
((opal_common_ucx_request_t *)winfo->inflight_req)->winfo = winfo;
|
||||
} else if (OPAL_UNLIKELY(winfo->inflight_req != UCS_OK)) {
|
||||
int ret;
|
||||
do {
|
||||
@ -510,6 +568,7 @@ opal_common_ucx_wpmem_fetch_nb(opal_common_ucx_wpmem_t *mem,
|
||||
if (UCS_PTR_IS_PTR(req)) {
|
||||
req->ext_req = user_req_ptr;
|
||||
req->ext_cb = user_req_cb;
|
||||
req->winfo = winfo;
|
||||
} else {
|
||||
if (user_req_cb != NULL) {
|
||||
(*user_req_cb)(user_req_ptr);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user