opal/common/ucx: Set of bug fixes in wpool
Signed-off-by: Xin Zhao <xinz@mellanox.com>
Этот коммит содержится в:
родитель
344bb641a1
Коммит
07cb4134be
@ -817,20 +817,19 @@ static void _common_ucx_tls_cleanup(_tlocal_table_t *tls)
|
|||||||
// Cleanup memory table
|
// Cleanup memory table
|
||||||
size = tls->mem_tbl_size;
|
size = tls->mem_tbl_size;
|
||||||
for (i = 0; i < size; i++) {
|
for (i = 0; i < size; i++) {
|
||||||
if (NULL == tls->mem_tbl[i]->gmem){
|
if (NULL != tls->mem_tbl[i]->gmem){
|
||||||
continue;
|
|
||||||
}
|
|
||||||
_tlocal_mem_record_cleanup(tls->mem_tbl[i]);
|
_tlocal_mem_record_cleanup(tls->mem_tbl[i]);
|
||||||
|
}
|
||||||
|
|
||||||
free(tls->mem_tbl[i]);
|
free(tls->mem_tbl[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cleanup ctx table
|
// Cleanup ctx table
|
||||||
size = tls->ctx_tbl_size;
|
size = tls->ctx_tbl_size;
|
||||||
for (i = 0; i < size; i++) {
|
for (i = 0; i < size; i++) {
|
||||||
if (NULL == tls->ctx_tbl[i]->gctx){
|
if (NULL != tls->ctx_tbl[i]->gctx){
|
||||||
continue;
|
|
||||||
}
|
|
||||||
_tlocal_ctx_record_cleanup(tls->ctx_tbl[i]);
|
_tlocal_ctx_record_cleanup(tls->ctx_tbl[i]);
|
||||||
|
}
|
||||||
free(tls->ctx_tbl[i]);
|
free(tls->ctx_tbl[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -918,7 +917,7 @@ static _tlocal_ctx_t *
|
|||||||
_tlocal_add_ctx(_tlocal_table_t *tls, opal_common_ucx_ctx_t *ctx)
|
_tlocal_add_ctx(_tlocal_table_t *tls, opal_common_ucx_ctx_t *ctx)
|
||||||
{
|
{
|
||||||
size_t i, free_idx = -1;
|
size_t i, free_idx = -1;
|
||||||
int rc;
|
int rc, found = 0;
|
||||||
|
|
||||||
/* Try to find available record in the TLS table
|
/* Try to find available record in the TLS table
|
||||||
* In parallel perform deferred cleanups */
|
* In parallel perform deferred cleanups */
|
||||||
@ -929,14 +928,15 @@ _tlocal_add_ctx(_tlocal_table_t *tls, opal_common_ucx_ctx_t *ctx)
|
|||||||
_tlocal_ctx_record_cleanup(tls->ctx_tbl[i]);
|
_tlocal_ctx_record_cleanup(tls->ctx_tbl[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((NULL != tls->ctx_tbl[i]->gctx) && (0 > free_idx)) {
|
if ((NULL == tls->ctx_tbl[i]->gctx) && !found) {
|
||||||
/* Found clean record */
|
/* Found clean record */
|
||||||
free_idx = i;
|
free_idx = i;
|
||||||
|
found = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if needed - extend the table */
|
/* if needed - extend the table */
|
||||||
if (0 > free_idx) {
|
if (!found) {
|
||||||
free_idx = tls->ctx_tbl_size;
|
free_idx = tls->ctx_tbl_size;
|
||||||
rc = _tlocal_tls_ctxtbl_extend(tls, 4);
|
rc = _tlocal_tls_ctxtbl_extend(tls, 4);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
@ -1025,15 +1025,6 @@ _tlocal_mem_record_cleanup(_tlocal_mem_t *mem_rec)
|
|||||||
size_t i;
|
size_t i;
|
||||||
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem, "record=%p, is_freed = %d\n",
|
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem, "record=%p, is_freed = %d\n",
|
||||||
(void *)mem_rec, mem_rec->gmem->released);
|
(void *)mem_rec, mem_rec->gmem->released);
|
||||||
if (mem_rec->gmem->released) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* Remove myself from the memory context structure
|
|
||||||
* This may result in context release as we are using
|
|
||||||
* delayed cleanup */
|
|
||||||
_common_ucx_mem_signout(mem_rec->gmem);
|
|
||||||
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem, "gmem = %p mem_rec = %p\n",
|
|
||||||
(void *)mem_rec->gmem, (void *)mem_rec);
|
|
||||||
|
|
||||||
for(i = 0; i < mem_rec->gmem->ctx->comm_size; i++) {
|
for(i = 0; i < mem_rec->gmem->ctx->comm_size; i++) {
|
||||||
if (mem_rec->mem->rkeys[i]) {
|
if (mem_rec->mem->rkeys[i]) {
|
||||||
@ -1044,6 +1035,13 @@ _tlocal_mem_record_cleanup(_tlocal_mem_t *mem_rec)
|
|||||||
}
|
}
|
||||||
free(mem_rec->mem->rkeys);
|
free(mem_rec->mem->rkeys);
|
||||||
|
|
||||||
|
/* Remove myself from the memory context structure
|
||||||
|
* This may result in context release as we are using
|
||||||
|
* delayed cleanup */
|
||||||
|
_common_ucx_mem_signout(mem_rec->gmem);
|
||||||
|
WPOOL_DBG_OUT(_dbg_tls || _dbg_mem, "gmem = %p mem_rec = %p\n",
|
||||||
|
(void *)mem_rec->gmem, (void *)mem_rec);
|
||||||
|
|
||||||
/* Release fast-path pointers */
|
/* Release fast-path pointers */
|
||||||
if (NULL != mem_rec->mem_tls_ptr) {
|
if (NULL != mem_rec->mem_tls_ptr) {
|
||||||
free(mem_rec->mem_tls_ptr);
|
free(mem_rec->mem_tls_ptr);
|
||||||
@ -1059,24 +1057,24 @@ static _tlocal_mem_t *_tlocal_add_mem(_tlocal_table_t *tls,
|
|||||||
{
|
{
|
||||||
size_t i, free_idx = -1;
|
size_t i, free_idx = -1;
|
||||||
_tlocal_ctx_t *ctx_rec = NULL;
|
_tlocal_ctx_t *ctx_rec = NULL;
|
||||||
int rc = OPAL_SUCCESS;
|
int rc = OPAL_SUCCESS, found = 0;
|
||||||
|
|
||||||
/* Try to find available spot in the table */
|
/* Try to find available spot in the table */
|
||||||
for (i=0; i<tls->mem_tbl_size; i++) {
|
for (i=0; i<tls->mem_tbl_size; i++) {
|
||||||
if (NULL == tls->mem_tbl[i]->gmem) {
|
if (NULL != tls->mem_tbl[i]->gmem) {
|
||||||
if (tls->mem_tbl[i]->gmem->released) {
|
if (tls->mem_tbl[i]->gmem->released) {
|
||||||
/* Found a dirty record. Need to clean it first */
|
/* Found a dirty record. Need to clean it first */
|
||||||
_tlocal_mem_record_cleanup(tls->mem_tbl[i]);
|
_tlocal_mem_record_cleanup(tls->mem_tbl[i]);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((NULL == tls->mem_tbl[i]->gmem) && (0 > free_idx)) {
|
if ((NULL == tls->mem_tbl[i]->gmem) && !found) {
|
||||||
/* Found a clear record */
|
/* Found a clear record */
|
||||||
free_idx = i;
|
free_idx = i;
|
||||||
|
found = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0 > free_idx){
|
if (!found){
|
||||||
free_idx = tls->mem_tbl_size;
|
free_idx = tls->mem_tbl_size;
|
||||||
rc = _tlocal_tls_memtbl_extend(tls, 4);
|
rc = _tlocal_tls_memtbl_extend(tls, 4);
|
||||||
if (rc != OPAL_SUCCESS) {
|
if (rc != OPAL_SUCCESS) {
|
||||||
|
@ -110,7 +110,7 @@ typedef int (*opal_common_ucx_exchange_func_t)(void *my_info, size_t my_info_len
|
|||||||
void *metadata);
|
void *metadata);
|
||||||
|
|
||||||
/* For developer use only */
|
/* For developer use only */
|
||||||
#define OPAL_COMMON_UCX_WPOOL_DBG
|
//#define OPAL_COMMON_UCX_WPOOL_DBG
|
||||||
#ifdef OPAL_COMMON_UCX_WPOOL_DBG
|
#ifdef OPAL_COMMON_UCX_WPOOL_DBG
|
||||||
extern __thread FILE *tls_pf;
|
extern __thread FILE *tls_pf;
|
||||||
extern __thread int initialized;
|
extern __thread int initialized;
|
||||||
@ -168,7 +168,7 @@ static inline void opal_common_ucx_wpool_dbg_init(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define DBG_OUT(...)
|
#define WPOOL_DBG_OUT(...)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user