1
1

The release of memory used by registration lists in rcaches must be delayed until the rcache lock is no longer held; otherwise a deadlock can occur (fixes trac:2111).
Memory must not be deregistered while the rcache lock is held; otherwise a deadlock can occur, because the lower-level InfiniBand libraries can free memory (fixes trac:2110).

cmr:v1.4

This commit was SVN r22683.

The following Trac tickets were found above:
  Ticket 2110 --> https://svn.open-mpi.org/trac/ompi/ticket/2110
  Ticket 2111 --> https://svn.open-mpi.org/trac/ompi/ticket/2111
Этот коммит содержится в:
Christopher Yeoh 2010-02-23 11:31:58 +00:00
родитель 322e73d8c4
Коммит c1dcf1c164
6 изменённых файлов: 56 добавлений и 9 удалений

Просмотреть файл

@ -119,16 +119,25 @@ void* mca_mpool_rdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
return addr; return addr;
} }
/* This function must be called with the rcache lock held */
static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool) static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{ {
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool; mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
do { do {
/* Remove registration from garbage collection list
before deregistering it */
reg = (mca_mpool_base_registration_t *) reg = (mca_mpool_base_registration_t *)
opal_list_remove_first(&mpool_rdma->gc_list); opal_list_remove_first(&mpool_rdma->gc_list);
dereg_mem(mpool, reg);
mpool->rcache->rcache_delete(mpool->rcache, reg); mpool->rcache->rcache_delete(mpool->rcache, reg);
/* Drop the rcache lock before calling dereg_mem as there
may be memory allocations */
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
dereg_mem(mpool, reg);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
(ompi_free_list_item_t*)reg); (ompi_free_list_item_t*)reg);
} while(!opal_list_is_empty(&mpool_rdma->gc_list)); } while(!opal_list_is_empty(&mpool_rdma->gc_list));
@ -254,11 +263,22 @@ int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
opal_list_get_last(&mpool_rdma->mru_list); opal_list_get_last(&mpool_rdma->mru_list);
if(opal_list_get_end(&mpool_rdma->mru_list) != if(opal_list_get_end(&mpool_rdma->mru_list) !=
(opal_list_item_t*)old_reg) { (opal_list_item_t*)old_reg) {
/* Remove the registration from the cache and list before
deregistering the memory */
mpool->rcache->rcache_delete(mpool->rcache, old_reg);
opal_list_remove_item(&mpool_rdma->mru_list,
(opal_list_item_t*)old_reg);
/* Drop the rcache lock while we deregister the memory */
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
rc = dereg_mem(mpool, old_reg); rc = dereg_mem(mpool, old_reg);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
/* This introduces a potential leak of registrations if
the deregistration fails to occur as we no longer have
a reference to it. Is this possible? */
if(OMPI_SUCCESS == rc) { if(OMPI_SUCCESS == rc) {
mpool->rcache->rcache_delete(mpool->rcache, old_reg);
opal_list_remove_item(&mpool_rdma->mru_list,
(opal_list_item_t*)old_reg);
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
(ompi_free_list_item_t*)old_reg); (ompi_free_list_item_t*)old_reg);
mpool_rdma->stat_evicted++; mpool_rdma->stat_evicted++;
@ -379,10 +399,16 @@ int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
* on MRU list for future use */ * on MRU list for future use */
opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg); opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
} else { } else {
/* Remove from rcache first */
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
mpool->rcache->rcache_delete(mpool->rcache, reg);
/* Drop the rcache lock before deregistering the memory */
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
rc = dereg_mem(mpool, reg); rc = dereg_mem(mpool, reg);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
if(OMPI_SUCCESS == rc) { if(OMPI_SUCCESS == rc) {
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
mpool->rcache->rcache_delete(mpool->rcache, reg);
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
(ompi_free_list_item_t*)reg); (ompi_free_list_item_t*)reg);
} }
@ -440,6 +466,7 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
mca_mpool_base_registration_t *regs[RDMA_MPOOL_NREGS]; mca_mpool_base_registration_t *regs[RDMA_MPOOL_NREGS];
int reg_cnt, i; int reg_cnt, i;
int rc;
/* Statistic */ /* Statistic */
if(true == mca_mpool_rdma_component.print_stats) { if(true == mca_mpool_rdma_component.print_stats) {
@ -468,11 +495,20 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
(opal_list_item_t*)reg); (opal_list_item_t*)reg);
} }
if(dereg_mem(mpool, reg) != OMPI_SUCCESS) { /* Remove from rcache first */
mpool->rcache->rcache_delete(mpool->rcache, reg);
/* Drop lock before deregistering memory */
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
rc = dereg_mem(mpool, reg);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
if(rc != OMPI_SUCCESS) {
/* Potentially lose track of registrations
do we have to put it back? */
continue; continue;
} }
mpool->rcache->rcache_delete(mpool->rcache, reg);
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
(ompi_free_list_item_t*)reg); (ompi_free_list_item_t*)reg);
} }

Просмотреть файл

@ -10,6 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow

Просмотреть файл

@ -12,6 +12,7 @@
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *

Просмотреть файл

@ -12,6 +12,7 @@
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *

Просмотреть файл

@ -13,6 +13,7 @@
* *
* Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -36,12 +37,14 @@ static void mca_rcache_vma_construct(opal_object_t *object)
{ {
mca_rcache_vma_t *vma = (mca_rcache_vma_t*)object; mca_rcache_vma_t *vma = (mca_rcache_vma_t*)object;
OBJ_CONSTRUCT(&vma->reg_list, opal_list_t); OBJ_CONSTRUCT(&vma->reg_list, opal_list_t);
OBJ_CONSTRUCT(&vma->reg_delete_list, opal_list_t);
} }
static void mca_rcache_vma_destruct(opal_object_t *object) static void mca_rcache_vma_destruct(opal_object_t *object)
{ {
mca_rcache_vma_t *vma = (mca_rcache_vma_t*)object; mca_rcache_vma_t *vma = (mca_rcache_vma_t*)object;
OBJ_DESTRUCT(&vma->reg_list); OBJ_DESTRUCT(&vma->reg_list);
OBJ_DESTRUCT(&vma->reg_delete_list);
} }
OBJ_CLASS_INSTANCE(mca_rcache_vma_t, ompi_free_list_item_t, OBJ_CLASS_INSTANCE(mca_rcache_vma_t, ompi_free_list_item_t,
@ -127,6 +130,9 @@ void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
while ((item = opal_list_remove_first(&vma->reg_list))) while ((item = opal_list_remove_first(&vma->reg_list)))
OBJ_RELEASE(item); OBJ_RELEASE(item);
while ((item = opal_list_remove_first(&vma->reg_delete_list)))
OBJ_RELEASE(item);
OBJ_RELEASE(vma); OBJ_RELEASE(vma);
} }
@ -191,7 +197,7 @@ static inline void mca_rcache_vma_remove_reg(mca_rcache_vma_t *vma,
if(item->reg == reg) { if(item->reg == reg) {
opal_list_remove_item(&vma->reg_list, &item->super); opal_list_remove_item(&vma->reg_list, &item->super);
OBJ_RELEASE(item); opal_list_append(&vma->reg_delete_list, &item->super);
break; break;
} }
} }

Просмотреть файл

@ -12,6 +12,7 @@
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -49,6 +50,7 @@ struct mca_rcache_vma_t
uintptr_t start; /**< the base of the memory range */ uintptr_t start; /**< the base of the memory range */
uintptr_t end; /**< the bound of the memory range */ uintptr_t end; /**< the bound of the memory range */
opal_list_t reg_list; /**< list of regs on this vma */ opal_list_t reg_list; /**< list of regs on this vma */
opal_list_t reg_delete_list; /**< delayed deletions list for regs on this vma */
mca_rcache_vma_module_t *rcache; /**< pointer to rcache vma belongs to */ mca_rcache_vma_module_t *rcache; /**< pointer to rcache vma belongs to */
}; };
typedef struct mca_rcache_vma_t mca_rcache_vma_t; typedef struct mca_rcache_vma_t mca_rcache_vma_t;