opal/free_list: fix race condition
There was a race condition in opal_free_list_get. Code throughout the Open MPI codebase was assuming that a NULL return from this function was due to an out-of-memory condition. In some cases this can lead to a fatal condition (MPI_Irecv and MPI_Isend in pml/ob1 for example). Before this commit opal_free_list_get_mt looked like this: ```c static inline opal_free_list_item_t *opal_free_list_get_mt (opal_free_list_t *flist) { opal_free_list_item_t *item = (opal_free_list_item_t*) opal_lifo_pop_atomic (&flist->super); if (OPAL_UNLIKELY(NULL == item)) { opal_mutex_lock (&flist->fl_lock); opal_free_list_grow_st (flist, flist->fl_num_per_alloc); opal_mutex_unlock (&flist->fl_lock); item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&flist->super); } return item; } ``` The problem is in a multithreaded environment is *is* possible for the free list to be grown successfully but the thread calling opal_free_list_get_mt to be left without an item. The happens if between the calls to opal_lifo_push_atomic in opal_free_list_grow_st and the call to opal_lifo_pop_atomic other threads pop all the items added to the free list. This commit fixes the issue by ensuring the thread that successfully grew the free list **always** gets a free list item. Fixes #2921 Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
d1881519f9
Коммит
5c770a7bec
@ -13,7 +13,7 @@
|
|||||||
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
|
||||||
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||||
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -155,13 +155,13 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
|
|||||||
flist->ctx = ctx;
|
flist->ctx = ctx;
|
||||||
|
|
||||||
if (num_elements_to_alloc) {
|
if (num_elements_to_alloc) {
|
||||||
return opal_free_list_grow_st (flist, num_elements_to_alloc);
|
return opal_free_list_grow_st (flist, num_elements_to_alloc, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
|
int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements, opal_free_list_item_t **item_out)
|
||||||
{
|
{
|
||||||
unsigned char *ptr, *payload_ptr = NULL;
|
unsigned char *ptr, *payload_ptr = NULL;
|
||||||
opal_free_list_memory_t *alloc_ptr;
|
opal_free_list_memory_t *alloc_ptr;
|
||||||
@ -263,10 +263,16 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
|
|||||||
/* NTH: in case the free list may be accessed from multiple threads
|
/* NTH: in case the free list may be accessed from multiple threads
|
||||||
* use the atomic lifo push. The overhead is small compared to the
|
* use the atomic lifo push. The overhead is small compared to the
|
||||||
* overall overhead of opal_free_list_grow(). */
|
* overall overhead of opal_free_list_grow(). */
|
||||||
|
if (item_out && 0 == i) {
|
||||||
|
/* ensure the thread that is growing the free list always gets an item
|
||||||
|
* if one is available */
|
||||||
|
*item_out = item;
|
||||||
|
} else {
|
||||||
opal_lifo_push_atomic (&flist->super, &item->super);
|
opal_lifo_push_atomic (&flist->super, &item->super);
|
||||||
|
}
|
||||||
|
|
||||||
ptr += head_size;
|
ptr += head_size;
|
||||||
payload_ptr += elem_size;
|
payload_ptr += elem_size;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OPAL_SUCCESS != rc && 0 == num_elements) {
|
if (OPAL_SUCCESS != rc && 0 == num_elements) {
|
||||||
@ -298,7 +304,7 @@ int opal_free_list_resize_mt(opal_free_list_t *flist, size_t size)
|
|||||||
|
|
||||||
opal_mutex_lock (&flist->fl_lock);
|
opal_mutex_lock (&flist->fl_lock);
|
||||||
do {
|
do {
|
||||||
ret = opal_free_list_grow_st (flist, flist->fl_num_per_alloc);
|
ret = opal_free_list_grow_st (flist, flist->fl_num_per_alloc, NULL);
|
||||||
if (OPAL_SUCCESS != ret) {
|
if (OPAL_SUCCESS != ret) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -146,6 +146,7 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list,
|
|||||||
*
|
*
|
||||||
* @param flist (IN) Free list to grow
|
* @param flist (IN) Free list to grow
|
||||||
* @param num_elements (IN) Number of elements to add
|
* @param num_elements (IN) Number of elements to add
|
||||||
|
* @param item_out (OUT) Location to store new free list item (can be NULL)
|
||||||
*
|
*
|
||||||
* @returns OPAL_SUCCESS if any elements were added
|
* @returns OPAL_SUCCESS if any elements were added
|
||||||
* @returns OPAL_ERR_OUT_OF_RESOURCE if no elements could be added
|
* @returns OPAL_ERR_OUT_OF_RESOURCE if no elements could be added
|
||||||
@ -155,8 +156,14 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list,
|
|||||||
* that may be accessed by multiple threads simultaneously. Note: this is an
|
* that may be accessed by multiple threads simultaneously. Note: this is an
|
||||||
* internal function that will be used when needed by opal_free_list_get* and
|
* internal function that will be used when needed by opal_free_list_get* and
|
||||||
* opal_free_list_wait*.
|
* opal_free_list_wait*.
|
||||||
|
*
|
||||||
|
* The item_out parameter can be used to ensure that the thread calling this
|
||||||
|
* function always gets a free list item if the list is successfully grown.
|
||||||
|
* This eliminates a race condition with code that simply calls free_list_get
|
||||||
|
* and assumes NULL is an out of memory condition (which it wasn't necessarily
|
||||||
|
* before this parameter was added).
|
||||||
*/
|
*/
|
||||||
OPAL_DECLSPEC int opal_free_list_grow_st (opal_free_list_t *flist, size_t num_elements);
|
OPAL_DECLSPEC int opal_free_list_grow_st (opal_free_list_t *flist, size_t num_elements, opal_free_list_item_t **item_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Grow the free list to be at least size elements.
|
* Grow the free list to be at least size elements.
|
||||||
@ -195,9 +202,8 @@ static inline opal_free_list_item_t *opal_free_list_get_mt (opal_free_list_t *fl
|
|||||||
|
|
||||||
if (OPAL_UNLIKELY(NULL == item)) {
|
if (OPAL_UNLIKELY(NULL == item)) {
|
||||||
opal_mutex_lock (&flist->fl_lock);
|
opal_mutex_lock (&flist->fl_lock);
|
||||||
opal_free_list_grow_st (flist, flist->fl_num_per_alloc);
|
opal_free_list_grow_st (flist, flist->fl_num_per_alloc, &item);
|
||||||
opal_mutex_unlock (&flist->fl_lock);
|
opal_mutex_unlock (&flist->fl_lock);
|
||||||
item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&flist->super);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return item;
|
return item;
|
||||||
@ -209,8 +215,7 @@ static inline opal_free_list_item_t *opal_free_list_get_st (opal_free_list_t *fl
|
|||||||
(opal_free_list_item_t*) opal_lifo_pop_st (&flist->super);
|
(opal_free_list_item_t*) opal_lifo_pop_st (&flist->super);
|
||||||
|
|
||||||
if (OPAL_UNLIKELY(NULL == item)) {
|
if (OPAL_UNLIKELY(NULL == item)) {
|
||||||
opal_free_list_grow_st (flist, flist->fl_num_per_alloc);
|
opal_free_list_grow_st (flist, flist->fl_num_per_alloc, &item);
|
||||||
item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&flist->super);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return item;
|
return item;
|
||||||
@ -253,7 +258,7 @@ static inline opal_free_list_item_t *opal_free_list_wait_mt (opal_free_list_t *f
|
|||||||
while (NULL == item) {
|
while (NULL == item) {
|
||||||
if (!opal_mutex_trylock (&fl->fl_lock)) {
|
if (!opal_mutex_trylock (&fl->fl_lock)) {
|
||||||
if (fl->fl_max_to_alloc <= fl->fl_num_allocated ||
|
if (fl->fl_max_to_alloc <= fl->fl_num_allocated ||
|
||||||
OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc)) {
|
OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc, &item)) {
|
||||||
fl->fl_num_waiting++;
|
fl->fl_num_waiting++;
|
||||||
opal_condition_wait (&fl->fl_condition, &fl->fl_lock);
|
opal_condition_wait (&fl->fl_condition, &fl->fl_lock);
|
||||||
fl->fl_num_waiting--;
|
fl->fl_num_waiting--;
|
||||||
@ -274,8 +279,10 @@ static inline opal_free_list_item_t *opal_free_list_wait_mt (opal_free_list_t *f
|
|||||||
opal_mutex_lock (&fl->fl_lock);
|
opal_mutex_lock (&fl->fl_lock);
|
||||||
}
|
}
|
||||||
opal_mutex_unlock (&fl->fl_lock);
|
opal_mutex_unlock (&fl->fl_lock);
|
||||||
|
if (NULL == item) {
|
||||||
item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&fl->super);
|
item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&fl->super);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return item;
|
return item;
|
||||||
}
|
}
|
||||||
@ -287,13 +294,14 @@ static inline opal_free_list_item_t *opal_free_list_wait_st (opal_free_list_t *f
|
|||||||
|
|
||||||
while (NULL == item) {
|
while (NULL == item) {
|
||||||
if (fl->fl_max_to_alloc <= fl->fl_num_allocated ||
|
if (fl->fl_max_to_alloc <= fl->fl_num_allocated ||
|
||||||
OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc)) {
|
OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc, &item)) {
|
||||||
/* try to make progress */
|
/* try to make progress */
|
||||||
opal_progress ();
|
opal_progress ();
|
||||||
}
|
}
|
||||||
|
if (NULL == item) {
|
||||||
item = (opal_free_list_item_t *) opal_lifo_pop (&fl->super);
|
item = (opal_free_list_item_t *) opal_lifo_pop (&fl->super);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return item;
|
return item;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user