1
1

usnic: fix segfault at finalize time

Without this commit, if you run IMB pingpong between two nodes with only
one usnic selected (e.g., via `--mca btl_usnic_if_include usnic_0`) then
the run will seem fine but will segfault at MPI_Finalize time.

This behavior has happened since Cisco v1.6 git commit ec7ddf8, upstream
trunk r29484, and upstream v1.7 r29507.

Root cause was that the free list element was being used as the recv
buffer instead of the data buffer associated with the element.  So the
reassembly code would stomp all over the free list element, which would
cause the destructor to explode when the free list attempted to clean up
all of its elements.  This surprisingly did not cause any other problems
until now.

Reviewed-by: Reese Faucette <rfaucett@cisco.com>

This commit was SVN r29593.

The following SVN revision numbers were found above:
  r29484 --> open-mpi/ompi@a6ed232a10
  r29507 --> open-mpi/ompi@790d269ce8
Этот коммит содержится в:
Dave Goodell 2013-11-04 22:52:14 +00:00
родитель 73a943492c
Коммит 1ed9b8ff43
3 изменённых файла: 8 добавлений и 3 удаления

Просмотреть файл

@@ -99,6 +99,9 @@ typedef struct ompi_btl_usnic_rx_frag_info_t {
  uint32_t rfi_bytes_left; /* bytes remaining to RX in fragment */
  char *rfi_data; /* pointer to assembly area */
  int rfi_data_pool; /* if 0, data malloced, else rx buf pool */
+ ompi_free_list_item_t *rfi_fl_elt; /* free list element from buf pool
+ (rfi_fl_elt->ptr==rfi_data) when
+ rfi_data_pool is nonzero */
  } ompi_btl_usnic_rx_frag_info_t;
  /**

Просмотреть файл

@@ -2009,7 +2009,7 @@ int ompi_btl_usnic_module_init(ompi_btl_usnic_module_t *module)
  rc = ompi_free_list_init_new(&module->module_recv_buffers[i],
  1 << i,
  opal_cache_line_size,
- OBJ_CLASS(ompi_btl_usnic_large_send_frag_t),
+ OBJ_CLASS(ompi_free_list_item_t),
  0, /* payload size */
  0, /* payload align */
  8,

Просмотреть файл

@@ -218,7 +218,8 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
  OMPI_FREE_LIST_GET_MT(&module->module_recv_buffers[pool],
  item);
  if (OPAL_LIKELY(NULL != item)) {
- fip->rfi_data = (char *)item;
+ fip->rfi_fl_elt = item;
+ fip->rfi_data = item->ptr;
  fip->rfi_data_pool = pool;
  }
  }
@@ -301,9 +302,10 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
  if (0 == fip->rfi_data_pool) {
  free(fip->rfi_data);
  } else {
+ assert(fip->rfi_fl_elt->ptr == fip->rfi_data);
  OMPI_FREE_LIST_RETURN_MT(
  &module->module_recv_buffers[fip->rfi_data_pool],
- (ompi_free_list_item_t *)fip->rfi_data);
+ fip->rfi_fl_elt);
  }
  #if MSGDEBUG1