usnic: fix segfault at finalize time
Without this commit, if you run IMB pingpong between two nodes with only one usnic selected (e.g., via `--mca btl_usnic_if_include usnic_0`) then the run will seem fine but will segfault at MPI_Finalize time. This behavior has happened since Cisco v1.6 git commit ec7ddf8, upstream trunk r29484, and upstream v1.7 r29507.

Root cause was that the free list element was being used as the recv buffer instead of the data buffer associated with the element. So the reassembly code would stomp all over the free list element, which would cause the destructor to explode when the free list attempted to clean up all of its elements. This surprisingly did not cause any other problems until now.

Reviewed-by: Reese Faucette <rfaucett@cisco.com>

This commit was SVN r29593.

The following SVN revision numbers were found above:
  r29484 --> open-mpi/ompi@a6ed232a10
  r29507 --> open-mpi/ompi@790d269ce8
Этот коммит содержится в:
родитель
73a943492c
Коммит
1ed9b8ff43
@ -99,6 +99,9 @@ typedef struct ompi_btl_usnic_rx_frag_info_t {
|
||||
uint32_t rfi_bytes_left; /* bytes remaining to RX in fragment */
|
||||
char *rfi_data; /* pointer to assembly area */
|
||||
int rfi_data_pool; /* if 0, data malloced, else rx buf pool */
|
||||
ompi_free_list_item_t *rfi_fl_elt; /* free list element from buf pool
|
||||
(rfi_fl_elt->ptr==rfi_data) when
|
||||
rfi_data_pool is nonzero */
|
||||
} ompi_btl_usnic_rx_frag_info_t;
|
||||
|
||||
/**
|
||||
|
@ -2009,7 +2009,7 @@ int ompi_btl_usnic_module_init(ompi_btl_usnic_module_t *module)
|
||||
rc = ompi_free_list_init_new(&module->module_recv_buffers[i],
|
||||
1 << i,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(ompi_btl_usnic_large_send_frag_t),
|
||||
OBJ_CLASS(ompi_free_list_item_t),
|
||||
0, /* payload size */
|
||||
0, /* payload align */
|
||||
8,
|
||||
|
@ -218,7 +218,8 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
OMPI_FREE_LIST_GET_MT(&module->module_recv_buffers[pool],
|
||||
item);
|
||||
if (OPAL_LIKELY(NULL != item)) {
|
||||
fip->rfi_data = (char *)item;
|
||||
fip->rfi_fl_elt = item;
|
||||
fip->rfi_data = item->ptr;
|
||||
fip->rfi_data_pool = pool;
|
||||
}
|
||||
}
|
||||
@ -301,9 +302,10 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
if (0 == fip->rfi_data_pool) {
|
||||
free(fip->rfi_data);
|
||||
} else {
|
||||
assert(fip->rfi_fl_elt->ptr == fip->rfi_data);
|
||||
OMPI_FREE_LIST_RETURN_MT(
|
||||
&module->module_recv_buffers[fip->rfi_data_pool],
|
||||
(ompi_free_list_item_t *)fip->rfi_data);
|
||||
fip->rfi_fl_elt);
|
||||
}
|
||||
|
||||
#if MSGDEBUG1
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user