1
1

usnic: fix module_recv_buffers perf regression

Cisco v1.6 git commit 913ec6c and upstream trunk r29593 (segfault fix)
introduced a performance regression by inadvertently disabling the
`module_recv_buffers` functionality.  With those changes in place, the
`btl_usnic_recv.c` logic would end up mallocing a buffer that should
have otherwise come from a `module_recv_buffers` pool.  It also resulted
in a small, bounded memory leak (128 buffers at each power-of-two size
interval).

The new version just places the buffer after the free list item with a
flexible array member.  I bumped the pool to allocate all 128 elements
up front because the deferred allocation was modestly impacting IMB
Sendrecv performance at a few sizes.

Reviewed-by: Reese Faucette <rfaucett@cisco.com>

This commit was SVN r29631.

The following SVN revision numbers were found above:
  r29593 --> open-mpi/ompi@1ed9b8ff43
Этот коммит содержится в:
Dave Goodell 2013-11-07 01:27:31 +00:00
родитель 0df678f1ea
Коммит 82db913490
5 изменённых файлов: 33 добавлений и 13 удалений

Просмотреть файл

@ -100,8 +100,7 @@ typedef struct ompi_btl_usnic_rx_frag_info_t {
char *rfi_data; /* pointer to assembly area */
int rfi_data_pool; /* if 0, data malloced, else rx buf pool */
ompi_free_list_item_t *rfi_fl_elt; /* free list elemement from buf pool
(rfi_fl_elt->ptr==rfi_data) when
rfi_data_pool is nonzero */
when rfi_data_pool is nonzero */
} ompi_btl_usnic_rx_frag_info_t;
/**

Просмотреть файл

@ -305,3 +305,8 @@ OBJ_CLASS_INSTANCE(ompi_btl_usnic_put_dest_frag_t,
ompi_btl_usnic_frag_t,
put_dest_frag_constructor,
put_dest_frag_destructor);
/* Class instance for the fragment reassembly buffer type.  Its parent class
 * is ompi_free_list_item_t, and no constructor or destructor is registered
 * (both are NULL). */
OBJ_CLASS_INSTANCE(ompi_btl_usnic_rx_buf_t,
ompi_free_list_item_t,
NULL,
NULL);

Просмотреть файл

@ -319,6 +319,18 @@ typedef struct ompi_btl_usnic_small_send_frag_t {
*/
typedef ompi_btl_usnic_frag_t ompi_btl_usnic_put_dest_frag_t;
/**
* A simple buffer that can be enqueued on an ompi_free_list_t that is intended
* to be used for fragment reassembly. Nominally the free list code supports
* this via the rb_super.ptr field, but that field is only allocated and
* non-NULL if an mpool is used, and we don't need this reassembly memory to be
* registered.
*
* The reassembly bytes live in the same free list element, after the list
* item header. The struct itself declares only one byte of buf; the usable
* capacity is fixed when the pool is created in ompi_btl_usnic_module_init,
* where each element is sized as
* sizeof(ompi_btl_usnic_rx_buf_t) - 1 + (1 << i) for pool index i.
*/
typedef struct ompi_btl_usnic_rx_buf_t {
/* free list item header; it is the first member, and the receive path casts
 * the ompi_free_list_item_t pointer taken from the pool to this type */
ompi_free_list_item_t rb_super;
/* start of the reassembly area; declared as [1] rather than a C99 []
 * member, so size computations subtract the one declared byte */
char buf[1]; /* variable-length tail for frag reassembly */
} ompi_btl_usnic_rx_buf_t;
OBJ_CLASS_DECLARATION(ompi_btl_usnic_send_frag_t);
OBJ_CLASS_DECLARATION(ompi_btl_usnic_small_send_frag_t);
OBJ_CLASS_DECLARATION(ompi_btl_usnic_large_send_frag_t);
@ -329,6 +341,8 @@ OBJ_CLASS_DECLARATION(ompi_btl_usnic_frag_segment_t);
OBJ_CLASS_DECLARATION(ompi_btl_usnic_chunk_segment_t);
OBJ_CLASS_DECLARATION(ompi_btl_usnic_recv_segment_t);
OBJ_CLASS_DECLARATION(ompi_btl_usnic_rx_buf_t);
typedef ompi_btl_usnic_send_segment_t ompi_btl_usnic_ack_segment_t;
OBJ_CLASS_DECLARATION(ompi_btl_usnic_ack_segment_t);

Просмотреть файл

@ -2006,16 +2006,17 @@ int ompi_btl_usnic_module_init(ompi_btl_usnic_module_t *module)
assert(module->module_recv_buffers != NULL);
for (i=module->first_pool; i<=module->last_pool; ++i) {
OBJ_CONSTRUCT(&module->module_recv_buffers[i], ompi_free_list_t);
size_t elt_size = sizeof(ompi_btl_usnic_rx_buf_t) - 1 + (1 << i);
rc = ompi_free_list_init_new(&module->module_recv_buffers[i],
1 << i,
elt_size,
opal_cache_line_size,
OBJ_CLASS(ompi_free_list_item_t),
0, /* payload size */
0, /* payload align */
8,
128,
8,
NULL);
OBJ_CLASS(ompi_btl_usnic_rx_buf_t),
0, /* payload size */
0, /* payload align */
128, /* init elts to alloc */
128, /* max elts to alloc */
128, /* num elts per alloc */
NULL /* mpool */);
assert(OMPI_SUCCESS == rc);
}

Просмотреть файл

@ -215,11 +215,13 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
if (pool >= module->first_pool &&
pool <= module->last_pool) {
ompi_free_list_item_t* item;
ompi_btl_usnic_rx_buf_t *rx_buf;
OMPI_FREE_LIST_GET_MT(&module->module_recv_buffers[pool],
item);
if (OPAL_LIKELY(NULL != item)) {
rx_buf = (ompi_btl_usnic_rx_buf_t *)item;
if (OPAL_LIKELY(NULL != rx_buf)) {
fip->rfi_fl_elt = item;
fip->rfi_data = item->ptr;
fip->rfi_data = rx_buf->buf;
fip->rfi_data_pool = pool;
}
}
@ -302,7 +304,6 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
if (0 == fip->rfi_data_pool) {
free(fip->rfi_data);
} else {
assert(fip->rfi_fl_elt->ptr == fip->rfi_data);
OMPI_FREE_LIST_RETURN_MT(
&module->module_recv_buffers[fip->rfi_data_pool],
fip->rfi_fl_elt);