usnic: handle FI_MSG_PREFIX differences between libfabric v1.0.0 and v1.1.0
In libfabric v1.0.0 (i.e., API v1.0), the usnic provider handled FI_MSG_PREFIX inconsistently between sends and receives. This was fixed in libfabric v1.1.0 (i.e., API v1.1), where FI_MSG_PREFIX is handled consistently for both sends and receives. Detect at run time which libfabric version we are running with and adapt the behavior accordingly.
Этот коммит содержится в:
родитель
ddd0de6cfc
Коммит
f265358fbe
@ -220,6 +220,12 @@ typedef struct opal_btl_usnic_component_t {
|
|||||||
API >=v1.1, the usnic provider returned 1 upon success. */
|
API >=v1.1, the usnic provider returned 1 upon success. */
|
||||||
ssize_t cq_readerr_success_value;
|
ssize_t cq_readerr_success_value;
|
||||||
ssize_t cq_readerr_try_again_value;
|
ssize_t cq_readerr_try_again_value;
|
||||||
|
|
||||||
|
/** Offset into the send buffer where the payload will go. For
|
||||||
|
libfabric v1.0.0 / API v1.0, this is 0. For libfabric >=v1.1
|
||||||
|
/ API >=v1.1, this is the endpoint.msg_prefix_size (i.e.,
|
||||||
|
component.transport_header_len). */
|
||||||
|
uint32_t prefix_send_offset;
|
||||||
} opal_btl_usnic_component_t;
|
} opal_btl_usnic_component_t;
|
||||||
|
|
||||||
OPAL_MODULE_DECLSPEC extern opal_btl_usnic_component_t mca_btl_usnic_component;
|
OPAL_MODULE_DECLSPEC extern opal_btl_usnic_component_t mca_btl_usnic_component;
|
||||||
|
@ -163,6 +163,7 @@ static int usnic_component_open(void)
|
|||||||
mca_btl_usnic_component.usnic_all_modules = NULL;
|
mca_btl_usnic_component.usnic_all_modules = NULL;
|
||||||
mca_btl_usnic_component.usnic_active_modules = NULL;
|
mca_btl_usnic_component.usnic_active_modules = NULL;
|
||||||
mca_btl_usnic_component.transport_header_len = -1;
|
mca_btl_usnic_component.transport_header_len = -1;
|
||||||
|
mca_btl_usnic_component.prefix_send_offset = 0;
|
||||||
|
|
||||||
/* initialize objects */
|
/* initialize objects */
|
||||||
OBJ_CONSTRUCT(&mca_btl_usnic_component.usnic_procs, opal_list_t);
|
OBJ_CONSTRUCT(&mca_btl_usnic_component.usnic_procs, opal_list_t);
|
||||||
@ -630,7 +631,29 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
hints.ep_attr = &ep_attr;
|
hints.ep_attr = &ep_attr;
|
||||||
hints.fabric_attr = &fabric_attr;
|
hints.fabric_attr = &fabric_attr;
|
||||||
|
|
||||||
ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, &hints, &info_list);
|
/* This code understands libfabric API v1.0 and v1.1. Even if we
|
||||||
|
were compiled with libfabric API v1.0, we still want to request
|
||||||
|
v1.1 -- here's why:
|
||||||
|
|
||||||
|
- In libfabric v1.0.0 (i.e., API v1.0), the usnic provider did
|
||||||
|
not check the value of the "version" parameter passed into
|
||||||
|
fi_getinfo()
|
||||||
|
|
||||||
|
- If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API
|
||||||
|
v1.1), the usnic provider will disable FI_MSG_PREFIX support
|
||||||
|
(on the assumption that the application will not handle
|
||||||
|
FI_MSG_PREFIX properly). This can happen if you compile OMPI
|
||||||
|
against libfabric v1.0.0 (i.e., API v1.0) and run OMPI
|
||||||
|
against libfabric v1.1.0 (i.e., API v1.1).
|
||||||
|
|
||||||
|
So never request API v1.0 -- always request a minimum of
|
||||||
|
v1.1. */
|
||||||
|
uint32_t libfabric_api;
|
||||||
|
libfabric_api = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION);
|
||||||
|
if (libfabric_api == FI_VERSION(1, 0)) {
|
||||||
|
libfabric_api = FI_VERSION(1, 1);
|
||||||
|
}
|
||||||
|
ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list);
|
||||||
if (0 != ret) {
|
if (0 != ret) {
|
||||||
opal_output_verbose(5, USNIC_OUT,
|
opal_output_verbose(5, USNIC_OUT,
|
||||||
"btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)", strerror(-ret), ret);
|
"btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)", strerror(-ret), ret);
|
||||||
@ -671,8 +694,9 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
The ambiguities were clarified in libfabric v1.1.0 (i.e., API
|
The ambiguities were clarified in libfabric v1.1.0 (i.e., API
|
||||||
v1.1); the usnic provider returned 1 from fi_cq_readerr() upon
|
v1.1); the usnic provider returned 1 from fi_cq_readerr() upon
|
||||||
success.
|
success.
|
||||||
*/
|
|
||||||
uint32_t libfabric_api;
|
So query to see what version of the libfabric API we are
|
||||||
|
running with, and adapt accordingly. */
|
||||||
libfabric_api = fi_version();
|
libfabric_api = fi_version();
|
||||||
if (1 == FI_MAJOR(libfabric_api) &&
|
if (1 == FI_MAJOR(libfabric_api) &&
|
||||||
0 == FI_MINOR(libfabric_api)) {
|
0 == FI_MINOR(libfabric_api)) {
|
||||||
|
@ -30,23 +30,22 @@
|
|||||||
#include "btl_usnic_ack.h"
|
#include "btl_usnic_ack.h"
|
||||||
|
|
||||||
static void
|
static void
|
||||||
common_send_seg_helper(
|
common_send_seg_helper(opal_btl_usnic_send_segment_t *seg)
|
||||||
opal_btl_usnic_send_segment_t *seg,
|
|
||||||
int offset)
|
|
||||||
{
|
{
|
||||||
opal_btl_usnic_segment_t *bseg;
|
opal_btl_usnic_segment_t *bseg;
|
||||||
|
|
||||||
bseg = &seg->ss_base;
|
/* send ptr for fi_send(). ss_len will be filled in right before
|
||||||
|
the actual send. */
|
||||||
bseg->us_btl_header = (opal_btl_usnic_btl_header_t *)
|
seg->ss_ptr = (uint8_t *) seg->ss_base.us_list.ptr;
|
||||||
(((char*) bseg->us_list.ptr) + offset);
|
|
||||||
bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name;
|
|
||||||
|
|
||||||
seg->ss_send_posted = 0;
|
seg->ss_send_posted = 0;
|
||||||
seg->ss_ack_pending = false;
|
seg->ss_ack_pending = false;
|
||||||
|
|
||||||
/* send ptr, len will be filled in just before send */
|
/* Offset the BTL header by (prefix_send_offset) bytes into the
|
||||||
seg->ss_ptr = (uint8_t *)bseg->us_btl_header;
|
raw buffer */
|
||||||
|
bseg = &seg->ss_base;
|
||||||
|
bseg->us_btl_header = (opal_btl_usnic_btl_header_t *)
|
||||||
|
(seg->ss_ptr + mca_btl_usnic_component.prefix_send_offset);
|
||||||
|
bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -59,7 +58,7 @@ chunk_seg_constructor(
|
|||||||
bseg->us_type = OPAL_BTL_USNIC_SEG_CHUNK;
|
bseg->us_type = OPAL_BTL_USNIC_SEG_CHUNK;
|
||||||
|
|
||||||
/* some more common initializaiton */
|
/* some more common initializaiton */
|
||||||
common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len);
|
common_send_seg_helper(seg);
|
||||||
|
|
||||||
/* payload starts next byte beyond BTL chunk header */
|
/* payload starts next byte beyond BTL chunk header */
|
||||||
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_chunk_header + 1);
|
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_chunk_header + 1);
|
||||||
@ -77,7 +76,7 @@ frag_seg_constructor(
|
|||||||
bseg->us_type = OPAL_BTL_USNIC_SEG_FRAG;
|
bseg->us_type = OPAL_BTL_USNIC_SEG_FRAG;
|
||||||
|
|
||||||
/* some more common initializaiton */
|
/* some more common initializaiton */
|
||||||
common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len);
|
common_send_seg_helper(seg);
|
||||||
|
|
||||||
/* payload starts next byte beyond BTL header */
|
/* payload starts next byte beyond BTL header */
|
||||||
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_header + 1);
|
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_header + 1);
|
||||||
@ -95,7 +94,7 @@ ack_seg_constructor(
|
|||||||
bseg->us_type = OPAL_BTL_USNIC_SEG_ACK;
|
bseg->us_type = OPAL_BTL_USNIC_SEG_ACK;
|
||||||
|
|
||||||
/* some more common initializaiton */
|
/* some more common initializaiton */
|
||||||
common_send_seg_helper(ack, mca_btl_usnic_component.transport_header_len);
|
common_send_seg_helper(ack);
|
||||||
|
|
||||||
/* ACK value embedded in BTL header */
|
/* ACK value embedded in BTL header */
|
||||||
bseg->us_btl_header->payload_type = OPAL_BTL_USNIC_PAYLOAD_TYPE_ACK;
|
bseg->us_btl_header->payload_type = OPAL_BTL_USNIC_PAYLOAD_TYPE_ACK;
|
||||||
|
@ -1421,7 +1421,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
|
|||||||
opal_process_info.my_local_rank);
|
opal_process_info.my_local_rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hint, &channel->info);
|
rc = fi_getinfo(FI_VERSION(1, 1), NULL, 0, 0, hint, &channel->info);
|
||||||
fi_freeinfo(hint);
|
fi_freeinfo(hint);
|
||||||
if (0 != rc) {
|
if (0 != rc) {
|
||||||
opal_show_help("help-mpi-btl-usnic.txt",
|
opal_show_help("help-mpi-btl-usnic.txt",
|
||||||
@ -1634,6 +1634,9 @@ static int init_one_channel(opal_btl_usnic_module_t *module,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(channel->info->ep_attr->msg_prefix_size ==
|
||||||
|
(uint32_t) mca_btl_usnic_component.transport_header_len);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize pool of receive segments. Round MTU up to cache
|
* Initialize pool of receive segments. Round MTU up to cache
|
||||||
* line size so that each segment is guaranteed to start on a
|
* line size so that each segment is guaranteed to start on a
|
||||||
@ -1777,6 +1780,33 @@ static void init_find_transport_header_len(opal_btl_usnic_module_t *module)
|
|||||||
module->fabric_info->ep_attr->msg_prefix_size;
|
module->fabric_info->ep_attr->msg_prefix_size;
|
||||||
mca_btl_usnic_component.transport_protocol =
|
mca_btl_usnic_component.transport_protocol =
|
||||||
module->fabric_info->ep_attr->protocol;
|
module->fabric_info->ep_attr->protocol;
|
||||||
|
|
||||||
|
/* The usnic provider in libfabric v1.0.0 (i.e., API v1.0) treated
|
||||||
|
FI_MSG_PREFIX inconsistently between senders and receivers. It
|
||||||
|
was corrected in libfabric v1.1.0 (i.e., API v1.1), meaning
|
||||||
|
that FI_MSG_PREFIX is treated consistently between senders and
|
||||||
|
receivers.
|
||||||
|
|
||||||
|
So check what version of the libfabric API we have, and setup
|
||||||
|
to use the "old" (inconsistent) MSG_PREFIX behavior, or the
|
||||||
|
"new" MSG_PREFIX (consistent) behavior.
|
||||||
|
|
||||||
|
NOTE: This is a little redundant; we're setting a
|
||||||
|
component-level attribute during each module's setup. We do
|
||||||
|
this here (and not earlier, when we check fi_version() during
|
||||||
|
the component setup) because we can't obtain the value of the
|
||||||
|
endpoint msg_prefix_size until we setup the first module.
|
||||||
|
Also, it's safe because each module will set the component
|
||||||
|
attribute to the same value. So it's ok. */
|
||||||
|
uint32_t libfabric_api;
|
||||||
|
libfabric_api = fi_version();
|
||||||
|
if (1 == FI_MAJOR(libfabric_api) &&
|
||||||
|
0 == FI_MINOR(libfabric_api)) {
|
||||||
|
mca_btl_usnic_component.prefix_send_offset = 0;
|
||||||
|
} else {
|
||||||
|
mca_btl_usnic_component.prefix_send_offset =
|
||||||
|
module->fabric_info->ep_attr->msg_prefix_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1835,13 +1865,15 @@ static void init_payload_lengths(opal_btl_usnic_module_t *module)
|
|||||||
/* Find the max payload this port can handle */
|
/* Find the max payload this port can handle */
|
||||||
module->max_frag_payload =
|
module->max_frag_payload =
|
||||||
module->local_modex.max_msg_size - /* start with the MTU */
|
module->local_modex.max_msg_size - /* start with the MTU */
|
||||||
sizeof(opal_btl_usnic_btl_header_t); /* subtract size of
|
sizeof(opal_btl_usnic_btl_header_t) - /* subtract size of
|
||||||
the BTL header */
|
the BTL header */
|
||||||
|
mca_btl_usnic_component.prefix_send_offset;
|
||||||
|
|
||||||
/* same, but use chunk header */
|
/* same, but use chunk header */
|
||||||
module->max_chunk_payload =
|
module->max_chunk_payload =
|
||||||
module->local_modex.max_msg_size -
|
module->local_modex.max_msg_size -
|
||||||
sizeof(opal_btl_usnic_btl_chunk_header_t);
|
sizeof(opal_btl_usnic_btl_chunk_header_t) -
|
||||||
|
mca_btl_usnic_component.prefix_send_offset;
|
||||||
|
|
||||||
/* Priorirty queue MTU and max size */
|
/* Priorirty queue MTU and max size */
|
||||||
if (0 == module->max_tiny_msg_size) {
|
if (0 == module->max_tiny_msg_size) {
|
||||||
@ -2097,7 +2129,6 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
uint32_t segsize;
|
uint32_t segsize;
|
||||||
|
|
||||||
segsize = (module->local_modex.max_msg_size +
|
segsize = (module->local_modex.max_msg_size +
|
||||||
module->fabric_info->ep_attr->msg_prefix_size +
|
|
||||||
opal_cache_line_size - 1) &
|
opal_cache_line_size - 1) &
|
||||||
~(opal_cache_line_size - 1);
|
~(opal_cache_line_size - 1);
|
||||||
|
|
||||||
@ -2105,7 +2136,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
||||||
sizeof(opal_btl_usnic_small_send_frag_t) +
|
sizeof(opal_btl_usnic_small_send_frag_t) +
|
||||||
mca_btl_usnic_component.transport_header_len,
|
mca_btl_usnic_component.prefix_send_offset,
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
||||||
segsize,
|
segsize,
|
||||||
@ -2123,7 +2154,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
||||||
sizeof(opal_btl_usnic_large_send_frag_t) +
|
sizeof(opal_btl_usnic_large_send_frag_t) +
|
||||||
mca_btl_usnic_component.transport_header_len,
|
mca_btl_usnic_component.prefix_send_offset,
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
||||||
0, /* payload size */
|
0, /* payload size */
|
||||||
@ -2141,7 +2172,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
||||||
sizeof(opal_btl_usnic_put_dest_frag_t) +
|
sizeof(opal_btl_usnic_put_dest_frag_t) +
|
||||||
mca_btl_usnic_component.transport_header_len,
|
mca_btl_usnic_component.prefix_send_offset,
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
||||||
0, /* payload size */
|
0, /* payload size */
|
||||||
@ -2160,7 +2191,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
||||||
sizeof(opal_btl_usnic_chunk_segment_t) +
|
sizeof(opal_btl_usnic_chunk_segment_t) +
|
||||||
mca_btl_usnic_component.transport_header_len,
|
mca_btl_usnic_component.prefix_send_offset,
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
||||||
segsize,
|
segsize,
|
||||||
@ -2178,12 +2209,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
/* ACK segments freelist */
|
/* ACK segments freelist */
|
||||||
uint32_t ack_segment_len;
|
uint32_t ack_segment_len;
|
||||||
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
||||||
module->fabric_info->ep_attr->msg_prefix_size +
|
|
||||||
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
||||||
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->ack_segs,
|
rc = usnic_compat_free_list_init(&module->ack_segs,
|
||||||
sizeof(opal_btl_usnic_ack_segment_t) +
|
sizeof(opal_btl_usnic_ack_segment_t) +
|
||||||
mca_btl_usnic_component.transport_header_len,
|
mca_btl_usnic_component.prefix_send_offset,
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
||||||
ack_segment_len,
|
ack_segment_len,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -79,7 +79,7 @@ opal_btl_usnic_post_segment(
|
|||||||
/* Send the segment */
|
/* Send the segment */
|
||||||
ret = fi_send(channel->ep,
|
ret = fi_send(channel->ep,
|
||||||
sseg->ss_ptr,
|
sseg->ss_ptr,
|
||||||
sseg->ss_len,
|
sseg->ss_len + mca_btl_usnic_component.prefix_send_offset,
|
||||||
NULL,
|
NULL,
|
||||||
endpoint->endpoint_remote_addrs[channel_id],
|
endpoint->endpoint_remote_addrs[channel_id],
|
||||||
sseg);
|
sseg);
|
||||||
@ -128,7 +128,7 @@ opal_btl_usnic_post_ack(
|
|||||||
|
|
||||||
ret = fi_send(channel->ep,
|
ret = fi_send(channel->ep,
|
||||||
sseg->ss_ptr,
|
sseg->ss_ptr,
|
||||||
sseg->ss_len,
|
sseg->ss_len + mca_btl_usnic_component.prefix_send_offset,
|
||||||
NULL,
|
NULL,
|
||||||
endpoint->endpoint_remote_addrs[channel_id],
|
endpoint->endpoint_remote_addrs[channel_id],
|
||||||
sseg);
|
sseg);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user