Merge pull request #7038 from jsquyres/pr/usnic-fixes-and-optimizations
btl/usnic fixes and optimizations
Этот коммит содержится в:
Коммит
a49ae7f034
@ -57,7 +57,7 @@ extern uint64_t opal_btl_usnic_ticks;
|
|||||||
extern opal_recursive_mutex_t btl_usnic_lock;
|
extern opal_recursive_mutex_t btl_usnic_lock;
|
||||||
|
|
||||||
static inline uint64_t
|
static inline uint64_t
|
||||||
get_nsec(void)
|
get_ticks(void)
|
||||||
{
|
{
|
||||||
return opal_btl_usnic_ticks;
|
return opal_btl_usnic_ticks;
|
||||||
}
|
}
|
||||||
@ -190,6 +190,14 @@ typedef struct opal_btl_usnic_component_t {
|
|||||||
/** retrans characteristics */
|
/** retrans characteristics */
|
||||||
int retrans_timeout;
|
int retrans_timeout;
|
||||||
|
|
||||||
|
/** max number of messages re-sent during a single progress
|
||||||
|
iteration */
|
||||||
|
int max_resends_per_iteration;
|
||||||
|
|
||||||
|
/** minimum number of times through component progress before
|
||||||
|
checking to see if standalone ACKs need to be sent */
|
||||||
|
int ack_iteration_delay;
|
||||||
|
|
||||||
/** transport header length for all usNIC devices on this server
|
/** transport header length for all usNIC devices on this server
|
||||||
(it is guaranteed that all usNIC devices on a single server
|
(it is guaranteed that all usNIC devices on a single server
|
||||||
will have the same underlying transport, and therefore the
|
will have the same underlying transport, and therefore the
|
||||||
|
@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
|
|||||||
|
|
||||||
static void usnic_clock_callback(int fd, short flags, void *timeout)
|
static void usnic_clock_callback(int fd, short flags, void *timeout)
|
||||||
{
|
{
|
||||||
/* 1ms == 1,000,000 ns */
|
/* Increase by so many ticks that we will definitely force sending
|
||||||
opal_btl_usnic_ticks += 1000000;
|
any ACKs that are pending */
|
||||||
|
opal_btl_usnic_ticks += 1000;
|
||||||
|
|
||||||
/* run progress to make sure time change gets noticed */
|
/* run progress to make sure time change gets noticed */
|
||||||
usnic_component_progress();
|
usnic_component_progress();
|
||||||
@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
*/
|
*/
|
||||||
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
|
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
|
||||||
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
|
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
|
||||||
static int usnic_component_progress_2(void);
|
static int usnic_component_progress_2(bool check_priority);
|
||||||
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||||
opal_btl_usnic_channel_t *channel, int cq_ret);
|
opal_btl_usnic_channel_t *channel, int cq_ret);
|
||||||
|
|
||||||
@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
|
|||||||
struct fi_cq_entry completion;
|
struct fi_cq_entry completion;
|
||||||
opal_btl_usnic_channel_t *channel;
|
opal_btl_usnic_channel_t *channel;
|
||||||
static bool fastpath_ok = true;
|
static bool fastpath_ok = true;
|
||||||
|
bool check_priority = true;
|
||||||
/* update our simulated clock */
|
|
||||||
opal_btl_usnic_ticks += 5000;
|
|
||||||
|
|
||||||
count = 0;
|
count = 0;
|
||||||
if (fastpath_ok) {
|
if (fastpath_ok) {
|
||||||
@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
|
|||||||
usnic_handle_cq_error(module, channel, ret);
|
usnic_handle_cq_error(module, channel, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
check_priority = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fastpath_ok = true;
|
fastpath_ok = true;
|
||||||
return count + usnic_component_progress_2();
|
return count + usnic_component_progress_2(check_priority);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int usnic_handle_completion(
|
static int usnic_handle_completion(
|
||||||
@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int usnic_component_progress_2(void)
|
static int usnic_component_progress_2(bool check_priority)
|
||||||
{
|
{
|
||||||
int i, j, count = 0, num_events, ret;
|
int i, j, count = 0, num_events, ret;
|
||||||
opal_btl_usnic_module_t* module;
|
opal_btl_usnic_module_t* module;
|
||||||
@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
|
|||||||
int rc;
|
int rc;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
/* update our simulated clock */
|
opal_btl_usnic_ticks += 1;
|
||||||
opal_btl_usnic_ticks += 5000;
|
|
||||||
|
/* If we need to check priority, start with the priority channel.
|
||||||
|
Otherwise, just check the data channel. */
|
||||||
|
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
|
||||||
|
|
||||||
/* Poll for completions */
|
/* Poll for completions */
|
||||||
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
|
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
|
||||||
module = mca_btl_usnic_component.usnic_active_modules[i];
|
module = mca_btl_usnic_component.usnic_active_modules[i];
|
||||||
|
|
||||||
/* poll each channel */
|
/* poll each channel */
|
||||||
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
|
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
|
||||||
channel = &module->mod_channels[c];
|
channel = &module->mod_channels[c];
|
||||||
|
|
||||||
if (channel->chan_deferred_recv != NULL) {
|
if (channel->chan_deferred_recv != NULL) {
|
||||||
|
@ -138,18 +138,24 @@ typedef struct {
|
|||||||
the length of the packet to meet a minimum size */
|
the length of the packet to meet a minimum size */
|
||||||
uint16_t payload_len;
|
uint16_t payload_len;
|
||||||
|
|
||||||
/* If this is an emulated PUT, store at this address on receiver */
|
|
||||||
char *put_addr;
|
|
||||||
|
|
||||||
/* Type of BTL header (see enum, above) */
|
/* Type of BTL header (see enum, above) */
|
||||||
uint8_t payload_type;
|
uint8_t payload_type;
|
||||||
|
|
||||||
/* true if there is piggy-backed ACK */
|
/* true if there is piggy-backed ACK */
|
||||||
uint8_t ack_present;
|
uint8_t ack_present;
|
||||||
|
|
||||||
|
/* This field is ordered here so that we have no holes in the
|
||||||
|
struct. Technically this doesn't matter, because we're using
|
||||||
|
the __packed__ attribute (so there will be no holes anyway),
|
||||||
|
but ordering things nicely in the struct prevents the need for
|
||||||
|
unaligned reads/writes when using __packed__. */
|
||||||
|
/* If this is an emulated PUT, store at this address on
|
||||||
|
receiver */
|
||||||
|
char *put_addr;
|
||||||
|
|
||||||
/* tag for upper layer */
|
/* tag for upper layer */
|
||||||
mca_btl_base_tag_t tag;
|
mca_btl_base_tag_t tag;
|
||||||
} opal_btl_usnic_btl_header_t;
|
} __opal_attribute_packed__ opal_btl_usnic_btl_header_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BTL header for a chunk of a fragment
|
* BTL header for a chunk of a fragment
|
||||||
|
@ -246,9 +246,17 @@ int opal_btl_usnic_component_register(void)
|
|||||||
mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
|
mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
|
||||||
|
|
||||||
CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
|
CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
|
||||||
5000, &mca_btl_usnic_component.retrans_timeout,
|
100000, &mca_btl_usnic_component.retrans_timeout,
|
||||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||||
|
|
||||||
|
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
|
||||||
|
16, &mca_btl_usnic_component.max_resends_per_iteration,
|
||||||
|
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||||
|
|
||||||
|
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
|
||||||
|
0, &mca_btl_usnic_component.ack_iteration_delay,
|
||||||
|
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||||
|
|
||||||
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
|
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
|
||||||
0, &max_tiny_msg_size,
|
0, &max_tiny_msg_size,
|
||||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||||
|
@ -954,11 +954,12 @@ usnic_do_resends(
|
|||||||
opal_btl_usnic_send_segment_t *sseg;
|
opal_btl_usnic_send_segment_t *sseg;
|
||||||
opal_btl_usnic_endpoint_t *endpoint;
|
opal_btl_usnic_endpoint_t *endpoint;
|
||||||
struct opal_btl_usnic_channel_t *data_channel;
|
struct opal_btl_usnic_channel_t *data_channel;
|
||||||
int ret;
|
int ret, count;
|
||||||
|
|
||||||
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
|
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
|
||||||
|
|
||||||
while ((get_send_credits(data_channel) > 1) &&
|
count = mca_btl_usnic_component.max_resends_per_iteration;
|
||||||
|
while (count > 0 && (get_send_credits(data_channel) > 1) &&
|
||||||
!opal_list_is_empty(&module->pending_resend_segs)) {
|
!opal_list_is_empty(&module->pending_resend_segs)) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -999,6 +1000,8 @@ usnic_do_resends(
|
|||||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||||
opal_btl_usnic_util_abort("hotel checkin failed\n", __FILE__, __LINE__);
|
opal_btl_usnic_util_abort("hotel checkin failed\n", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
--count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1226,7 +1229,7 @@ opal_btl_usnic_module_progress_sends(
|
|||||||
|
|
||||||
/* Is it time to send ACK? */
|
/* Is it time to send ACK? */
|
||||||
if (endpoint->endpoint_acktime == 0 ||
|
if (endpoint->endpoint_acktime == 0 ||
|
||||||
endpoint->endpoint_acktime <= get_nsec()) {
|
endpoint->endpoint_acktime <= get_ticks()) {
|
||||||
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
|
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
|
||||||
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
|
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
|
||||||
} else {
|
} else {
|
||||||
@ -2344,14 +2347,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
uint32_t segsize;
|
uint32_t segsize;
|
||||||
|
|
||||||
segsize = (module->local_modex.max_msg_size +
|
segsize = (module->local_modex.max_msg_size +
|
||||||
opal_cache_line_size - 1) &
|
mca_btl_usnic_component.prefix_send_offset +
|
||||||
|
opal_cache_line_size - 1) &
|
||||||
~(opal_cache_line_size - 1);
|
~(opal_cache_line_size - 1);
|
||||||
|
|
||||||
/* Send frags freelists */
|
/* Send frags freelists */
|
||||||
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
||||||
sizeof(opal_btl_usnic_small_send_frag_t) +
|
sizeof(opal_btl_usnic_small_send_frag_t),
|
||||||
mca_btl_usnic_component.prefix_send_offset,
|
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
||||||
segsize,
|
segsize,
|
||||||
@ -2368,8 +2371,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
|
|
||||||
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
||||||
sizeof(opal_btl_usnic_large_send_frag_t) +
|
sizeof(opal_btl_usnic_large_send_frag_t),
|
||||||
mca_btl_usnic_component.prefix_send_offset,
|
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
||||||
0, /* payload size */
|
0, /* payload size */
|
||||||
@ -2386,8 +2388,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
|
|
||||||
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
||||||
sizeof(opal_btl_usnic_put_dest_frag_t) +
|
sizeof(opal_btl_usnic_put_dest_frag_t),
|
||||||
mca_btl_usnic_component.prefix_send_offset,
|
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
||||||
0, /* payload size */
|
0, /* payload size */
|
||||||
@ -2405,8 +2406,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
/* list of segments to use for sending */
|
/* list of segments to use for sending */
|
||||||
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
||||||
sizeof(opal_btl_usnic_chunk_segment_t) +
|
sizeof(opal_btl_usnic_chunk_segment_t),
|
||||||
mca_btl_usnic_component.prefix_send_offset,
|
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
||||||
segsize,
|
segsize,
|
||||||
@ -2424,11 +2424,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
|||||||
/* ACK segments freelist */
|
/* ACK segments freelist */
|
||||||
uint32_t ack_segment_len;
|
uint32_t ack_segment_len;
|
||||||
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
||||||
|
mca_btl_usnic_component.prefix_send_offset +
|
||||||
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
||||||
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
||||||
rc = usnic_compat_free_list_init(&module->ack_segs,
|
rc = usnic_compat_free_list_init(&module->ack_segs,
|
||||||
sizeof(opal_btl_usnic_ack_segment_t) +
|
sizeof(opal_btl_usnic_ack_segment_t),
|
||||||
mca_btl_usnic_component.prefix_send_offset,
|
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
||||||
ack_segment_len,
|
ack_segment_len,
|
||||||
|
@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
|
|||||||
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
|
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* give this process a chance to send something before ACKing */
|
/* A heuristic: set to send this ACK after we have checked our
|
||||||
|
incoming DATA_CHANNEL component.ack_iteration_delay times
|
||||||
|
(i.e., so we can piggyback an ACK on an outgoing send) */
|
||||||
if (0 == endpoint->endpoint_acktime) {
|
if (0 == endpoint->endpoint_acktime) {
|
||||||
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
|
endpoint->endpoint_acktime =
|
||||||
|
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Save this incoming segment in the received segments array on the
|
/* Save this incoming segment in the received segments array on the
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user