Merge pull request #7043 from jsquyres/pr/v4.0.x/usnic-fixes-and-optimizations
v4.0.x: usnic fixes and optimizations
Этот коммит содержится в:
Коммит
106109a286
@ -68,7 +68,7 @@ extern uint64_t opal_btl_usnic_ticks;
|
||||
extern opal_recursive_mutex_t btl_usnic_lock;
|
||||
|
||||
static inline uint64_t
|
||||
get_nsec(void)
|
||||
get_ticks(void)
|
||||
{
|
||||
return opal_btl_usnic_ticks;
|
||||
}
|
||||
@ -206,6 +206,14 @@ typedef struct opal_btl_usnic_component_t {
|
||||
/** retrans characteristics */
|
||||
int retrans_timeout;
|
||||
|
||||
/** max number of messages re-sent during a single progress
|
||||
iteration */
|
||||
int max_resends_per_iteration;
|
||||
|
||||
/** minimum number of times through component progress before
|
||||
checking to see if standalone ACKs need to be sent */
|
||||
int ack_iteration_delay;
|
||||
|
||||
/** transport header length for all usNIC devices on this server
|
||||
(it is guaranteed that all usNIC devices on a single server
|
||||
will have the same underlying transport, and therefore the
|
||||
|
@ -384,8 +384,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
|
||||
|
||||
static void usnic_clock_callback(int fd, short flags, void *timeout)
|
||||
{
|
||||
/* 1ms == 1,000,000 ns */
|
||||
opal_btl_usnic_ticks += 1000000;
|
||||
/* Increase by so many ticks that we will definitely force sending
|
||||
any ACKs that are pending */
|
||||
opal_btl_usnic_ticks += 1000;
|
||||
|
||||
/* run progress to make sure time change gets noticed */
|
||||
usnic_component_progress();
|
||||
@ -1132,7 +1133,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
*/
|
||||
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
|
||||
static int usnic_component_progress_2(void);
|
||||
static int usnic_component_progress_2(bool check_priority);
|
||||
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, int cq_ret);
|
||||
|
||||
@ -1145,9 +1146,7 @@ static int usnic_component_progress(void)
|
||||
struct fi_cq_entry completion;
|
||||
opal_btl_usnic_channel_t *channel;
|
||||
static bool fastpath_ok = true;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
bool check_priority = true;
|
||||
|
||||
count = 0;
|
||||
if (fastpath_ok) {
|
||||
@ -1180,10 +1179,11 @@ static int usnic_component_progress(void)
|
||||
usnic_handle_cq_error(module, channel, ret);
|
||||
}
|
||||
}
|
||||
check_priority = false;
|
||||
}
|
||||
|
||||
fastpath_ok = true;
|
||||
return count + usnic_component_progress_2();
|
||||
return count + usnic_component_progress_2(check_priority);
|
||||
}
|
||||
|
||||
static int usnic_handle_completion(
|
||||
@ -1304,7 +1304,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
}
|
||||
}
|
||||
|
||||
static int usnic_component_progress_2(void)
|
||||
static int usnic_component_progress_2(bool check_priority)
|
||||
{
|
||||
int i, j, count = 0, num_events, ret;
|
||||
opal_btl_usnic_module_t* module;
|
||||
@ -1313,15 +1313,18 @@ static int usnic_component_progress_2(void)
|
||||
int rc;
|
||||
int c;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
opal_btl_usnic_ticks += 1;
|
||||
|
||||
/* If we need to check priority, start with the priority channel.
|
||||
Otherwise, just check the data channel. */
|
||||
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
|
||||
|
||||
/* Poll for completions */
|
||||
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
|
||||
module = mca_btl_usnic_component.usnic_active_modules[i];
|
||||
|
||||
/* poll each channel */
|
||||
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
channel = &module->mod_channels[c];
|
||||
|
||||
if (channel->chan_deferred_recv != NULL) {
|
||||
|
@ -260,6 +260,14 @@ int opal_btl_usnic_component_register(void)
|
||||
5000, &mca_btl_usnic_component.retrans_timeout,
|
||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
|
||||
16, &mca_btl_usnic_component.max_resends_per_iteration,
|
||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
|
||||
4, &mca_btl_usnic_component.ack_iteration_delay,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
|
||||
0, &max_tiny_msg_size,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
@ -963,11 +963,12 @@ usnic_do_resends(
|
||||
opal_btl_usnic_send_segment_t *sseg;
|
||||
opal_btl_usnic_endpoint_t *endpoint;
|
||||
struct opal_btl_usnic_channel_t *data_channel;
|
||||
int ret;
|
||||
int ret, count;
|
||||
|
||||
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
|
||||
|
||||
while ((get_send_credits(data_channel) > 1) &&
|
||||
count = mca_btl_usnic_component.max_resends_per_iteration;
|
||||
while (count > 0 && (get_send_credits(data_channel) > 1) &&
|
||||
!opal_list_is_empty(&module->pending_resend_segs)) {
|
||||
|
||||
/*
|
||||
@ -1009,6 +1010,8 @@ usnic_do_resends(
|
||||
BTL_ERROR(("hotel checkin failed\n"));
|
||||
abort(); /* should not be possible */
|
||||
}
|
||||
|
||||
--count;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1236,7 +1239,7 @@ opal_btl_usnic_module_progress_sends(
|
||||
|
||||
/* Is it time to send ACK? */
|
||||
if (endpoint->endpoint_acktime == 0 ||
|
||||
endpoint->endpoint_acktime <= get_nsec()) {
|
||||
endpoint->endpoint_acktime <= get_ticks()) {
|
||||
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
|
||||
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
|
||||
} else {
|
||||
@ -2366,14 +2369,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
uint32_t segsize;
|
||||
|
||||
segsize = (module->local_modex.max_msg_size +
|
||||
opal_cache_line_size - 1) &
|
||||
mca_btl_usnic_component.prefix_send_offset +
|
||||
opal_cache_line_size - 1) &
|
||||
~(opal_cache_line_size - 1);
|
||||
|
||||
/* Send frags freelists */
|
||||
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
||||
sizeof(opal_btl_usnic_small_send_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_small_send_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
||||
segsize,
|
||||
@ -2390,8 +2393,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
|
||||
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
||||
sizeof(opal_btl_usnic_large_send_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_large_send_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
||||
0, /* payload size */
|
||||
@ -2408,8 +2410,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
|
||||
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
||||
sizeof(opal_btl_usnic_put_dest_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_put_dest_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
||||
0, /* payload size */
|
||||
@ -2427,8 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
/* list of segments to use for sending */
|
||||
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
||||
sizeof(opal_btl_usnic_chunk_segment_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_chunk_segment_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
||||
segsize,
|
||||
@ -2446,11 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
/* ACK segments freelist */
|
||||
uint32_t ack_segment_len;
|
||||
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset +
|
||||
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
||||
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->ack_segs,
|
||||
sizeof(opal_btl_usnic_ack_segment_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_ack_segment_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
||||
ack_segment_len,
|
||||
|
@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
|
||||
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
|
||||
}
|
||||
|
||||
/* give this process a chance to send something before ACKing */
|
||||
/* A heuristic: set to send this ACK after we have checked our
|
||||
incoming DATA_CHANNEL component.ack_iteration_delay times
|
||||
(i.e., so we can piggyback an ACK on an outgoing send) */
|
||||
if (0 == endpoint->endpoint_acktime) {
|
||||
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
|
||||
endpoint->endpoint_acktime =
|
||||
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
|
||||
}
|
||||
|
||||
/* Save this incoming segment in the received segments array on the
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user