1
1

Merge pull request #7043 from jsquyres/pr/v4.0.x/usnic-fixes-and-optimizations

v4.0.x: usnic fixes and optimizations
Этот коммит содержится в:
Howard Pritchard 2019-10-22 09:05:27 -05:00 коммит произвёл GitHub
родитель cb5f4e737a c6592822c0
Коммит 106109a286
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 50 добавлений и 28 удалений

Просмотреть файл

@ -68,7 +68,7 @@ extern uint64_t opal_btl_usnic_ticks;
extern opal_recursive_mutex_t btl_usnic_lock;
static inline uint64_t
get_nsec(void)
get_ticks(void)
{
return opal_btl_usnic_ticks;
}
@ -206,6 +206,14 @@ typedef struct opal_btl_usnic_component_t {
/** retrans characteristics */
int retrans_timeout;
/** max number of messages re-sent during a single progress
iteration */
int max_resends_per_iteration;
/** minimum number of times through component progress before
checking to see if standalone ACKs need to be sent */
int ack_iteration_delay;
/** transport header length for all usNIC devices on this server
(it is guaranteed that all usNIC devices on a single server
will have the same underlying transport, and therefore the

Просмотреть файл

@ -384,8 +384,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
static void usnic_clock_callback(int fd, short flags, void *timeout)
{
/* 1ms == 1,000,000 ns */
opal_btl_usnic_ticks += 1000000;
/* Increase by so many ticks that we will definitely force sending
any ACKs that are pending */
opal_btl_usnic_ticks += 1000;
/* run progress to make sure time change gets noticed */
usnic_component_progress();
@ -1132,7 +1133,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
*/
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
static int usnic_component_progress_2(void);
static int usnic_component_progress_2(bool check_priority);
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, int cq_ret);
@ -1145,9 +1146,7 @@ static int usnic_component_progress(void)
struct fi_cq_entry completion;
opal_btl_usnic_channel_t *channel;
static bool fastpath_ok = true;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
bool check_priority = true;
count = 0;
if (fastpath_ok) {
@ -1180,10 +1179,11 @@ static int usnic_component_progress(void)
usnic_handle_cq_error(module, channel, ret);
}
}
check_priority = false;
}
fastpath_ok = true;
return count + usnic_component_progress_2();
return count + usnic_component_progress_2(check_priority);
}
static int usnic_handle_completion(
@ -1304,7 +1304,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
}
}
static int usnic_component_progress_2(void)
static int usnic_component_progress_2(bool check_priority)
{
int i, j, count = 0, num_events, ret;
opal_btl_usnic_module_t* module;
@ -1313,15 +1313,18 @@ static int usnic_component_progress_2(void)
int rc;
int c;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
opal_btl_usnic_ticks += 1;
/* If we need to check priority, start with the priority channel.
Otherwise, just check the data channel. */
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
/* Poll for completions */
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
module = mca_btl_usnic_component.usnic_active_modules[i];
/* poll each channel */
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
channel = &module->mod_channels[c];
if (channel->chan_deferred_recv != NULL) {

Просмотреть файл

@ -260,6 +260,14 @@ int opal_btl_usnic_component_register(void)
5000, &mca_btl_usnic_component.retrans_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
16, &mca_btl_usnic_component.max_resends_per_iteration,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
4, &mca_btl_usnic_component.ack_iteration_delay,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
0, &max_tiny_msg_size,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

Просмотреть файл

@ -963,11 +963,12 @@ usnic_do_resends(
opal_btl_usnic_send_segment_t *sseg;
opal_btl_usnic_endpoint_t *endpoint;
struct opal_btl_usnic_channel_t *data_channel;
int ret;
int ret, count;
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
while ((get_send_credits(data_channel) > 1) &&
count = mca_btl_usnic_component.max_resends_per_iteration;
while (count > 0 && (get_send_credits(data_channel) > 1) &&
!opal_list_is_empty(&module->pending_resend_segs)) {
/*
@ -1009,6 +1010,8 @@ usnic_do_resends(
BTL_ERROR(("hotel checkin failed\n"));
abort(); /* should not be possible */
}
--count;
}
}
@ -1236,7 +1239,7 @@ opal_btl_usnic_module_progress_sends(
/* Is it time to send ACK? */
if (endpoint->endpoint_acktime == 0 ||
endpoint->endpoint_acktime <= get_nsec()) {
endpoint->endpoint_acktime <= get_ticks()) {
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
} else {
@ -2366,14 +2369,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
uint32_t segsize;
segsize = (module->local_modex.max_msg_size +
opal_cache_line_size - 1) &
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) &
~(opal_cache_line_size - 1);
/* Send frags freelists */
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->small_send_frags,
sizeof(opal_btl_usnic_small_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_small_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
segsize,
@ -2390,8 +2393,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->large_send_frags,
sizeof(opal_btl_usnic_large_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_large_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
0, /* payload size */
@ -2408,8 +2410,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->put_dest_frags,
sizeof(opal_btl_usnic_put_dest_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_put_dest_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
0, /* payload size */
@ -2427,8 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* list of segments to use for sending */
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->chunk_segs,
sizeof(opal_btl_usnic_chunk_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_chunk_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
segsize,
@ -2446,11 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* ACK segments freelist */
uint32_t ack_segment_len;
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->ack_segs,
sizeof(opal_btl_usnic_ack_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_ack_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
ack_segment_len,

Просмотреть файл

@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
}
/* give this process a chance to send something before ACKing */
/* A heuristic: set to send this ACK after we have checked our
incoming DATA_CHANNEL component.ack_iteration_delay times
(i.e., so we can piggyback an ACK on an outgoing send) */
if (0 == endpoint->endpoint_acktime) {
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
endpoint->endpoint_acktime =
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
}
/* Save this incoming segment in the received segments array on the