1
1

btl/usnic: clarifications and fixes regarding ACKs

New MCA parameter: btl_usnic_ack_iteration_delay.  Set this to the
number of times through the usNIC component progress function before
sending a standalone ACK (vs. piggy-backing the ACK on any other send
going to the target peer).

Use "ticks" language to clarify that we're really counting the number
of times through the usNIC component DATA_CHANNEL completion check (to
check for incoming messages) -- it has no relation to wall clock time
whatsoever.

Also slightly change the channel-checking scheme in usNIC component
progress: only check the PRIORITY channel once (vs. checking it once,
not finding anything, and then falling through to progress_2(), where we
check PRIORITY again and then check the DATA channel).

As before, if our "progress" libevent fires, increment the tick
counter enough to guarantee that all endpoints that need an ACK will
get triggered to send standalone ACKs the next time through progress,
if necessary.

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
Jeff Squyres 2019-10-04 11:52:48 -07:00
родитель ce2910a28a
Коммит 968b1a51b5
4 изменённых файлов: 27 добавлений и 13 удалений

Просмотреть файл

@ -190,6 +190,10 @@ typedef struct opal_btl_usnic_component_t {
/** retrans characteristics */
int retrans_timeout;
/** minimum number of times through component progress before
checking to see if standalone ACKs need to be sent */
int ack_iteration_delay;
/** transport header length for all usNIC devices on this server
(it is guaranteed that all usNIC devices on a single server
will have the same underlying transport, and therefore the

Просмотреть файл

@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
static void usnic_clock_callback(int fd, short flags, void *timeout)
{
/* 1ms == 1,000,000 ns */
opal_btl_usnic_ticks += 1000000;
/* Increase by so many ticks that we will definitely force sending
any ACKs that are pending */
opal_btl_usnic_ticks += 1000;
/* run progress to make sure time change gets noticed */
usnic_component_progress();
@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
*/
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
static int usnic_component_progress_2(void);
static int usnic_component_progress_2(bool check_priority);
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, int cq_ret);
@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
struct fi_cq_entry completion;
opal_btl_usnic_channel_t *channel;
static bool fastpath_ok = true;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
bool check_priority = true;
count = 0;
if (fastpath_ok) {
@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
usnic_handle_cq_error(module, channel, ret);
}
}
check_priority = false;
}
fastpath_ok = true;
return count + usnic_component_progress_2();
return count + usnic_component_progress_2(check_priority);
}
static int usnic_handle_completion(
@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
}
}
static int usnic_component_progress_2(void)
static int usnic_component_progress_2(bool check_priority)
{
int i, j, count = 0, num_events, ret;
opal_btl_usnic_module_t* module;
@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
int rc;
int c;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
opal_btl_usnic_ticks += 1;
/* If we need to check priority, start with the priority channel.
Otherwise, just check the data channel. */
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
/* Poll for completions */
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
module = mca_btl_usnic_component.usnic_active_modules[i];
/* poll each channel */
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
channel = &module->mod_channels[c];
if (channel->chan_deferred_recv != NULL) {

Просмотреть файл

@ -249,6 +249,10 @@ int opal_btl_usnic_component_register(void)
5000, &mca_btl_usnic_component.retrans_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
0, &mca_btl_usnic_component.ack_iteration_delay,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
0, &max_tiny_msg_size,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

Просмотреть файл

@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
}
/* give this process a chance to send something before ACKing */
/* A heuristic: set to send this ACK after we have checked our
incoming DATA_CHANNEL component.ack_iteration_delay times
(i.e., so we can piggyback an ACK on an outgoing send) */
if (0 == endpoint->endpoint_acktime) {
endpoint->endpoint_acktime = get_ticks() + 50000;
endpoint->endpoint_acktime =
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
}
/* Save this incoming segment in the received segments array on the