btl/usnic: clarifications and fixes regarding ACKs
New MCA parameter: btl_usnic_ack_iteration_delay. Set this to the number of times through the usNIC component progress function before sending a standalone ACK (vs. piggy-backing the ACK on any other send going to the target peer). Use "ticks" language to clarify that we're really counting the number of times through the usNIC component DATA_CHANNEL completion check (to check for incoming messages) -- it has no relation to wall clock time whatsoever. Also slightly change the channel-checking scheme in usNIC component progress: only check the PRIORITY channel once (vs. checking it once, not finding anything, and then falling through to progress_2(), where we check PRIORITY again and then check the DATA channel). As before, if our "progress" libevent fires, increment the tick counter enough to guarantee that all endpoints that need an ACK will get triggered to send standalone ACKs the next time through progress, if necessary. Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
родитель
ce2910a28a
Коммит
968b1a51b5
@ -190,6 +190,10 @@ typedef struct opal_btl_usnic_component_t {
|
||||
/** retrans characteristics */
|
||||
int retrans_timeout;
|
||||
|
||||
/** minimum number of times through component progress before
|
||||
checking to see if standalone ACKs need to be sent */
|
||||
int ack_iteration_delay;
|
||||
|
||||
/** transport header length for all usNIC devices on this server
|
||||
(it is guaranteed that all usNIC devices on a single server
|
||||
will have the same underlying transport, and therefore the
|
||||
|
@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
|
||||
|
||||
static void usnic_clock_callback(int fd, short flags, void *timeout)
|
||||
{
|
||||
/* 1ms == 1,000,000 ns */
|
||||
opal_btl_usnic_ticks += 1000000;
|
||||
/* Increase by so many ticks that we will definitely force sending
|
||||
any ACKs that are pending */
|
||||
opal_btl_usnic_ticks += 1000;
|
||||
|
||||
/* run progress to make sure time change gets noticed */
|
||||
usnic_component_progress();
|
||||
@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
*/
|
||||
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
|
||||
static int usnic_component_progress_2(void);
|
||||
static int usnic_component_progress_2(bool check_priority);
|
||||
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, int cq_ret);
|
||||
|
||||
@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
|
||||
struct fi_cq_entry completion;
|
||||
opal_btl_usnic_channel_t *channel;
|
||||
static bool fastpath_ok = true;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
bool check_priority = true;
|
||||
|
||||
count = 0;
|
||||
if (fastpath_ok) {
|
||||
@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
|
||||
usnic_handle_cq_error(module, channel, ret);
|
||||
}
|
||||
}
|
||||
check_priority = false;
|
||||
}
|
||||
|
||||
fastpath_ok = true;
|
||||
return count + usnic_component_progress_2();
|
||||
return count + usnic_component_progress_2(check_priority);
|
||||
}
|
||||
|
||||
static int usnic_handle_completion(
|
||||
@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
}
|
||||
}
|
||||
|
||||
static int usnic_component_progress_2(void)
|
||||
static int usnic_component_progress_2(bool check_priority)
|
||||
{
|
||||
int i, j, count = 0, num_events, ret;
|
||||
opal_btl_usnic_module_t* module;
|
||||
@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
|
||||
int rc;
|
||||
int c;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
opal_btl_usnic_ticks += 1;
|
||||
|
||||
/* If we need to check priority, start with the priority channel.
|
||||
Otherwise, just check the data channel. */
|
||||
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
|
||||
|
||||
/* Poll for completions */
|
||||
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
|
||||
module = mca_btl_usnic_component.usnic_active_modules[i];
|
||||
|
||||
/* poll each channel */
|
||||
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
channel = &module->mod_channels[c];
|
||||
|
||||
if (channel->chan_deferred_recv != NULL) {
|
||||
|
@ -249,6 +249,10 @@ int opal_btl_usnic_component_register(void)
|
||||
5000, &mca_btl_usnic_component.retrans_timeout,
|
||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
|
||||
0, &mca_btl_usnic_component.ack_iteration_delay,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
|
||||
0, &max_tiny_msg_size,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
|
||||
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
|
||||
}
|
||||
|
||||
/* give this process a chance to send something before ACKing */
|
||||
/* A heuristic: set to send this ACK after we have checked our
|
||||
incoming DATA_CHANNEL component.ack_iteration_delay times
|
||||
(i.e., so we can piggyback an ACK on an outgoing send) */
|
||||
if (0 == endpoint->endpoint_acktime) {
|
||||
endpoint->endpoint_acktime = get_ticks() + 50000;
|
||||
endpoint->endpoint_acktime =
|
||||
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
|
||||
}
|
||||
|
||||
/* Save this incoming segment in the received segments array on the
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user