Merge pull request #7038 from jsquyres/pr/usnic-fixes-and-optimizations
btl/usnic fixes and optimizations
Этот коммит содержится в:
Коммит
a49ae7f034
@ -57,7 +57,7 @@ extern uint64_t opal_btl_usnic_ticks;
|
||||
extern opal_recursive_mutex_t btl_usnic_lock;
|
||||
|
||||
static inline uint64_t
|
||||
get_nsec(void)
|
||||
get_ticks(void)
|
||||
{
|
||||
return opal_btl_usnic_ticks;
|
||||
}
|
||||
@ -190,6 +190,14 @@ typedef struct opal_btl_usnic_component_t {
|
||||
/** retrans characteristics */
|
||||
int retrans_timeout;
|
||||
|
||||
/** max number of messages re-sent during a single progress
|
||||
iteration */
|
||||
int max_resends_per_iteration;
|
||||
|
||||
/** minimum number of times through component progress before
|
||||
checking to see if standalone ACKs need to be sent */
|
||||
int ack_iteration_delay;
|
||||
|
||||
/** transport header length for all usNIC devices on this server
|
||||
(it is guaranteed that all usNIC devices on a single server
|
||||
will have the same underlying transport, and therefore the
|
||||
|
@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
|
||||
|
||||
static void usnic_clock_callback(int fd, short flags, void *timeout)
|
||||
{
|
||||
/* 1ms == 1,000,000 ns */
|
||||
opal_btl_usnic_ticks += 1000000;
|
||||
/* Increase by so many ticks that we will definitely force sending
|
||||
any ACKs that are pending */
|
||||
opal_btl_usnic_ticks += 1000;
|
||||
|
||||
/* run progress to make sure time change gets noticed */
|
||||
usnic_component_progress();
|
||||
@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
*/
|
||||
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
|
||||
static int usnic_component_progress_2(void);
|
||||
static int usnic_component_progress_2(bool check_priority);
|
||||
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
opal_btl_usnic_channel_t *channel, int cq_ret);
|
||||
|
||||
@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
|
||||
struct fi_cq_entry completion;
|
||||
opal_btl_usnic_channel_t *channel;
|
||||
static bool fastpath_ok = true;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
bool check_priority = true;
|
||||
|
||||
count = 0;
|
||||
if (fastpath_ok) {
|
||||
@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
|
||||
usnic_handle_cq_error(module, channel, ret);
|
||||
}
|
||||
}
|
||||
check_priority = false;
|
||||
}
|
||||
|
||||
fastpath_ok = true;
|
||||
return count + usnic_component_progress_2();
|
||||
return count + usnic_component_progress_2(check_priority);
|
||||
}
|
||||
|
||||
static int usnic_handle_completion(
|
||||
@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
|
||||
}
|
||||
}
|
||||
|
||||
static int usnic_component_progress_2(void)
|
||||
static int usnic_component_progress_2(bool check_priority)
|
||||
{
|
||||
int i, j, count = 0, num_events, ret;
|
||||
opal_btl_usnic_module_t* module;
|
||||
@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
|
||||
int rc;
|
||||
int c;
|
||||
|
||||
/* update our simulated clock */
|
||||
opal_btl_usnic_ticks += 5000;
|
||||
opal_btl_usnic_ticks += 1;
|
||||
|
||||
/* If we need to check priority, start with the priority channel.
|
||||
Otherwise, just check the data channel. */
|
||||
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
|
||||
|
||||
/* Poll for completions */
|
||||
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
|
||||
module = mca_btl_usnic_component.usnic_active_modules[i];
|
||||
|
||||
/* poll each channel */
|
||||
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
|
||||
channel = &module->mod_channels[c];
|
||||
|
||||
if (channel->chan_deferred_recv != NULL) {
|
||||
|
@ -138,18 +138,24 @@ typedef struct {
|
||||
the length of the packet to meet a minimum size */
|
||||
uint16_t payload_len;
|
||||
|
||||
/* If this is an emulated PUT, store at this address on receiver */
|
||||
char *put_addr;
|
||||
|
||||
/* Type of BTL header (see enum, above) */
|
||||
uint8_t payload_type;
|
||||
|
||||
/* true if there is piggy-backed ACK */
|
||||
uint8_t ack_present;
|
||||
|
||||
/* This field is ordered here so that we have no holes in the
|
||||
struct. Technically this doesn't matter, because we're using
|
||||
the __packed__ attribute (so there will be no holes anyway),
|
||||
but ordering things nicely in the struct prevents the need for
|
||||
unaligned reads/writes when using __packed__. */
|
||||
/* If this is an emulated PUT, store at this address on
|
||||
receiver */
|
||||
char *put_addr;
|
||||
|
||||
/* tag for upper layer */
|
||||
mca_btl_base_tag_t tag;
|
||||
} opal_btl_usnic_btl_header_t;
|
||||
} __opal_attribute_packed__ opal_btl_usnic_btl_header_t;
|
||||
|
||||
/**
|
||||
* BTL header for a chunk of a fragment
|
||||
|
@ -246,9 +246,17 @@ int opal_btl_usnic_component_register(void)
|
||||
mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
|
||||
|
||||
CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
|
||||
5000, &mca_btl_usnic_component.retrans_timeout,
|
||||
100000, &mca_btl_usnic_component.retrans_timeout,
|
||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
|
||||
16, &mca_btl_usnic_component.max_resends_per_iteration,
|
||||
REGINT_GE_ONE, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
|
||||
0, &mca_btl_usnic_component.ack_iteration_delay,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
||||
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
|
||||
0, &max_tiny_msg_size,
|
||||
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
|
||||
|
@ -954,11 +954,12 @@ usnic_do_resends(
|
||||
opal_btl_usnic_send_segment_t *sseg;
|
||||
opal_btl_usnic_endpoint_t *endpoint;
|
||||
struct opal_btl_usnic_channel_t *data_channel;
|
||||
int ret;
|
||||
int ret, count;
|
||||
|
||||
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
|
||||
|
||||
while ((get_send_credits(data_channel) > 1) &&
|
||||
count = mca_btl_usnic_component.max_resends_per_iteration;
|
||||
while (count > 0 && (get_send_credits(data_channel) > 1) &&
|
||||
!opal_list_is_empty(&module->pending_resend_segs)) {
|
||||
|
||||
/*
|
||||
@ -999,6 +1000,8 @@ usnic_do_resends(
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
opal_btl_usnic_util_abort("hotel checkin failed\n", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
--count;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1226,7 +1229,7 @@ opal_btl_usnic_module_progress_sends(
|
||||
|
||||
/* Is it time to send ACK? */
|
||||
if (endpoint->endpoint_acktime == 0 ||
|
||||
endpoint->endpoint_acktime <= get_nsec()) {
|
||||
endpoint->endpoint_acktime <= get_ticks()) {
|
||||
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
|
||||
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
|
||||
} else {
|
||||
@ -2344,14 +2347,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
uint32_t segsize;
|
||||
|
||||
segsize = (module->local_modex.max_msg_size +
|
||||
opal_cache_line_size - 1) &
|
||||
mca_btl_usnic_component.prefix_send_offset +
|
||||
opal_cache_line_size - 1) &
|
||||
~(opal_cache_line_size - 1);
|
||||
|
||||
/* Send frags freelists */
|
||||
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->small_send_frags,
|
||||
sizeof(opal_btl_usnic_small_send_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_small_send_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
|
||||
segsize,
|
||||
@ -2368,8 +2371,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
|
||||
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->large_send_frags,
|
||||
sizeof(opal_btl_usnic_large_send_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_large_send_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
|
||||
0, /* payload size */
|
||||
@ -2386,8 +2388,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
|
||||
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->put_dest_frags,
|
||||
sizeof(opal_btl_usnic_put_dest_frag_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_put_dest_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
|
||||
0, /* payload size */
|
||||
@ -2405,8 +2406,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
/* list of segments to use for sending */
|
||||
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->chunk_segs,
|
||||
sizeof(opal_btl_usnic_chunk_segment_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_chunk_segment_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
|
||||
segsize,
|
||||
@ -2424,11 +2424,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
|
||||
/* ACK segments freelist */
|
||||
uint32_t ack_segment_len;
|
||||
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset +
|
||||
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
|
||||
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
|
||||
rc = usnic_compat_free_list_init(&module->ack_segs,
|
||||
sizeof(opal_btl_usnic_ack_segment_t) +
|
||||
mca_btl_usnic_component.prefix_send_offset,
|
||||
sizeof(opal_btl_usnic_ack_segment_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
|
||||
ack_segment_len,
|
||||
|
@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
|
||||
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
|
||||
}
|
||||
|
||||
/* give this process a chance to send something before ACKing */
|
||||
/* A heuristic: set to send this ACK after we have checked our
|
||||
incoming DATA_CHANNEL component.ack_iteration_delay times
|
||||
(i.e., so we can piggyback an ACK on an outgoing send) */
|
||||
if (0 == endpoint->endpoint_acktime) {
|
||||
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
|
||||
endpoint->endpoint_acktime =
|
||||
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
|
||||
}
|
||||
|
||||
/* Save this incoming segment in the received segments array on the
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user