1
1

Merge pull request #7038 from jsquyres/pr/usnic-fixes-and-optimizations

btl/usnic fixes and optimizations
Этот коммит содержится в:
Jeff Squyres 2019-10-04 19:38:50 -04:00 коммит произвёл GitHub
родитель b774b47428 fe7f772f21
Коммит a49ae7f034
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 61 добавлений и 33 удалений

Просмотреть файл

@ -57,7 +57,7 @@ extern uint64_t opal_btl_usnic_ticks;
extern opal_recursive_mutex_t btl_usnic_lock;
static inline uint64_t
get_nsec(void)
get_ticks(void)
{
return opal_btl_usnic_ticks;
}
@ -190,6 +190,14 @@ typedef struct opal_btl_usnic_component_t {
/** retrans characteristics */
int retrans_timeout;
/** max number of messages re-sent during a single progress
iteration */
int max_resends_per_iteration;
/** minimum number of times through component progress before
checking to see if standalone ACKs need to be sent */
int ack_iteration_delay;
/** transport header length for all usNIC devices on this server
(it is guaranteed that all usNIC devices on a single server
will have the same underlying transport, and therefore the

Просмотреть файл

@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
static void usnic_clock_callback(int fd, short flags, void *timeout)
{
/* 1ms == 1,000,000 ns */
opal_btl_usnic_ticks += 1000000;
/* Increase by so many ticks that we will definitely force sending
any ACKs that are pending */
opal_btl_usnic_ticks += 1000;
/* run progress to make sure time change gets noticed */
usnic_component_progress();
@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
*/
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
static int usnic_component_progress_2(void);
static int usnic_component_progress_2(bool check_priority);
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, int cq_ret);
@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
struct fi_cq_entry completion;
opal_btl_usnic_channel_t *channel;
static bool fastpath_ok = true;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
bool check_priority = true;
count = 0;
if (fastpath_ok) {
@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
usnic_handle_cq_error(module, channel, ret);
}
}
check_priority = false;
}
fastpath_ok = true;
return count + usnic_component_progress_2();
return count + usnic_component_progress_2(check_priority);
}
static int usnic_handle_completion(
@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
}
}
static int usnic_component_progress_2(void)
static int usnic_component_progress_2(bool check_priority)
{
int i, j, count = 0, num_events, ret;
opal_btl_usnic_module_t* module;
@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
int rc;
int c;
/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
opal_btl_usnic_ticks += 1;
/* If we need to check priority, start with the priority channel.
Otherwise, just check the data channel. */
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
/* Poll for completions */
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
module = mca_btl_usnic_component.usnic_active_modules[i];
/* poll each channel */
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
channel = &module->mod_channels[c];
if (channel->chan_deferred_recv != NULL) {

Просмотреть файл

@ -138,18 +138,24 @@ typedef struct {
the length of the packet to meet a minimum size */
uint16_t payload_len;
/* If this is an emulated PUT, store at this address on receiver */
char *put_addr;
/* Type of BTL header (see enum, above) */
uint8_t payload_type;
/* true if there is piggy-backed ACK */
uint8_t ack_present;
/* This field is ordered here so that we have no holes in the
struct. Technically this doesn't matter, because we're using
the __packed__ attribute (so there will be no holes anyway),
but ordering things nicely in the struct prevents the need for
unaligned reads/writes when using __packed__. */
/* If this is an emulated PUT, store at this address on
receiver */
char *put_addr;
/* tag for upper layer */
mca_btl_base_tag_t tag;
} opal_btl_usnic_btl_header_t;
} __opal_attribute_packed__ opal_btl_usnic_btl_header_t;
/**
* BTL header for a chunk of a fragment

Просмотреть файл

@ -246,9 +246,17 @@ int opal_btl_usnic_component_register(void)
mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
5000, &mca_btl_usnic_component.retrans_timeout,
100000, &mca_btl_usnic_component.retrans_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
16, &mca_btl_usnic_component.max_resends_per_iteration,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
0, &mca_btl_usnic_component.ack_iteration_delay,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
0, &max_tiny_msg_size,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

Просмотреть файл

@ -954,11 +954,12 @@ usnic_do_resends(
opal_btl_usnic_send_segment_t *sseg;
opal_btl_usnic_endpoint_t *endpoint;
struct opal_btl_usnic_channel_t *data_channel;
int ret;
int ret, count;
data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];
while ((get_send_credits(data_channel) > 1) &&
count = mca_btl_usnic_component.max_resends_per_iteration;
while (count > 0 && (get_send_credits(data_channel) > 1) &&
!opal_list_is_empty(&module->pending_resend_segs)) {
/*
@ -999,6 +1000,8 @@ usnic_do_resends(
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
opal_btl_usnic_util_abort("hotel checkin failed\n", __FILE__, __LINE__);
}
--count;
}
}
@ -1226,7 +1229,7 @@ opal_btl_usnic_module_progress_sends(
/* Is it time to send ACK? */
if (endpoint->endpoint_acktime == 0 ||
endpoint->endpoint_acktime <= get_nsec()) {
endpoint->endpoint_acktime <= get_ticks()) {
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
} else {
@ -2344,14 +2347,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
uint32_t segsize;
segsize = (module->local_modex.max_msg_size +
opal_cache_line_size - 1) &
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) &
~(opal_cache_line_size - 1);
/* Send frags freelists */
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->small_send_frags,
sizeof(opal_btl_usnic_small_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_small_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
segsize,
@ -2368,8 +2371,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->large_send_frags,
sizeof(opal_btl_usnic_large_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_large_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
0, /* payload size */
@ -2386,8 +2388,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->put_dest_frags,
sizeof(opal_btl_usnic_put_dest_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_put_dest_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
0, /* payload size */
@ -2405,8 +2406,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* list of segments to use for sending */
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->chunk_segs,
sizeof(opal_btl_usnic_chunk_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_chunk_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
segsize,
@ -2424,11 +2424,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* ACK segments freelist */
uint32_t ack_segment_len;
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->ack_segs,
sizeof(opal_btl_usnic_ack_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_ack_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
ack_segment_len,

Просмотреть файл

@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
}
/* give this process a chance to send something before ACKing */
/* A heuristic: set to send this ACK after we have checked our
incoming DATA_CHANNEL component.ack_iteration_delay times
(i.e., so we can piggyback an ACK on an outgoing send) */
if (0 == endpoint->endpoint_acktime) {
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
endpoint->endpoint_acktime =
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
}
/* Save this incoming segment in the received segments array on the