usnic: add ompi_btl_usnic_component_debug helper
This new routine can be called in exceptional situations, either conditionally in BTL code or from a debugger, to help with debugging in cases where MSGDEBUG1/2 or stats logging are impractical but more detail is needed. Reviewed-by: Jeff Squyres <jsquyres@cisco.com> This commit was SVN r29483.
Этот коммит содержится в:
родитель
d0b7d125b2
Коммит
c9b2343982
@ -170,6 +170,10 @@ typedef uint64_t ompi_btl_usnic_seq_t;
|
|||||||
*/
|
*/
|
||||||
int ompi_btl_usnic_component_register(void);
|
int ompi_btl_usnic_component_register(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Routine which can be called from a debugger to print module, endpoint,
|
||||||
|
* fragment, and segment state to standard output. */
|
||||||
|
void ompi_btl_usnic_component_debug(void);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
#endif
|
#endif
|
||||||
|
@ -44,6 +44,12 @@
|
|||||||
# define opal_ifindextomtu btl_usnic_opal_ifindextomtu
|
# define opal_ifindextomtu btl_usnic_opal_ifindextomtu
|
||||||
# define opal_ifindextoname btl_usnic_opal_ifindextoname
|
# define opal_ifindextoname btl_usnic_opal_ifindextoname
|
||||||
|
|
||||||
|
/* v1.6 does not provide this _FOREACH macro, so define it here.
 *
 * Iterates over "list": "item" starts at the element following the list
 * sentinel and is advanced through each element's opal_list_next pointer
 * until it points back at the sentinel.  "type" is the element type that
 * "item" is cast to on every step. */
#define OPAL_LIST_FOREACH(item, list, type)                                 \
    for ((item) = (type *) (list)->opal_list_sentinel.opal_list_next;       \
         (item) != (type *) &(list)->opal_list_sentinel;                    \
         (item) = (type *) ((opal_list_item_t *) (item))->opal_list_next)
|
||||||
|
|
||||||
#elif (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION >= 7) || \
|
#elif (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION >= 7) || \
|
||||||
(OMPI_MAJOR_VERSION >= 2)
|
(OMPI_MAJOR_VERSION >= 2)
|
||||||
/* v1.7, v1.8 (to be released), trunk (v1.9), or later */
|
/* v1.7, v1.8 (to be released), trunk (v1.9), or later */
|
||||||
|
@ -83,6 +83,10 @@ static opal_event_t usnic_clock_timer_event;
|
|||||||
static bool usnic_clock_timer_event_set = false;
|
static bool usnic_clock_timer_event_set = false;
|
||||||
static struct timeval usnic_clock_timeout;
|
static struct timeval usnic_clock_timeout;
|
||||||
|
|
||||||
|
/* set to true in a debugger to enable even more verbose output when calling
|
||||||
|
* ompi_btl_usnic_component_debug */
|
||||||
|
static volatile bool dump_bitvectors = false;
|
||||||
|
|
||||||
static int usnic_component_open(void);
|
static int usnic_component_open(void);
|
||||||
static int usnic_component_close(void);
|
static int usnic_component_close(void);
|
||||||
static mca_btl_base_module_t **
|
static mca_btl_base_module_t **
|
||||||
@ -1349,3 +1353,177 @@ static bool filter_module(ompi_btl_usnic_module_t *module,
|
|||||||
/* Turn the match result into whether we should keep it or not */
|
/* Turn the match result into whether we should keep it or not */
|
||||||
return match ^ !filter_incl;
|
return match ^ !filter_incl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* could take indent as a parameter instead of hard-coding it */
|
||||||
|
static void dump_endpoint(ompi_btl_usnic_endpoint_t *endpoint)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
ompi_btl_usnic_frag_t *frag;
|
||||||
|
ompi_btl_usnic_send_segment_t *sseg;
|
||||||
|
int ep_jobid;
|
||||||
|
int ep_rank;
|
||||||
|
struct in_addr ia;
|
||||||
|
char ep_addr_str[INET_ADDRSTRLEN];
|
||||||
|
char tmp[128], str[2048];
|
||||||
|
|
||||||
|
ep_jobid = endpoint->endpoint_proc->proc_ompi->proc_name.jobid;
|
||||||
|
ep_rank = endpoint->endpoint_proc->proc_ompi->proc_name.vpid;
|
||||||
|
|
||||||
|
memset(ep_addr_str, 0x00, sizeof(ep_addr_str));
|
||||||
|
ia.s_addr = endpoint->endpoint_remote_addr.ipv4_addr;
|
||||||
|
inet_ntop(AF_INET, &ia, ep_addr_str, sizeof(ep_addr_str));
|
||||||
|
|
||||||
|
opal_output(0, " endpoint %p, %s job=%"PRIu32" rank=%"PRIu32" rts=%s s_credits=%"PRIi32"\n",
|
||||||
|
(void *)endpoint, ep_addr_str, ep_jobid, ep_rank,
|
||||||
|
(endpoint->endpoint_ready_to_send ? "true" : "false"),
|
||||||
|
endpoint->endpoint_send_credits);
|
||||||
|
opal_output(0, " endpoint->frag_send_queue:\n");
|
||||||
|
|
||||||
|
OPAL_LIST_FOREACH(frag, &endpoint->endpoint_frag_send_queue,
|
||||||
|
ompi_btl_usnic_frag_t) {
|
||||||
|
snprintf(str, sizeof(str), " --> frag %p, %s", (void *)frag,
|
||||||
|
usnic_frag_type(frag->uf_type));
|
||||||
|
switch (frag->uf_type) {
|
||||||
|
ompi_btl_usnic_small_send_frag_t *ssfrag = NULL;
|
||||||
|
ompi_btl_usnic_large_send_frag_t *lsfrag = NULL;
|
||||||
|
|
||||||
|
case OMPI_BTL_USNIC_FRAG_LARGE_SEND:
|
||||||
|
lsfrag = (ompi_btl_usnic_large_send_frag_t *)frag;
|
||||||
|
snprintf(tmp, sizeof(tmp), " tag=%"PRIu8" id=%"PRIu32" offset=%llu/%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
|
||||||
|
lsfrag->lsf_tag,
|
||||||
|
lsfrag->lsf_frag_id,
|
||||||
|
(unsigned long long)lsfrag->lsf_cur_offset,
|
||||||
|
(unsigned long long)lsfrag->lsf_base.sf_size,
|
||||||
|
lsfrag->lsf_base.sf_seg_post_cnt,
|
||||||
|
(unsigned long long)lsfrag->lsf_base.sf_ack_bytes_left);
|
||||||
|
strncat(str, tmp, sizeof(str) - strlen(str) - 1);
|
||||||
|
opal_output(0, "%s", str);
|
||||||
|
|
||||||
|
OPAL_LIST_FOREACH(sseg, &lsfrag->lsf_seg_chain,
|
||||||
|
ompi_btl_usnic_send_segment_t) {
|
||||||
|
/* chunk segs are just typedefs to send segs */
|
||||||
|
opal_output(0, " chunk seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
|
||||||
|
(void *)sseg,
|
||||||
|
(USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
|
||||||
|
"prio" : "data"),
|
||||||
|
sseg->ss_hotel_room,
|
||||||
|
sseg->ss_send_posted,
|
||||||
|
(sseg->ss_ack_pending ? "true" : "false"));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OMPI_BTL_USNIC_FRAG_SMALL_SEND:
|
||||||
|
ssfrag = (ompi_btl_usnic_small_send_frag_t *)frag;
|
||||||
|
snprintf(tmp, sizeof(tmp), " sf_size=%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
|
||||||
|
(unsigned long long)ssfrag->ssf_base.sf_size,
|
||||||
|
ssfrag->ssf_base.sf_seg_post_cnt,
|
||||||
|
(unsigned long long)ssfrag->ssf_base.sf_ack_bytes_left);
|
||||||
|
strncat(str, tmp, sizeof(str) - strlen(str) - 1);
|
||||||
|
opal_output(0, "%s", str);
|
||||||
|
|
||||||
|
sseg = &ssfrag->ssf_segment;
|
||||||
|
opal_output(0, " small seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
|
||||||
|
(void *)sseg,
|
||||||
|
(USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
|
||||||
|
"prio" : "data"),
|
||||||
|
sseg->ss_hotel_room,
|
||||||
|
sseg->ss_send_posted,
|
||||||
|
(sseg->ss_ack_pending ? "true" : "false"));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OMPI_BTL_USNIC_FRAG_PUT_DEST:
|
||||||
|
/* put_dest frags are just a typedef to generic frags */
|
||||||
|
snprintf(tmp, sizeof(tmp), " put_addr=%p\n", frag->uf_dst_seg[0].seg_addr.pval);
|
||||||
|
strncat(str, tmp, sizeof(str) - strlen(str) - 1);
|
||||||
|
opal_output(0, "%s", str);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now examine the hotel for this endpoint and dump any segments we find
|
||||||
|
* there. Yes, this peeks at members that are technically "private", so
|
||||||
|
* eventually this should be done through some sort of debug or iteration
|
||||||
|
* interface in the hotel code. */
|
||||||
|
opal_output(0, " endpoint->endpoint_sent_segs (%p):\n",
|
||||||
|
(void *)endpoint->endpoint_sent_segs);
|
||||||
|
for (i = 0; i < WINDOW_SIZE; ++i) {
|
||||||
|
sseg = endpoint->endpoint_sent_segs[i];
|
||||||
|
if (NULL != sseg) {
|
||||||
|
opal_output(0, " [%d] sseg=%p %s chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
|
||||||
|
i,
|
||||||
|
(void *)sseg,
|
||||||
|
usnic_seg_type(sseg->ss_base.us_type),
|
||||||
|
(USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
|
||||||
|
"prio" : "data"),
|
||||||
|
sseg->ss_hotel_room,
|
||||||
|
sseg->ss_send_posted,
|
||||||
|
(sseg->ss_ack_pending ? "true" : "false"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
opal_output(0, " ack_needed=%s n_t=%"PRIu64" n_a=%"PRIu64" n_r=%"PRIu64" n_s=%"PRIu64" rfstart=%"PRIu32"\n",
|
||||||
|
(endpoint->endpoint_ack_needed?"true":"false"),
|
||||||
|
endpoint->endpoint_next_seq_to_send,
|
||||||
|
endpoint->endpoint_ack_seq_rcvd,
|
||||||
|
endpoint->endpoint_next_contig_seq_to_recv,
|
||||||
|
endpoint->endpoint_highest_seq_rcvd,
|
||||||
|
endpoint->endpoint_rfstart);
|
||||||
|
|
||||||
|
if (dump_bitvectors) {
|
||||||
|
ompi_btl_usnic_snprintf_bool_array(str, sizeof(str),
|
||||||
|
endpoint->endpoint_rcvd_segs,
|
||||||
|
WINDOW_SIZE);
|
||||||
|
opal_output(0, " rcvd_segs 0x%s", str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ompi_btl_usnic_component_debug(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
ompi_btl_usnic_module_t *module;
|
||||||
|
ompi_btl_usnic_endpoint_t *endpoint;
|
||||||
|
ompi_btl_usnic_send_segment_t *sseg;
|
||||||
|
opal_list_item_t *item;
|
||||||
|
|
||||||
|
opal_output(0, "*** dumping usnic state for MPI_COMM_WORLD rank %d ***\n",
|
||||||
|
ompi_proc_local_proc->proc_name.vpid);
|
||||||
|
for (i = 0; i < (int)mca_btl_usnic_component.num_modules; ++i) {
|
||||||
|
module = mca_btl_usnic_component.usnic_active_modules[i];
|
||||||
|
|
||||||
|
opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n",
|
||||||
|
i, (void *)module, module->if_name,
|
||||||
|
(unsigned long long)module->max_frag_payload,
|
||||||
|
(unsigned long long)module->max_chunk_payload,
|
||||||
|
(unsigned long long)module->max_tiny_payload);
|
||||||
|
|
||||||
|
opal_output(0, " endpoints_with_sends:\n");
|
||||||
|
OPAL_LIST_FOREACH(endpoint, &module->endpoints_with_sends,
|
||||||
|
ompi_btl_usnic_endpoint_t) {
|
||||||
|
dump_endpoint(endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
opal_output(0, " endpoints_that_need_acks:\n");
|
||||||
|
OPAL_LIST_FOREACH(endpoint, &module->endpoints_that_need_acks,
|
||||||
|
ompi_btl_usnic_endpoint_t) {
|
||||||
|
dump_endpoint(endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the all_endpoints list uses a different list item member */
|
||||||
|
opal_output(0, " all_endpoints:\n");
|
||||||
|
item = opal_list_get_first(&module->all_endpoints);
|
||||||
|
while (item != opal_list_get_end(&module->all_endpoints)) {
|
||||||
|
endpoint = container_of(item, mca_btl_base_endpoint_t,
|
||||||
|
endpoint_endpoint_li);
|
||||||
|
item = opal_list_get_next(item);
|
||||||
|
dump_endpoint(endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
opal_output(0, " pending_resend_segs:\n");
|
||||||
|
OPAL_LIST_FOREACH(sseg, &module->pending_resend_segs,
|
||||||
|
ompi_btl_usnic_send_segment_t) {
|
||||||
|
opal_output(0, " sseg %p\n", (void *)sseg);
|
||||||
|
}
|
||||||
|
|
||||||
|
ompi_btl_usnic_print_stats(module, " manual", /*reset=*/false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -57,7 +57,6 @@ typedef enum {
|
|||||||
OMPI_BTL_USNIC_FRAG_PUT_DEST
|
OMPI_BTL_USNIC_FRAG_PUT_DEST
|
||||||
} ompi_btl_usnic_frag_type_t;
|
} ompi_btl_usnic_frag_type_t;
|
||||||
|
|
||||||
#if MSGDEBUG2
|
|
||||||
static inline const char *
|
static inline const char *
|
||||||
usnic_frag_type(ompi_btl_usnic_frag_type_t t)
|
usnic_frag_type(ompi_btl_usnic_frag_type_t t)
|
||||||
{
|
{
|
||||||
@ -68,7 +67,6 @@ usnic_frag_type(ompi_btl_usnic_frag_type_t t)
|
|||||||
default: return "unknown";
|
default: return "unknown";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
OMPI_BTL_USNIC_SEG_ACK,
|
OMPI_BTL_USNIC_SEG_ACK,
|
||||||
@ -77,7 +75,6 @@ typedef enum {
|
|||||||
OMPI_BTL_USNIC_SEG_RECV
|
OMPI_BTL_USNIC_SEG_RECV
|
||||||
} ompi_btl_usnic_seg_type_t;
|
} ompi_btl_usnic_seg_type_t;
|
||||||
|
|
||||||
#if MSGDEBUG2
|
|
||||||
static inline const char *
|
static inline const char *
|
||||||
usnic_seg_type(ompi_btl_usnic_seg_type_t t)
|
usnic_seg_type(ompi_btl_usnic_seg_type_t t)
|
||||||
{
|
{
|
||||||
@ -89,7 +86,6 @@ usnic_seg_type(ompi_btl_usnic_seg_type_t t)
|
|||||||
default: return "unknown";
|
default: return "unknown";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct ompi_btl_usnic_reg_t {
|
typedef struct ompi_btl_usnic_reg_t {
|
||||||
|
@ -86,6 +86,37 @@ void ompi_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid)
|
|||||||
ompi_btl_usnic_sprintf_mac(out, mac);
|
ompi_btl_usnic_sprintf_mac(out, mac);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Pretty-print the given boolean array as a hexadecimal string.
 *
 * Every group of four consecutive bools becomes one lowercase hex digit; the
 * first bool of a group is the most significant bit of that digit.  If alen
 * is not a multiple of four, the missing trailing bools of the last group
 * are treated as false (the array is never read past a[alen - 1]).
 *
 *   s     output buffer
 *   slen  size of s in bytes; should include space for the null terminator
 *   a     array of bools to print
 *   alen  number of elements in a
 *
 * At most slen - 1 digits are written, so the output is truncated if the
 * buffer is too small.  The result is always null-terminated, except when s
 * is NULL or slen is 0, in which case nothing is written at all. */
void ompi_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen)
{
    static const char hex_digits[] = "0123456789abcdef";
    size_t i = 0;
    size_t j = 0;

    /* nothing can be stored, and "slen - 1" below would wrap around */
    if (NULL == s || 0 == slen) {
        return;
    }

    /* compute one nybble at a time */
    while (i < alen && j < slen - 1) {
        unsigned int nybble = 0;
        size_t remaining = alen - i;    /* >= 1 because i < alen */
        size_t bit;

        /* first bool is the leftmost (most significant) bit of the nybble */
        for (bit = 0; bit < 4; ++bit) {
            nybble <<= 1;
            if (bit < remaining && a[i + bit]) {
                nybble |= 1u;
            }
        }

        /* table lookup so that values 10..15 come out as 'a'..'f' */
        s[j] = hex_digits[nybble];

        ++j;
        i += 4;
    }

    s[j] = '\0';
    assert(j < slen);
}
|
||||||
|
|
||||||
|
|
||||||
int ompi_btl_usnic_find_ip(ompi_btl_usnic_module_t *module, uint8_t mac[6])
|
int ompi_btl_usnic_find_ip(ompi_btl_usnic_module_t *module, uint8_t mac[6])
|
||||||
{
|
{
|
||||||
|
@ -52,6 +52,8 @@ void ompi_btl_usnic_sprintf_mac(char *out, const uint8_t mac[6]);
|
|||||||
|
|
||||||
void ompi_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid);
|
void ompi_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid);
|
||||||
|
|
||||||
|
void ompi_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen);
|
||||||
|
|
||||||
int ompi_btl_usnic_find_ip(ompi_btl_usnic_module_t *module, uint8_t mac[6]);
|
int ompi_btl_usnic_find_ip(ompi_btl_usnic_module_t *module, uint8_t mac[6]);
|
||||||
|
|
||||||
void ompi_btl_usnic_gid_to_mac(union ibv_gid *gid, uint8_t mac[6]);
|
void ompi_btl_usnic_gid_to_mac(union ibv_gid *gid, uint8_t mac[6]);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user