1
1

Clean up debugging logging a bit.

MSGDEBUG2 now means "print a one-liner for all PML calls into BTL, and
also when BTL calls PML with a recv completion (not send completions)"
MSGDEBUG1 means print more internal gory detail
MSGDEBUG is gone, replaced by MSGDEBUG1

In the process also found that PUT_DEST style fragments could
potentially be leaked in usnic_free() since send_fragment tests were
being applied to see if it was eligible to be freed.

This commit was SVN r29185.
Этот коммит содержится в:
Reese Faucette 2013-09-17 07:29:40 +00:00
родитель f35d9b50e3
Коммит 651d61f1a3
7 изменённых файлов: 61 добавлений и 39 удалений

Просмотреть файл

@ -59,12 +59,11 @@ get_nsec(void)
#define container_of(ptr, type, member) ( \
(type *)( ((char *)(ptr)) - offsetof(type,member) ))
/* Set to 1 to turn out a LOT of debugging ouput */
#define MSGDEBUG2 (MSGDEBUG1||0) /* temp */
#define MSGDEBUG1 (MSGDEBUG||0) /* temp */
#define MSGDEBUG 0
/* Set to 1 to get frag history */
#define HISTORY 0
/* MSGDEBUG2 prints 1 line at each BTL entry point */
#define MSGDEBUG2 (MSGDEBUG1||0)
/* MSGDEBUG1 prints more info about arguments and internal functions */
#define MSGDEBUG1 0
/* Set to >0 to randomly drop received frags. The higher the number,
the more frequent the drops. */
#define WANT_RECV_FRAG_DROPS 0

Просмотреть файл

@ -72,8 +72,8 @@ ompi_btl_usnic_handle_ack(
/* ignore if this is an old ACK */
if (ack_seq < endpoint->endpoint_ack_seq_rcvd) {
#if MSGDEBUG
opal_output(0, "Got OLD DUP ACK seq %d < %d\n",
#if MSGDEBUG1
opal_output(0, "Got OLD DUP ACK seq %"UDSEQ" < %"UDSEQ"\n",
ack_seq, endpoint->endpoint_ack_seq_rcvd);
#endif
++module->num_old_dup_acks;
@ -100,7 +100,7 @@ ompi_btl_usnic_handle_ack(
assert(sseg != NULL);
assert(sseg->ss_base.us_btl_header->seq == is);
#if MSGDEBUG
#if MSGDEBUG1
if (sseg->ss_hotel_room == -1) {
opal_output(0, "=== ACKed frag in sent_frags array is not in hotel/enqueued, module %p, endpoint %p, seg %p, seq %" UDSEQ ", slot %lu",
(void*) module, (void*) endpoint,
@ -142,7 +142,7 @@ ompi_btl_usnic_handle_ack(
(frag->sf_base.uf_base.des_flags &
MCA_BTL_DES_SEND_ALWAYS_CALLBACK))) {
#if MSGDEBUG2
opal_output(0, "completion callback for frag=%p, dest=%p\n",
opal_output(0, "completion callback for put frag=%p, dest=%p\n",
(void*)frag, frag->sf_base.uf_dst_seg[0].seg_addr.pval);
#endif
frag->sf_base.uf_base.des_cbfunc(&module->super,
@ -255,7 +255,7 @@ ompi_btl_usnic_ack_timeout(
endpoint = seg->ss_parent_frag->sf_endpoint;
module = endpoint->endpoint_module;
#if MSGDEBUG2
#if MSGDEBUG1
{
opal_output(0, "Send timeout! seg %p, room %d, seq %" UDSEQ "\n",
(void*)seg, seg->ss_hotel_room,

Просмотреть файл

@ -466,6 +466,28 @@ ompi_btl_usnic_send_frag_return_cond(
}
}
/*
* Return a frag if it's all done and owned by BTL
* If this is a PUT destination, only condition is that we own it. If it's
* a send frag, there are other conditions, so use the specific send frag
* return checker.
*/
static inline void
ompi_btl_usnic_frag_return_cond(
struct ompi_btl_usnic_module_t *module,
ompi_btl_usnic_frag_t *frag)
{
if (OMPI_BTL_USNIC_FRAG_PUT_DEST == frag->uf_type) {
if (OPAL_LIKELY(frag->uf_base.des_flags &
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
ompi_btl_usnic_frag_return(module, frag);
}
} else {
ompi_btl_usnic_send_frag_return_cond(module,
(ompi_btl_usnic_send_frag_t *)frag);
}
}
static inline ompi_btl_usnic_chunk_segment_t *
ompi_btl_usnic_chunk_segment_alloc(
ompi_btl_usnic_module_t *module)

Просмотреть файл

@ -290,13 +290,18 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
static int usnic_free(struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des)
{
ompi_btl_usnic_send_frag_t* frag = (ompi_btl_usnic_send_frag_t*)des;
ompi_btl_usnic_frag_t* frag = (ompi_btl_usnic_frag_t*)des;
#if MSGDEBUG2
opal_output(0, "usnic_free: %p\n", (void*)frag);
opal_output(0, "usnic_free: %p (%s)\n", (void*)frag,
usnic_frag_type(frag->uf_type));
#endif
/* calling free routine gives us ownership - we need to make sure
* the flag is set for lower layers.
*/
frag->uf_base.des_flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
ompi_btl_usnic_send_frag_return_cond((struct ompi_btl_usnic_module_t *)btl,
ompi_btl_usnic_frag_return_cond((struct ompi_btl_usnic_module_t *)btl,
frag);
return OMPI_SUCCESS;
@ -359,7 +364,7 @@ usnic_prepare_src(
uint32_t iov_count;
size_t max_data;
int rc;
#ifdef MSGDEBUG2
#if MSGDEBUG2
size_t osize = *size;
#endif
@ -1007,9 +1012,6 @@ usnic_handle_large_send(
/* save back pointer to fragment */
sseg->ss_parent_frag = frag;
#if MSGDEBUG1
opal_output(0, "handle large, frag=%p, cur_sge=%d, sge_left=%d left=%d\n", frag, lfrag->lsf_cur_sge, lfrag->lsf_bytes_left_in_sge, lfrag->lsf_bytes_left);
#endif
/* keep copying in as long as we have space and there is data
* to be copied.
@ -1097,7 +1099,7 @@ usnic_handle_large_send(
(frag->sf_base.uf_base.des_flags &
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "callback for large frag %p, len=%zd\n",
(void *)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
frag->sf_size);
@ -1206,7 +1208,7 @@ ompi_btl_usnic_module_progress_sends(
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) ==
(MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "callback frag small %p, len=%"PRIu64"\n",
(void*)frag,
(unsigned long)frag->sf_base.uf_src_seg[0].seg_len);
@ -1361,7 +1363,7 @@ usnic_send(
sseg->ss_send_desc.send_flags |= IBV_SEND_INLINE;
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
sseg->ss_base.us_btl_header->tag = tag;
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "INLINE send, conv=%p", (void *)frag->sf_convertor);
#endif
@ -1392,7 +1394,7 @@ usnic_send(
*/
if (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
if (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "immediate callback for frag %p\n", (void *)frag);
#endif
frag->sf_base.uf_base.des_cbfunc(&module->super,
@ -1401,7 +1403,7 @@ usnic_send(
rc = 0;
descriptor->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
} else {
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "skipping callback for frag %p\n", (void *)frag);
#endif
rc = 1;

Просмотреть файл

@ -80,11 +80,9 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
ompi_btl_usnic_sprintf_gid_mac(dest_mac,
&seg->rs_protocol_header->grh.dgid);
#if MSGDEBUG
opal_output(0, "Got message from MAC %s", src_mac);
opal_output(0, "Looking for sender: 0x%016lx",
bseg->us_btl_header->sender);
#endif
#endif
/* Find out who sent this segment */
@ -143,6 +141,11 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
if (hdr->put_addr == NULL) {
reg = mca_btl_base_active_message_trigger + hdr->tag;
seg->rs_segment.seg_len = hdr->payload_len;
#if MSGDEBUG2
opal_output(0, "small recv complete, pass up %u bytes, tag=%d\n",
(unsigned)bseg->us_btl_header->payload_len,
(int)bseg->us_btl_header->tag);
#endif
reg->cbfunc(&module->super, hdr->tag, &seg->rs_desc, reg->cbdata);
/*
@ -226,12 +229,12 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
if (fip->rfi_data == NULL) {
abort();
}
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "Start large recv to %p, size=%d\n",
fip->rfi_data, chunk_hdr->ch_frag_size);
#endif
} else {
#if MSGDEBUG2
#if MSGDEBUG1
opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
#endif
fip->rfi_data = chunk_hdr->ch_hdr.put_addr;
@ -281,7 +284,7 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
/* Pass this segment up to the PML */
#if MSGDEBUG2
opal_output(0, " large FRAG complete, pass up %p, %u bytes, tag=%d\n",
opal_output(0, "large recv complete, pass up %p, %u bytes, tag=%d\n",
desc.des_dst->seg_addr.pval,
(unsigned)desc.des_dst->seg_len,
(int)chunk_hdr->ch_hdr.tag);
@ -302,9 +305,9 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
(ompi_free_list_item_t *)fip->rfi_data);
}
#if MSGDEBUG2
#if MSGDEBUG1
} else {
opal_output(0, "PUT complete, suppressing callback\n");
opal_output(0, "PUT recv complete, no callback\n");
#endif
}

Просмотреть файл

@ -100,7 +100,7 @@ ompi_btl_usnic_update_window(
endpoint->endpoint_next_contig_seq_to_recv++;
i = WINDOW_SIZE_MOD(i + 1);
#if MSGDEBUG
#if MSGDEBUG1
opal_output(0, "Advance window to %d; next seq to send %" UDSEQ, i,
endpoint->endpoint_next_contig_seq_to_recv);
#endif
@ -154,7 +154,7 @@ ompi_btl_usnic_check_rx_seq(
seq = seg->rs_base.us_btl_header->seq;
if (seq < endpoint->endpoint_next_contig_seq_to_recv ||
seq >= endpoint->endpoint_next_contig_seq_to_recv + WINDOW_SIZE) {
#if MSGDEBUG
#if MSGDEBUG1
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ " outside of window (%" UDSEQ " - %" UDSEQ "), %p, module %p -- DROPPED\n",
(void*)endpoint, seg->rs_base.us_btl_header->seq,
endpoint->endpoint_next_contig_seq_to_recv,
@ -199,7 +199,7 @@ ompi_btl_usnic_check_rx_seq(
i = seq - endpoint->endpoint_next_contig_seq_to_recv;
i = WINDOW_SIZE_MOD(i + endpoint->endpoint_rfstart);
if (endpoint->endpoint_rcvd_segs[i]) {
#if MSGDEBUG
#if MSGDEBUG1
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ ", seg %p: duplicate -- DROPPED\n",
(void*) endpoint, seg->rs_base.us_btl_header->seq, (void*) seg);
#endif
@ -354,10 +354,6 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
mca_btl_active_message_callback_t* reg;
ompi_btl_usnic_endpoint_t *endpoint;
int rc;
#if MSGDEBUG1
char src_mac[32];
char dest_mac[32];
#endif
bseg = &seg->rs_base;

Просмотреть файл

@ -231,10 +231,10 @@ ompi_btl_usnic_endpoint_enqueue_frag(
ompi_btl_usnic_send_frag_t *frag)
{
#if MSGDEBUG1
opal_output(0, "enq_frag: frag=%p, endpoint=%p, %s, len=%"PRIu64"\n",
opal_output(0, "enq_frag: frag=%p, endpoint=%p, %s, len=%lu\n",
(void*)frag, (void*)endpoint,
usnic_frag_type(frag->sf_base.uf_type),
frag->sf_base.uf_base.des_src->seg_len);
(long unsigned)frag->sf_base.uf_base.des_src->seg_len);
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
ompi_btl_usnic_large_send_frag_t *lfrag;
lfrag = (ompi_btl_usnic_large_send_frag_t *)frag;