Clean up debugging logging a bit.
MSGDEBUG2 now means "print a one-liner for all PML calls into BTL, and also when BTL calls PML with a recv completion (not send completions)" MSGDEBUG1 means print more internal gory detail MSGDEBUG is gone, replaced by MSGDEBUG1 In the process also found that PUT_DEST style fragments could potentially be leaked in usnic_free() since send_fragment tests were being applied to see if it was eligible to be freed. This commit was SVN r29185.
Этот коммит содержится в:
родитель
f35d9b50e3
Коммит
651d61f1a3
@ -59,12 +59,11 @@ get_nsec(void)
|
||||
#define container_of(ptr, type, member) ( \
|
||||
(type *)( ((char *)(ptr)) - offsetof(type,member) ))
|
||||
|
||||
/* Set to 1 to turn out a LOT of debugging ouput */
|
||||
#define MSGDEBUG2 (MSGDEBUG1||0) /* temp */
|
||||
#define MSGDEBUG1 (MSGDEBUG||0) /* temp */
|
||||
#define MSGDEBUG 0
|
||||
/* Set to 1 to get frag history */
|
||||
#define HISTORY 0
|
||||
/* MSGDEBUG2 prints 1 line at each BTL entry point */
|
||||
#define MSGDEBUG2 (MSGDEBUG1||0)
|
||||
/* MSGDEBUG1 prints more info about arguments and internal functions */
|
||||
#define MSGDEBUG1 0
|
||||
|
||||
/* Set to >0 to randomly drop received frags. The higher the number,
|
||||
the more frequent the drops. */
|
||||
#define WANT_RECV_FRAG_DROPS 0
|
||||
|
@ -72,8 +72,8 @@ ompi_btl_usnic_handle_ack(
|
||||
|
||||
/* ignore if this is an old ACK */
|
||||
if (ack_seq < endpoint->endpoint_ack_seq_rcvd) {
|
||||
#if MSGDEBUG
|
||||
opal_output(0, "Got OLD DUP ACK seq %d < %d\n",
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Got OLD DUP ACK seq %"UDSEQ" < %"UDSEQ"\n",
|
||||
ack_seq, endpoint->endpoint_ack_seq_rcvd);
|
||||
#endif
|
||||
++module->num_old_dup_acks;
|
||||
@ -100,7 +100,7 @@ ompi_btl_usnic_handle_ack(
|
||||
|
||||
assert(sseg != NULL);
|
||||
assert(sseg->ss_base.us_btl_header->seq == is);
|
||||
#if MSGDEBUG
|
||||
#if MSGDEBUG1
|
||||
if (sseg->ss_hotel_room == -1) {
|
||||
opal_output(0, "=== ACKed frag in sent_frags array is not in hotel/enqueued, module %p, endpoint %p, seg %p, seq %" UDSEQ ", slot %lu",
|
||||
(void*) module, (void*) endpoint,
|
||||
@ -142,7 +142,7 @@ ompi_btl_usnic_handle_ack(
|
||||
(frag->sf_base.uf_base.des_flags &
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK))) {
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "completion callback for frag=%p, dest=%p\n",
|
||||
opal_output(0, "completion callback for put frag=%p, dest=%p\n",
|
||||
(void*)frag, frag->sf_base.uf_dst_seg[0].seg_addr.pval);
|
||||
#endif
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
@ -255,7 +255,7 @@ ompi_btl_usnic_ack_timeout(
|
||||
endpoint = seg->ss_parent_frag->sf_endpoint;
|
||||
module = endpoint->endpoint_module;
|
||||
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
{
|
||||
opal_output(0, "Send timeout! seg %p, room %d, seq %" UDSEQ "\n",
|
||||
(void*)seg, seg->ss_hotel_room,
|
||||
|
@ -466,6 +466,28 @@ ompi_btl_usnic_send_frag_return_cond(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a frag if it's all done and owned by BTL
|
||||
* If this is a PUT destination, only condition is that we own it. If it's
|
||||
* a send frag, there are other conditions, so use the specific send frag
|
||||
* return checker.
|
||||
*/
|
||||
static inline void
|
||||
ompi_btl_usnic_frag_return_cond(
|
||||
struct ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_frag_t *frag)
|
||||
{
|
||||
if (OMPI_BTL_USNIC_FRAG_PUT_DEST == frag->uf_type) {
|
||||
if (OPAL_LIKELY(frag->uf_base.des_flags &
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
ompi_btl_usnic_frag_return(module, frag);
|
||||
}
|
||||
} else {
|
||||
ompi_btl_usnic_send_frag_return_cond(module,
|
||||
(ompi_btl_usnic_send_frag_t *)frag);
|
||||
}
|
||||
}
|
||||
|
||||
static inline ompi_btl_usnic_chunk_segment_t *
|
||||
ompi_btl_usnic_chunk_segment_alloc(
|
||||
ompi_btl_usnic_module_t *module)
|
||||
|
@ -290,13 +290,18 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
|
||||
static int usnic_free(struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
ompi_btl_usnic_send_frag_t* frag = (ompi_btl_usnic_send_frag_t*)des;
|
||||
ompi_btl_usnic_frag_t* frag = (ompi_btl_usnic_frag_t*)des;
|
||||
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "usnic_free: %p\n", (void*)frag);
|
||||
opal_output(0, "usnic_free: %p (%s)\n", (void*)frag,
|
||||
usnic_frag_type(frag->uf_type));
|
||||
#endif
|
||||
/* calling free routine gives us ownership - we need to make sure
|
||||
* the flag is set for lower layers.
|
||||
*/
|
||||
frag->uf_base.des_flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
|
||||
ompi_btl_usnic_send_frag_return_cond((struct ompi_btl_usnic_module_t *)btl,
|
||||
ompi_btl_usnic_frag_return_cond((struct ompi_btl_usnic_module_t *)btl,
|
||||
frag);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -359,7 +364,7 @@ usnic_prepare_src(
|
||||
uint32_t iov_count;
|
||||
size_t max_data;
|
||||
int rc;
|
||||
#ifdef MSGDEBUG2
|
||||
#if MSGDEBUG2
|
||||
size_t osize = *size;
|
||||
#endif
|
||||
|
||||
@ -1007,9 +1012,6 @@ usnic_handle_large_send(
|
||||
|
||||
/* save back pointer to fragment */
|
||||
sseg->ss_parent_frag = frag;
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "handle large, frag=%p, cur_sge=%d, sge_left=%d left=%d\n", frag, lfrag->lsf_cur_sge, lfrag->lsf_bytes_left_in_sge, lfrag->lsf_bytes_left);
|
||||
#endif
|
||||
|
||||
/* keep copying in as long as we have space and there is data
|
||||
* to be copied.
|
||||
@ -1097,7 +1099,7 @@ usnic_handle_large_send(
|
||||
(frag->sf_base.uf_base.des_flags &
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "callback for large frag %p, len=%zd\n",
|
||||
(void *)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
|
||||
frag->sf_size);
|
||||
@ -1206,7 +1208,7 @@ ompi_btl_usnic_module_progress_sends(
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) ==
|
||||
(MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "callback frag small %p, len=%"PRIu64"\n",
|
||||
(void*)frag,
|
||||
(unsigned long)frag->sf_base.uf_src_seg[0].seg_len);
|
||||
@ -1361,7 +1363,7 @@ usnic_send(
|
||||
sseg->ss_send_desc.send_flags |= IBV_SEND_INLINE;
|
||||
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
|
||||
sseg->ss_base.us_btl_header->tag = tag;
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "INLINE send, conv=%p", (void *)frag->sf_convertor);
|
||||
#endif
|
||||
|
||||
@ -1392,7 +1394,7 @@ usnic_send(
|
||||
*/
|
||||
if (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||
if (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "immediate callback for frag %p\n", (void *)frag);
|
||||
#endif
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
@ -1401,7 +1403,7 @@ usnic_send(
|
||||
rc = 0;
|
||||
descriptor->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
} else {
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "skipping callback for frag %p\n", (void *)frag);
|
||||
#endif
|
||||
rc = 1;
|
||||
|
@ -80,11 +80,9 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_sprintf_gid_mac(dest_mac,
|
||||
&seg->rs_protocol_header->grh.dgid);
|
||||
|
||||
#if MSGDEBUG
|
||||
opal_output(0, "Got message from MAC %s", src_mac);
|
||||
opal_output(0, "Looking for sender: 0x%016lx",
|
||||
bseg->us_btl_header->sender);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Find out who sent this segment */
|
||||
@ -143,6 +141,11 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
if (hdr->put_addr == NULL) {
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
seg->rs_segment.seg_len = hdr->payload_len;
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "small recv complete, pass up %u bytes, tag=%d\n",
|
||||
(unsigned)bseg->us_btl_header->payload_len,
|
||||
(int)bseg->us_btl_header->tag);
|
||||
#endif
|
||||
reg->cbfunc(&module->super, hdr->tag, &seg->rs_desc, reg->cbdata);
|
||||
|
||||
/*
|
||||
@ -226,12 +229,12 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
if (fip->rfi_data == NULL) {
|
||||
abort();
|
||||
}
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Start large recv to %p, size=%d\n",
|
||||
fip->rfi_data, chunk_hdr->ch_frag_size);
|
||||
#endif
|
||||
} else {
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
||||
#endif
|
||||
fip->rfi_data = chunk_hdr->ch_hdr.put_addr;
|
||||
@ -281,7 +284,7 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
||||
|
||||
/* Pass this segment up to the PML */
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, " large FRAG complete, pass up %p, %u bytes, tag=%d\n",
|
||||
opal_output(0, "large recv complete, pass up %p, %u bytes, tag=%d\n",
|
||||
desc.des_dst->seg_addr.pval,
|
||||
(unsigned)desc.des_dst->seg_len,
|
||||
(int)chunk_hdr->ch_hdr.tag);
|
||||
@ -302,9 +305,9 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
||||
(ompi_free_list_item_t *)fip->rfi_data);
|
||||
}
|
||||
|
||||
#if MSGDEBUG2
|
||||
#if MSGDEBUG1
|
||||
} else {
|
||||
opal_output(0, "PUT complete, suppressing callback\n");
|
||||
opal_output(0, "PUT recv complete, no callback\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -100,7 +100,7 @@ ompi_btl_usnic_update_window(
|
||||
endpoint->endpoint_next_contig_seq_to_recv++;
|
||||
i = WINDOW_SIZE_MOD(i + 1);
|
||||
|
||||
#if MSGDEBUG
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Advance window to %d; next seq to send %" UDSEQ, i,
|
||||
endpoint->endpoint_next_contig_seq_to_recv);
|
||||
#endif
|
||||
@ -154,7 +154,7 @@ ompi_btl_usnic_check_rx_seq(
|
||||
seq = seg->rs_base.us_btl_header->seq;
|
||||
if (seq < endpoint->endpoint_next_contig_seq_to_recv ||
|
||||
seq >= endpoint->endpoint_next_contig_seq_to_recv + WINDOW_SIZE) {
|
||||
#if MSGDEBUG
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ " outside of window (%" UDSEQ " - %" UDSEQ "), %p, module %p -- DROPPED\n",
|
||||
(void*)endpoint, seg->rs_base.us_btl_header->seq,
|
||||
endpoint->endpoint_next_contig_seq_to_recv,
|
||||
@ -199,7 +199,7 @@ ompi_btl_usnic_check_rx_seq(
|
||||
i = seq - endpoint->endpoint_next_contig_seq_to_recv;
|
||||
i = WINDOW_SIZE_MOD(i + endpoint->endpoint_rfstart);
|
||||
if (endpoint->endpoint_rcvd_segs[i]) {
|
||||
#if MSGDEBUG
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ ", seg %p: duplicate -- DROPPED\n",
|
||||
(void*) endpoint, seg->rs_base.us_btl_header->seq, (void*) seg);
|
||||
#endif
|
||||
@ -354,10 +354,6 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
ompi_btl_usnic_endpoint_t *endpoint;
|
||||
int rc;
|
||||
#if MSGDEBUG1
|
||||
char src_mac[32];
|
||||
char dest_mac[32];
|
||||
#endif
|
||||
|
||||
bseg = &seg->rs_base;
|
||||
|
||||
|
@ -231,10 +231,10 @@ ompi_btl_usnic_endpoint_enqueue_frag(
|
||||
ompi_btl_usnic_send_frag_t *frag)
|
||||
{
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "enq_frag: frag=%p, endpoint=%p, %s, len=%"PRIu64"\n",
|
||||
opal_output(0, "enq_frag: frag=%p, endpoint=%p, %s, len=%lu\n",
|
||||
(void*)frag, (void*)endpoint,
|
||||
usnic_frag_type(frag->sf_base.uf_type),
|
||||
frag->sf_base.uf_base.des_src->seg_len);
|
||||
(long unsigned)frag->sf_base.uf_base.des_src->seg_len);
|
||||
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
|
||||
ompi_btl_usnic_large_send_frag_t *lfrag;
|
||||
lfrag = (ompi_btl_usnic_large_send_frag_t *)frag;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user