1
1
* Make recv short block events use the callback mechanism so that we can
  add overflow debugging

This commit was SVN r25212.
Этот коммит содержится в:
Brian Barrett 2011-09-29 21:59:48 +00:00
родитель c08ea5c0f5
Коммит 758f8a4d87
6 изменённых файлов: 63 добавлений и 26 удалений

Просмотреть файл

@ -205,10 +205,14 @@ ompi_mtl_portals4_progress(void)
case PTL_EVENT_AUTO_UNLINK:
break;
case PTL_EVENT_AUTO_FREE:
if (OMPI_SUCCESS != (ret = ompi_mtl_portals4_recv_short_block_repost(&ev))) {
opal_output(ompi_mtl_base_output,
"Error returned from PTL_EVENT_FREE callback: %d", ret);
abort();
if (NULL != ev.user_ptr) {
ptl_request = ev.user_ptr;
ret = ptl_request->event_callback(&ev, ptl_request);
if (OMPI_SUCCESS != ret) {
opal_output(ompi_mtl_base_output,
"Error returned from auto_free event callback: %d", ret);
abort();
}
}
break;
case PTL_EVENT_SEARCH:

Просмотреть файл

@ -60,7 +60,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits);
if (msg_length > ptl_request->delivery_len) {
opal_output(ompi_mtl_base_output, "truncate expected: %d %d",
opal_output(ompi_mtl_base_output, "truncate expected: %ld %ld",
msg_length, ptl_request->delivery_len);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
}
@ -183,8 +183,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits);
if (msg_length > ptl_request->delivery_len) {
opal_output(ompi_mtl_base_output, "truncate unexpected: %d %d",
msg_length, ptl_request->delivery_len);
opal_output(ompi_mtl_base_output, "truncate unexpected: %ld %ld %d",
msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
}
@ -237,8 +237,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short",
ptl_request->opcount, ptl_request->hdr_data));
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)",
ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
ptl_request->super.super.completion_callback(&ptl_request->super.super);
} else {
@ -357,10 +357,10 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Recv %d from %x,%x of length %d\n",
"Recv %d from %x,%x of length %d (0x%lx, 0x%lx)\n",
ptl_request->opcount,
remote_proc.phys.nid, remote_proc.phys.pid,
(int)length));
(int)length, match_bits, ignore_bits));
me.start = start;
me.length = length;

Просмотреть файл

@ -30,6 +30,34 @@ OBJ_CLASS_INSTANCE(ompi_mtl_portals4_recv_short_block_t,
opal_list_item_t,
NULL, NULL);
static inline int ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block);
/*
 * Event callback installed on the request embedded in a short-receive block.
 *
 * ev:               the Portals event being dispatched.
 * ptl_base_request: the embedded request, viewed through its base type.
 *
 * An AUTO_FREE event re-activates the owning block and returns that call's
 * result.  Any other event type is only reported through the verbose
 * output stream and yields OMPI_SUCCESS.
 */
static int
ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
                                      ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    ompi_mtl_portals4_recv_short_request_t *short_req =
        (ompi_mtl_portals4_recv_short_request_t*) ptl_base_request;

    /* Everything except AUTO_FREE is informational only. */
    if (PTL_EVENT_AUTO_FREE != ev->type) {
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "OVERFLOW EVENT %d, hdr_data = %lx", ev->type, ev->hdr_data));
        return OMPI_SUCCESS;
    }

    return ompi_mtl_portals4_activate_block(short_req->block);
}
/*
 * Re-post a short-receive block in response to an event on its match entry.
 *
 * ev: event whose user_ptr is the request embedded in the block (the match
 *     entry is appended with &block->request as its user pointer).
 *
 * Returns the result of re-activating the block: OMPI_SUCCESS, or the
 * translated Portals error.
 */
int
ompi_mtl_portals4_recv_short_block_repost(ptl_event_t *ev)
{
    /* user_ptr refers to the embedded request, not to the block itself, so
       reach the block through the request's back-pointer. */
    ompi_mtl_portals4_recv_short_request_t *ptl_request = ev->user_ptr;

    return ompi_mtl_portals4_activate_block(ptl_request->block);
}
static ompi_mtl_portals4_recv_short_block_t*
ompi_mtl_portals4_recv_short_block_init(mca_mtl_portals4_module_t *mtl)
{
@ -41,6 +69,8 @@ ompi_mtl_portals4_recv_short_block_init(mca_mtl_portals4_module_t *mtl)
if (block->start == NULL) return NULL;
block->me_h = PTL_INVALID_HANDLE;
block->request.block = block;
block->request.super.event_callback = ompi_mtl_portals4_recv_block_progress;
return block;
}
@ -81,7 +111,10 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
me.min_free = block->mtl->eager_limit;
me.uid = PTL_UID_ANY;
me.options = PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | PTL_ME_NO_TRUNCATE |
PTL_ME_MAY_ALIGN | PTL_ME_ACK_DISABLE | PTL_ME_EVENT_COMM_DISABLE;
PTL_ME_MAY_ALIGN | PTL_ME_ACK_DISABLE;
#if OPAL_ENABLE_DEBUG
me.options |= PTL_ME_EVENT_COMM_DISABLE;
#endif
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = match_bits;
@ -91,19 +124,12 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
ompi_mtl_portals4.send_idx,
&me,
PTL_OVERFLOW,
block,
&block->request,
&block->me_h);
return (ret == PTL_OK) ? OMPI_SUCCESS : ompi_mtl_portals4_get_error(ret);
}
/*
 * Re-post a short-receive block.  The event's user pointer is taken to be
 * the block itself and is handed to ompi_mtl_portals4_activate_block();
 * that call's result is returned unchanged.
 */
int
ompi_mtl_portals4_recv_short_block_repost(ptl_event_t *ev)
{
    ompi_mtl_portals4_recv_short_block_t *block = ev->user_ptr;

    return ompi_mtl_portals4_activate_block(block);
}
int
ompi_mtl_portals4_recv_short_init(mca_mtl_portals4_module_t *mtl)
{

Просмотреть файл

@ -25,6 +25,7 @@ struct ompi_mtl_portals4_recv_short_block_t {
mca_mtl_portals4_module_t *mtl;
void *start;
ptl_handle_me_t me_h;
struct ompi_mtl_portals4_recv_short_request_t request;
};
typedef struct ompi_mtl_portals4_recv_short_block_t ompi_mtl_portals4_recv_short_block_t;
OBJ_CLASS_DECLARATION(ompi_mtl_portals4_recv_short_block_t);

Просмотреть файл

@ -66,12 +66,18 @@ struct ompi_mtl_portals4_probe_request_t {
};
typedef struct ompi_mtl_portals4_probe_request_t ompi_mtl_portals4_probe_request_t;
/*
 * Request object embedded in a short-receive block so that events on the
 * block can be dispatched through the base request's event callback.
 */
struct ompi_mtl_portals4_recv_short_request_t {
/* Base request; kept as the first member because the block callback casts
   a base-request pointer back to this type. */
ompi_mtl_portals4_base_request_t super;
/* Back-pointer to the short-receive block that contains this request. */
struct ompi_mtl_portals4_recv_short_block_t *block;
};
typedef struct ompi_mtl_portals4_recv_short_request_t ompi_mtl_portals4_recv_short_request_t;
/*
 * Generic request: a union of the send, receive, probe and short-receive
 * request types, so a single object can hold any one of them.
 */
struct ompi_mtl_portals4_request_t {
union {
ompi_mtl_portals4_send_request_t send;
ompi_mtl_portals4_recv_request_t recv;
ompi_mtl_portals4_probe_request_t probe;
ompi_mtl_portals4_recv_short_request_t recv_short;
} u;
};
typedef struct ompi_mtl_portals4_request_t ompi_mtl_portals4_request_t;

Просмотреть файл

@ -123,8 +123,8 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Send %d short send with hdr_data 0x%lx",
ptl_request->opcount, hdr_data));
"Send %d short send with hdr_data 0x%lx (0x%lx)",
ptl_request->opcount, hdr_data, match_bits));
ret = PtlPut(ptl_request->md_h,
0,
@ -208,8 +208,8 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int tag,
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Send %d short sync send with hdr_data 0x%lx",
ptl_request->opcount, hdr_data));
"Send %d short sync send with hdr_data 0x%lx (0x%lx)",
ptl_request->opcount, hdr_data, match_bits));
ret = PtlPut(ptl_request->md_h,
0,
@ -294,8 +294,8 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Send %d long send with hdr_data 0x%lx",
ptl_request->opcount, hdr_data));
"Send %d long send with hdr_data 0x%lx (0x%lx)",
ptl_request->opcount, hdr_data, match_bits));
if (ompi_mtl_portals4.protocol == rndv) {
ret = PtlPut(ptl_request->md_h,