Moved replay macros to functions. The performance improvement in process recovery is not worth the debugging hassle.
This commit was SVN r15703.
Этот коммит содержится в:
родитель
5a792a3fad
Коммит
a5d0e53bb3
@ -8,79 +8,91 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "vprotocol_pessimist.h"
|
||||
#include "vprotocol_pessimist_eventlog.h"
|
||||
|
||||
void vprotocol_pessimist_matching_replay(int *src) {
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
vprotocol_pessimist_clock_t max = 0;
|
||||
#endif
|
||||
mca_vprotocol_pessimist_event_t *event;
|
||||
|
||||
#if 0
|
||||
static inline void replay_delivery_order(
|
||||
struct mca_ptl_base_module_t* ptl,
|
||||
mca_ptl_base_recv_request_t* req,
|
||||
size_t bytes_received,
|
||||
size_t bytes_delivered)
|
||||
{
|
||||
vprotocol_pessimist_delivery_pending_t *delivery;
|
||||
vprotocol_pessimist_event_t *event;
|
||||
|
||||
/* store it as finished but not delivered message in the list */
|
||||
delivery = OBJ_NEW(vprotocol_pessimist_delivery_pending_t);
|
||||
delivery->ptl = ptl;
|
||||
delivery->req = req;
|
||||
delivery->bytes_received = bytes_received;
|
||||
delivery->bytes_delivered = bytes_delivered;
|
||||
opal_list_append(&mca_vprotocol_pessimist.replay_delivery_pendings, (opal_list_item_t *) delivery);
|
||||
|
||||
/* deliver as many message as possible in the delivery pending list */
|
||||
scanpendings:
|
||||
if(opal_list_is_empty(&mca_vprotocol_pessimist.replay_events))
|
||||
{
|
||||
/* there is no more events to be replayed, leaving replay mode */
|
||||
OPAL_OUTPUT((mca_pml_v_output, "Leaving replay mode"));
|
||||
mca_vprotocol_pessimist.replay = false;
|
||||
/* deliver (and log) any message still waiting in delivery_pendings */
|
||||
while(! opal_list_is_empty(&mca_vprotocol_pessimist.replay_delivery_pendings))
|
||||
/* searching this request in the event list */
|
||||
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
|
||||
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
|
||||
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event))
|
||||
{
|
||||
delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_remove_first(&mca_vprotocol_pessimist.replay_delivery_pendings);
|
||||
log_event(delivery->req);
|
||||
OPAL_OUTPUT((mca_pml_v_output, "deliver\trecv %d:%lx\tfrom %d\ttag %d\tsize %d\tmatched src %d",
|
||||
delivery->req->req_recv.req_base.req_comm->c_contextid, (long) delivery->req->req_recv.req_base.req_sequence,
|
||||
delivery->req->req_recv.req_base.req_peer, delivery->req->req_recv.req_base.req_tag, delivery->req->req_recv.req_base.req_count,
|
||||
delivery->req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE));
|
||||
mca_pml_v.host_pml_recv_progress(delivery->ptl, delivery->req, delivery->bytes_received, delivery->bytes_delivered);
|
||||
OBJ_RELEASE(delivery);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* search the event to replay in the delivery pending list */
|
||||
event = (vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
|
||||
|
||||
for(delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_delivery_pendings);
|
||||
delivery != (vprotocol_pessimist_delivery_pending_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_delivery_pendings);
|
||||
delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_get_next(delivery))
|
||||
{
|
||||
if((delivery->req->req_recv.req_base.req_comm->c_contextid == event->contextid) &&
|
||||
(delivery->req->req_recv.req_base.req_sequence == event->rclock))
|
||||
{
|
||||
/* this is the request matching the first event to be replayed, let's go */
|
||||
OPAL_OUTPUT((mca_pml_v_output, "deliver\trecv %d:%lx\tfrom %d\ttag %d\tsize %d\tmatched src %d",
|
||||
delivery->req->req_recv.req_base.req_comm->c_contextid, (long) delivery->req->req_recv.req_base.req_sequence,
|
||||
delivery->req->req_recv.req_base.req_peer, delivery->req->req_recv.req_base.req_tag, delivery->req->req_recv.req_base.req_count,
|
||||
delivery->req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE));
|
||||
mca_pml_v.host_pml_recv_progress(delivery->ptl, delivery->req, delivery->bytes_received, delivery->bytes_delivered);
|
||||
|
||||
opal_list_remove_first(&mca_vprotocol_pessimist.replay_events);
|
||||
OBJ_RELEASE(event);
|
||||
|
||||
opal_list_remove_item(&mca_vprotocol_pessimist.replay_delivery_pendings, (opal_list_item_t *) delivery);
|
||||
OBJ_RELEASE(delivery);
|
||||
|
||||
/* the first event has been delivered, let's see if the second can be delivered now */
|
||||
goto scanpendings;
|
||||
}
|
||||
vprotocol_pessimist_matching_event_t *mevent;
|
||||
|
||||
if(VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING != event->type) continue;
|
||||
mevent = &(event->u_event.e_matching);
|
||||
if(mevent->reqid == mca_vprotocol_pessimist.clock)
|
||||
{
|
||||
/* this is the event to replay */
|
||||
V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%x\trecv is forced from %d", mevent->reqid, mevent->src);
|
||||
(*src) = mevent->src;
|
||||
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
|
||||
(opal_list_item_t *) event);
|
||||
VPESSIMIST_EVENT_RETURN(event);
|
||||
}
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
else if(mevent->reqid > max)
|
||||
max = mevent->reqid;
|
||||
}
|
||||
/* the event to be replayed is not in the delivery_pendings list, we've delivered as much as we can for now */
|
||||
}
|
||||
/* not forcing a ANY SOURCE event whose recieve clock is lower than max
|
||||
* is a bug indicating we have missed an event during logging ! */
|
||||
assert(((*src) != MPI_ANY_SOURCE) || (mca_vprotocol_pessimist.clock > max));
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* 0 */
|
||||
/**
 * Force the outcome of a wait/test "any" call according to the replay log.
 *
 * Scans mca_vprotocol_pessimist.replay_events, considering delivery events
 * only, and acts on the first one whose probeid is not ahead of the clock:
 *  - probeid lower than the clock: report that nothing completed
 *    (*index = MPI_UNDEFINED), advance the clock, keep the event queued;
 *  - probeid equal to the clock: consume the event, advance the clock and
 *    either wait for the request of the set carrying the logged reqid
 *    (*index = its position) or report MPI_UNDEFINED when no request of the
 *    set carries that reqid.
 * When no event applies, *index and the clock are left untouched.
 *
 * n      (IN):     number of entries in reqs
 * reqs   (IN):     the set of considered requests
 * index  (OUT):    position of the delivered request, or MPI_UNDEFINED
 * status (IN/OUT): passed to ompi_request_wait for the delivered request
 */
void vprotocol_pessimist_delivery_replay(size_t n, ompi_request_t **reqs,
                                         int *index, ompi_status_public_t *status) {
    mca_vprotocol_pessimist_event_t *cursor;

    cursor = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
    for(; cursor != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
        cursor = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(cursor))
    {
        vprotocol_pessimist_delivery_event_t *logged;
        int pos;
        int hit = -1;

        /* matching events are not our business */
        if(VPROTOCOL_PESSIMIST_EVENT_TYPE_DELIVERY != cursor->type) continue;
        logged = &(cursor->u_event.e_delivery);

        /* logged for a probe that has not happened yet: look further */
        if(logged->probeid > mca_vprotocol_pessimist.clock) continue;

        if(logged->probeid < mca_vprotocol_pessimist.clock)
        {
            /* this probe originally returned without any completed request;
             * the event stays queued until its probeid matches */
            V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock);
            *index = MPI_UNDEFINED;
            mca_vprotocol_pessimist.clock++;
            return;
        }

        /* probeid == clock: find which request of the set was delivered */
        for(pos = 0; pos < (int) n; pos++)
        {
            if(VPESSIMIST_REQ(reqs[pos])->reqid == logged->reqid)
            {
                hit = pos;
                break;
            }
        }

        if(hit >= 0)
        {
            V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\t%x", logged->probeid, logged->reqid);
            opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
                                  (opal_list_item_t *) cursor);
            VPESSIMIST_EVENT_RETURN(cursor);
            *index = hit;
            mca_vprotocol_pessimist.clock++;
            ompi_request_wait(&reqs[hit], status);
            return;
        }

        /* none of the requests carries the logged id */
        V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock);
        assert(logged->reqid == 0); /* make sure we did not miss a request */
        *index = MPI_UNDEFINED;
        mca_vprotocol_pessimist.clock++;
        opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
                              (opal_list_item_t *) cursor);
        VPESSIMIST_EVENT_RETURN(cursor);
        return;
    }
    V_OUTPUT_VERBOSE(50, "pessimist:\treplay\tdeliver\t%x\tnot forced", mca_vprotocol_pessimist.clock);
}
|
||||
|
@ -125,41 +125,7 @@
|
||||
if(mca_vprotocol_pessimist.replay && ((src) == MPI_ANY_SOURCE)) \
|
||||
vprotocol_pessimist_matching_replay(&(src)); \
|
||||
} while(0)
|
||||
|
||||
static inline void vprotocol_pessimist_matching_replay(int *src)
|
||||
{
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
vprotocol_pessimist_clock_t max = 0;
|
||||
#endif
|
||||
mca_vprotocol_pessimist_event_t *event;
|
||||
|
||||
/* searching this request in the event list */
|
||||
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
|
||||
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
|
||||
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event))
|
||||
{
|
||||
vprotocol_pessimist_matching_event_t *mevent = &(event->u_event.e_matching);
|
||||
if(mevent->reqid == mca_vprotocol_pessimist.clock)
|
||||
{
|
||||
/* this is the event to replay */
|
||||
V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%x\trecv is forced from %d", mevent->reqid, mevent->src);
|
||||
(*src) = mevent->src;
|
||||
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
|
||||
(opal_list_item_t *) event);
|
||||
VPESSIMIST_EVENT_RETURN(event);
|
||||
}
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
else if(mevent->reqid > max)
|
||||
max = mevent->reqid;
|
||||
}
|
||||
/* not forcing a ANY SOURCE event whose recieve clock is lower than max
|
||||
* is a bug indicating we have missed an event during logging ! */
|
||||
assert(((*src) != MPI_ANY_SOURCE) ||
|
||||
(mca_vprotocol_pessimist.clock > max));
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/**
 * Replay a logged matching event: looks in the replay event list for a
 * matching event whose reqid equals the current pessimist clock and, when
 * found, stores its logged source in *src and removes the event from the list.
 *
 * src (IN/OUT): source of the receive, overwritten when an event applies
 */
void vprotocol_pessimist_matching_replay(int *src);
|
||||
|
||||
/*******************************************************************************
|
||||
* WAIT/TEST-SOME/ANY & PROBES
|
||||
@ -208,59 +174,14 @@ static inline void vprotocol_pessimist_matching_replay(int *src)
|
||||
* event clock
|
||||
* n (IN): the number of input requests
|
||||
* reqs (IN): the set of considered requests (pml_base_request_t *)
|
||||
* i (IN/OUT): index of the delivered request
|
||||
* c (IN/OUT): counter for number of delivered requests (currently only 0 or 1)
|
||||
* i (IN/OUT): index(es) of the delivered request (currently always 1 at a time)
|
||||
* status (IN/OUT): status of the delivered request
|
||||
*/
|
||||
#define VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(n, reqs, i, c, status) do { \
|
||||
#define VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(n, reqs, i, status) do { \
|
||||
if(mca_vprotocol_pessimist.replay) \
|
||||
{ \
|
||||
mca_vprotocol_pessimist_event_t *event; \
|
||||
\
|
||||
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events); \
|
||||
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events); \
|
||||
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event)) \
|
||||
{ \
|
||||
vprotocol_pessimist_delivery_event_t *devent = &(event->u_event.e_delivery); \
|
||||
\
|
||||
if(event->type == VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING) continue; \
|
||||
if(devent->probeid < mca_vprotocol_pessimist.clock) \
|
||||
{ \
|
||||
/* this particular test have to return no request completed yet */ \
|
||||
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock); \
|
||||
(i) = MPI_UNDEFINED; \
|
||||
(c) = 0; \
|
||||
mca_vprotocol_pessimist.clock++; \
|
||||
return OMPI_SUCCESS; \
|
||||
} \
|
||||
else if(devent->probeid == mca_vprotocol_pessimist.clock) \
|
||||
{ \
|
||||
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\t%x", devent->probeid, devent->reqid); \
|
||||
for((i) = 0; (i) < (n); (i)++) \
|
||||
{ \
|
||||
if(VPESSIMIST_REQ(reqs[i])->reqid == devent->reqid) \
|
||||
{ \
|
||||
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, \
|
||||
(opal_list_item_t *) event); \
|
||||
VPESSIMIST_EVENT_RETURN(event); \
|
||||
(c) = 1; \
|
||||
mca_vprotocol_pessimist.clock++; \
|
||||
return ompi_request_wait(&reqs[i], status); \
|
||||
} \
|
||||
} \
|
||||
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock); \
|
||||
assert(devent->reqid == 0); \
|
||||
(i) = MPI_UNDEFINED; \
|
||||
(c) = 0; \
|
||||
mca_vprotocol_pessimist.clock++; \
|
||||
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, \
|
||||
(opal_list_item_t *) event); \
|
||||
VPESSIMIST_EVENT_RETURN(event); \
|
||||
return OMPI_SUCCESS; \
|
||||
} \
|
||||
} \
|
||||
V_OUTPUT_VERBOSE(50, "pessimist:\treplay\tdeliver\t%x\tnot forced", mca_vprotocol_pessimist.clock); \
|
||||
} \
|
||||
vprotocol_pessimist_delivery_replay(n, reqs, i, status); \
|
||||
} while(0)
|
||||
/**
 * Replay a logged delivery event for the given set of requests.
 * Arguments, in order: number of requests, the set of requests, output index
 * of the delivered request (MPI_UNDEFINED when the replayed probe delivered
 * nothing), and the status passed to ompi_request_wait for that request.
 */
void vprotocol_pessimist_delivery_replay(size_t, ompi_request_t **,
|
||||
int *, ompi_status_public_t *);
|
||||
|
||||
#endif /* __VPROTOCOL_PESSIMIST_EVENTLOG_H__ */
|
||||
|
@ -24,10 +24,8 @@ int mca_vprotocol_pessimist_wait_any(size_t count, ompi_request_t ** requests, i
|
||||
{
|
||||
int ret;
|
||||
size_t i;
|
||||
int c;
|
||||
|
||||
|
||||
VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, *index, c, status);
|
||||
VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, index, status);
|
||||
|
||||
# define pml_req ((mca_pml_base_request_t *) requests[i])
|
||||
/* Avoid the request to be disposed by waitall */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user