1
1

Moved replay macros to functions. The performance improvement in process recovery is not worth the debugging hassle.

This commit was SVN r15703.
Этот коммит содержится в:
Aurelien Bouteiller 2007-07-31 16:01:32 +00:00
родитель 5a792a3fad
Коммит a5d0e53bb3
3 изменённых файлов: 89 добавлений и 158 удалений

Просмотреть файл

@ -8,79 +8,91 @@
* $HEADER$
*/
#include "ompi_config.h"
#include "vprotocol_pessimist.h"
#include "vprotocol_pessimist_eventlog.h"
/*
 * NOTE(review): as rendered here, this span is not a single well-formed
 * definition. Lines belonging to vprotocol_pessimist_matching_replay() are
 * interleaved with a disabled (#if 0) replay_delivery_order() implementation.
 * The code below is left untouched.
 *
 * vprotocol_pessimist_matching_replay(src): walks
 * mca_vprotocol_pessimist.replay_events, considers only events of type
 * VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING, and when one has a reqid equal to
 * mca_vprotocol_pessimist.clock it stores the logged source into *src and
 * removes that event from the list. When OMPI_ENABLE_DEBUG is set, an
 * assertion checks that either *src is no longer MPI_ANY_SOURCE or the clock
 * is greater than the largest reqid seen.
 */
void vprotocol_pessimist_matching_replay(int *src) {
#if OMPI_ENABLE_DEBUG
vprotocol_pessimist_clock_t max = 0;
#endif
mca_vprotocol_pessimist_event_t *event;
#if 0
static inline void replay_delivery_order(
struct mca_ptl_base_module_t* ptl,
mca_ptl_base_recv_request_t* req,
size_t bytes_received,
size_t bytes_delivered)
{
vprotocol_pessimist_delivery_pending_t *delivery;
vprotocol_pessimist_event_t *event;
/* store it as finished but not delivered message in the list */
delivery = OBJ_NEW(vprotocol_pessimist_delivery_pending_t);
delivery->ptl = ptl;
delivery->req = req;
delivery->bytes_received = bytes_received;
delivery->bytes_delivered = bytes_delivered;
opal_list_append(&mca_vprotocol_pessimist.replay_delivery_pendings, (opal_list_item_t *) delivery);
/* deliver as many message as possible in the delivery pending list */
scanpendings:
if(opal_list_is_empty(&mca_vprotocol_pessimist.replay_events))
{
/* there is no more events to be replayed, leaving replay mode */
OPAL_OUTPUT((mca_pml_v_output, "Leaving replay mode"));
mca_vprotocol_pessimist.replay = false;
/* deliver (and log) any message still waiting in delivery_pendings */
while(! opal_list_is_empty(&mca_vprotocol_pessimist.replay_delivery_pendings))
/* searching this request in the event list */
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event))
{
delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_remove_first(&mca_vprotocol_pessimist.replay_delivery_pendings);
log_event(delivery->req);
OPAL_OUTPUT((mca_pml_v_output, "deliver\trecv %d:%lx\tfrom %d\ttag %d\tsize %d\tmatched src %d",
delivery->req->req_recv.req_base.req_comm->c_contextid, (long) delivery->req->req_recv.req_base.req_sequence,
delivery->req->req_recv.req_base.req_peer, delivery->req->req_recv.req_base.req_tag, delivery->req->req_recv.req_base.req_count,
delivery->req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE));
mca_pml_v.host_pml_recv_progress(delivery->ptl, delivery->req, delivery->bytes_received, delivery->bytes_delivered);
OBJ_RELEASE(delivery);
}
}
else
{
/* search the event to replay in the delivery pending list */
event = (vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
for(delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_delivery_pendings);
delivery != (vprotocol_pessimist_delivery_pending_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_delivery_pendings);
delivery = (vprotocol_pessimist_delivery_pending_t *) opal_list_get_next(delivery))
{
if((delivery->req->req_recv.req_base.req_comm->c_contextid == event->contextid) &&
(delivery->req->req_recv.req_base.req_sequence == event->rclock))
{
/* this is the request matching the first event to be replayed, let's go */
OPAL_OUTPUT((mca_pml_v_output, "deliver\trecv %d:%lx\tfrom %d\ttag %d\tsize %d\tmatched src %d",
delivery->req->req_recv.req_base.req_comm->c_contextid, (long) delivery->req->req_recv.req_base.req_sequence,
delivery->req->req_recv.req_base.req_peer, delivery->req->req_recv.req_base.req_tag, delivery->req->req_recv.req_base.req_count,
delivery->req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE));
mca_pml_v.host_pml_recv_progress(delivery->ptl, delivery->req, delivery->bytes_received, delivery->bytes_delivered);
opal_list_remove_first(&mca_vprotocol_pessimist.replay_events);
OBJ_RELEASE(event);
opal_list_remove_item(&mca_vprotocol_pessimist.replay_delivery_pendings, (opal_list_item_t *) delivery);
OBJ_RELEASE(delivery);
/* the first event has been delivered, let's see if the second can be delivered now */
goto scanpendings;
}
vprotocol_pessimist_matching_event_t *mevent;
if(VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING != event->type) continue;
mevent = &(event->u_event.e_matching);
if(mevent->reqid == mca_vprotocol_pessimist.clock)
{
/* this is the event to replay */
V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%x\trecv is forced from %d", mevent->reqid, mevent->src);
(*src) = mevent->src;
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
(opal_list_item_t *) event);
VPESSIMIST_EVENT_RETURN(event);
}
#if OMPI_ENABLE_DEBUG
else if(mevent->reqid > max)
max = mevent->reqid;
}
/* the event to be replayed is not in the delivery_pendings list, we've delivered as much as we can for now */
}
/* not forcing a ANY SOURCE event whose receive clock is lower than max
* is a bug indicating we have missed an event during logging ! */
assert(((*src) != MPI_ANY_SOURCE) || (mca_vprotocol_pessimist.clock > max));
#else
}
#endif
}
#endif /* 0 */
/**
 * Replay the logged outcome of a delivery (wait/test-any style) call.
 *
 * Walks mca_vprotocol_pessimist.replay_events and looks at delivery events
 * only. Depending on how the logged probeid compares with the current
 * mca_vprotocol_pessimist.clock:
 *  - probeid <  clock: report "nothing delivered" (*index = MPI_UNDEFINED),
 *    advance the clock and keep the event in the list;
 *  - probeid == clock: deliver the request whose reqid was logged (waiting on
 *    it), or report "nothing delivered" when no request in reqs carries that
 *    reqid; in both cases the event is consumed and the clock advances.
 * When no event applies, *index is left untouched and the clock is unchanged.
 *
 * @param n      (IN)     number of entries in reqs
 * @param reqs   (IN)     candidate requests
 * @param index  (OUT)    position of the delivered request, or MPI_UNDEFINED
 * @param status (IN/OUT) handed to ompi_request_wait for the delivered request
 */
void vprotocol_pessimist_delivery_replay(size_t n, ompi_request_t **reqs,
                                         int *index, ompi_status_public_t *status)
{
    mca_vprotocol_pessimist_event_t *cur;

    cur = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
    while(cur != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events))
    {
        /* Only delivery events are relevant here; anything else is stepped over. */
        if(VPROTOCOL_PESSIMIST_EVENT_TYPE_DELIVERY == cur->type)
        {
            vprotocol_pessimist_delivery_event_t *logged = &(cur->u_event.e_delivery);

            if(logged->probeid < mca_vprotocol_pessimist.clock)
            {
                /* This probe completed nothing; the event itself stays queued
                 * until its probeid matches the clock. */
                V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock);
                *index = MPI_UNDEFINED;
                mca_vprotocol_pessimist.clock++;
                return;
            }

            if(logged->probeid == mca_vprotocol_pessimist.clock)
            {
                int pos = 0;

                /* Look for the request that was delivered in the logged run. */
                while(pos < (int) n)
                {
                    if(VPESSIMIST_REQ(reqs[pos])->reqid == logged->reqid)
                    {
                        V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\t%x", logged->probeid, logged->reqid);
                        opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
                                              (opal_list_item_t *) cur);
                        VPESSIMIST_EVENT_RETURN(cur);
                        *index = pos;
                        mca_vprotocol_pessimist.clock++;
                        ompi_request_wait(&reqs[pos], status);
                        return;
                    }
                    pos++;
                }

                /* None of the candidates was logged as delivered: the event
                 * must describe an empty probe (reqid 0). */
                V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock);
                assert(logged->reqid == 0);
                *index = MPI_UNDEFINED;
                mca_vprotocol_pessimist.clock++;
                opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
                                      (opal_list_item_t *) cur);
                VPESSIMIST_EVENT_RETURN(cur);
                return;
            }
        }
        cur = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(cur);
    }

    V_OUTPUT_VERBOSE(50, "pessimist:\treplay\tdeliver\t%x\tnot forced", mca_vprotocol_pessimist.clock);
}

Просмотреть файл

@ -125,41 +125,7 @@
if(mca_vprotocol_pessimist.replay && ((src) == MPI_ANY_SOURCE)) \
vprotocol_pessimist_matching_replay(&(src)); \
} while(0)
static inline void vprotocol_pessimist_matching_replay(int *src)
{
#if OMPI_ENABLE_DEBUG
vprotocol_pessimist_clock_t max = 0;
#endif
mca_vprotocol_pessimist_event_t *event;
/* searching this request in the event list */
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events);
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events);
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event))
{
vprotocol_pessimist_matching_event_t *mevent = &(event->u_event.e_matching);
if(mevent->reqid == mca_vprotocol_pessimist.clock)
{
/* this is the event to replay */
V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%x\trecv is forced from %d", mevent->reqid, mevent->src);
(*src) = mevent->src;
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events,
(opal_list_item_t *) event);
VPESSIMIST_EVENT_RETURN(event);
}
#if OMPI_ENABLE_DEBUG
else if(mevent->reqid > max)
max = mevent->reqid;
}
/* not forcing a ANY SOURCE event whose recieve clock is lower than max
* is a bug indicating we have missed an event during logging ! */
assert(((*src) != MPI_ANY_SOURCE) ||
(mca_vprotocol_pessimist.clock > max));
#else
}
#endif
}
/* Replay of a logged matching event: when a matching event whose reqid equals
 * mca_vprotocol_pessimist.clock is found in the replay list, *src is
 * overwritten with the logged source; otherwise *src is left as passed in. */
void vprotocol_pessimist_matching_replay(int *src);
/*******************************************************************************
* WAIT/TEST-SOME/ANY & PROBES
@ -208,59 +174,14 @@ static inline void vprotocol_pessimist_matching_replay(int *src)
* event clock
* n (IN): the number of input requests
* reqs (IN): the set of considered requests (pml_base_request_t *)
* i (IN/OUT): index of the delivered request
* c (IN/OUT): counter for number of delivered requests (currently only 0 or 1)
* i (IN/OUT): index(es) of the delivered request (currently always 1 at a time)
* status (IN/OUT): status of the delivered request
*/
/*
 * NOTE(review): two #define lines follow each other here, and the inline
 * replay loop appears together with the call to
 * vprotocol_pessimist_delivery_replay(); as rendered this is not a single
 * well-formed macro, so the text below is left untouched. The last statement
 * of the macro body forwards (n, reqs, i, status) to
 * vprotocol_pessimist_delivery_replay().
 */
#define VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(n, reqs, i, c, status) do { \
#define VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(n, reqs, i, status) do { \
if(mca_vprotocol_pessimist.replay) \
{ \
mca_vprotocol_pessimist_event_t *event; \
\
for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.replay_events); \
event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events); \
event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event)) \
{ \
vprotocol_pessimist_delivery_event_t *devent = &(event->u_event.e_delivery); \
\
if(event->type == VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING) continue; \
if(devent->probeid < mca_vprotocol_pessimist.clock) \
{ \
/* this particular test have to return no request completed yet */ \
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock); \
(i) = MPI_UNDEFINED; \
(c) = 0; \
mca_vprotocol_pessimist.clock++; \
return OMPI_SUCCESS; \
} \
else if(devent->probeid == mca_vprotocol_pessimist.clock) \
{ \
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\t%x", devent->probeid, devent->reqid); \
for((i) = 0; (i) < (n); (i)++) \
{ \
if(VPESSIMIST_REQ(reqs[i])->reqid == devent->reqid) \
{ \
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, \
(opal_list_item_t *) event); \
VPESSIMIST_EVENT_RETURN(event); \
(c) = 1; \
mca_vprotocol_pessimist.clock++; \
return ompi_request_wait(&reqs[i], status); \
} \
} \
V_OUTPUT_VERBOSE(70, "pessimist:\treplay\tdeliver\t%x\tnone", mca_vprotocol_pessimist.clock); \
assert(devent->reqid == 0); \
(i) = MPI_UNDEFINED; \
(c) = 0; \
mca_vprotocol_pessimist.clock++; \
opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, \
(opal_list_item_t *) event); \
VPESSIMIST_EVENT_RETURN(event); \
return OMPI_SUCCESS; \
} \
} \
V_OUTPUT_VERBOSE(50, "pessimist:\treplay\tdeliver\t%x\tnot forced", mca_vprotocol_pessimist.clock); \
} \
vprotocol_pessimist_delivery_replay(n, reqs, i, status); \
} while(0)
/* Replay of a logged delivery event. Arguments, in order: the number of
 * requests, the array of requests, the output index (set to the position of
 * the delivered request, or MPI_UNDEFINED when the logged probe delivered
 * nothing; left untouched when no logged event applies), and the status
 * passed on to ompi_request_wait for the delivered request. */
void vprotocol_pessimist_delivery_replay(size_t, ompi_request_t **,
int *, ompi_status_public_t *);
#endif /* __VPROTOCOL_PESSIMIST_EVENTLOG_H__ */

Просмотреть файл

@ -24,10 +24,8 @@ int mca_vprotocol_pessimist_wait_any(size_t count, ompi_request_t ** requests, i
{
int ret;
size_t i;
int c;
VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, *index, c, status);
VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, index, status);
# define pml_req ((mca_pml_base_request_t *) requests[i])
/* Avoid the request to be disposed by waitall */