Merge pull request #1840 from artpol84/yalla_perf_fix
pml/yalla: fix yalla performance regression
Этот коммит содержится в:
Коммит
06930a0423
@ -681,7 +681,6 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
|
||||
{
|
||||
mca_pml_yalla_base_request_t *req;
|
||||
mca_pml_yalla_send_request_t *sreq;
|
||||
mxm_error_t error;
|
||||
size_t i;
|
||||
int rc;
|
||||
@ -696,10 +695,12 @@ int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
|
||||
|
||||
PML_YALLA_ASSERT(req->ompi.req_state != OMPI_REQUEST_INVALID);
|
||||
PML_YALLA_RESET_OMPI_REQ(&req->ompi, OMPI_REQUEST_ACTIVE);
|
||||
PML_YALLA_RESET_PML_REQ(req);
|
||||
|
||||
if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_SEND) {
|
||||
mca_pml_yalla_send_request_t *sreq;
|
||||
sreq = (mca_pml_yalla_send_request_t *)req;
|
||||
PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(sreq));
|
||||
|
||||
if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_BSEND) {
|
||||
PML_YALLA_VERBOSE(8, "start bsend request %p", (void *)sreq);
|
||||
rc = mca_pml_yalla_bsend(&sreq->mxm);
|
||||
@ -716,8 +717,12 @@ int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mca_pml_yalla_recv_request_t *rreq;
|
||||
rreq = (mca_pml_yalla_recv_request_t *)req;
|
||||
PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(rreq));
|
||||
|
||||
PML_YALLA_VERBOSE(8, "start recv request %p", (void *)req);
|
||||
error = mxm_req_recv(&((mca_pml_yalla_recv_request_t *)req)->mxm);
|
||||
error = mxm_req_recv(&rreq->mxm);
|
||||
if (MXM_OK != error) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
@ -32,10 +32,10 @@ static inline void mca_pml_yalla_request_release(mca_pml_yalla_base_request_t *r
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req)
|
||||
mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req, mxm_req_base_t *mxm_base)
|
||||
{
|
||||
if (req->mxm_base->state != MXM_REQ_COMPLETED) {
|
||||
PML_YALLA_VERBOSE(8, "request %p free called before completed", (void *)req);
|
||||
if (mxm_base->state != MXM_REQ_COMPLETED) {
|
||||
PML_YALLA_VERBOSE(8, "request %p free called before completed", (void*)req);
|
||||
req->flags |= MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED;
|
||||
return 0;
|
||||
}
|
||||
@ -45,12 +45,12 @@ mca_pml_yalla_check_request_state(mca_pml_yalla_base_request_t *req)
|
||||
|
||||
static int mca_pml_yalla_send_request_free(ompi_request_t **request)
|
||||
{
|
||||
mca_pml_yalla_base_request_t *req = (mca_pml_yalla_base_request_t*)(*request);
|
||||
mca_pml_yalla_send_request_t *sreq = (mca_pml_yalla_send_request_t*)(*request);
|
||||
|
||||
PML_YALLA_VERBOSE(9, "free send request *%p=%p", (void *)request, (void *)*request);
|
||||
|
||||
if (mca_pml_yalla_check_request_state(req)) {
|
||||
mca_pml_yalla_request_release(req, &ompi_pml_yalla.send_reqs);
|
||||
if (mca_pml_yalla_check_request_state(&sreq->super, PML_YALLA_MXM_REQBASE(sreq))) {
|
||||
mca_pml_yalla_request_release(&sreq->super, &ompi_pml_yalla.send_reqs);
|
||||
}
|
||||
|
||||
*request = MPI_REQUEST_NULL;
|
||||
@ -84,12 +84,12 @@ static int mca_pml_yalla_send_request_cancel(ompi_request_t *request, int flag)
|
||||
|
||||
static int mca_pml_yalla_recv_request_free(ompi_request_t **request)
|
||||
{
|
||||
mca_pml_yalla_base_request_t *req = (mca_pml_yalla_base_request_t*)(*request);
|
||||
mca_pml_yalla_recv_request_t *rreq = (mca_pml_yalla_recv_request_t*)(*request);
|
||||
|
||||
PML_YALLA_VERBOSE(9, "free receive request *%p=%p", (void *)request, (void *)*request);
|
||||
|
||||
if (mca_pml_yalla_check_request_state(req)) {
|
||||
mca_pml_yalla_request_release(req, &ompi_pml_yalla.recv_reqs);
|
||||
if (mca_pml_yalla_check_request_state(&rreq->super, PML_YALLA_MXM_REQBASE(rreq))) {
|
||||
mca_pml_yalla_request_release(&rreq->super, &ompi_pml_yalla.recv_reqs);
|
||||
}
|
||||
|
||||
*request = MPI_REQUEST_NULL;
|
||||
|
@ -25,15 +25,6 @@ struct pml_yalla_base_request {
|
||||
ompi_request_t ompi;
|
||||
mca_pml_yalla_convertor_t *convertor;
|
||||
int flags;
|
||||
/* overlaps with base of send/recv
|
||||
* In ISO C90, you would have to give contents a length of 1,
|
||||
* which means either you waste space or complicate the argument to malloc.
|
||||
* Note:
|
||||
* - 1 was the portable way to go, though it was rather strange
|
||||
* - 0 was better at indicating intent, but not legal as far as
|
||||
* the Standard was concerned and supported as an extension by some compilers (including gcc)
|
||||
*/
|
||||
mxm_req_base_t mxm_base[1];
|
||||
};
|
||||
|
||||
struct pml_yalla_send_request {
|
||||
@ -58,6 +49,8 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_recv_request_t);
|
||||
|
||||
void mca_pml_yalla_init_reqs(void);
|
||||
|
||||
/* Yield the address of the `base` field inside the `mxm` member of the request pointed to by x. */
#define PML_YALLA_MXM_REQBASE(x) (&(x)->mxm.base)
|
||||
|
||||
#define PML_YALLA_RESET_OMPI_REQ(_ompi_req, _state) \
|
||||
{ \
|
||||
(_ompi_req)->req_state = _state; \
|
||||
@ -72,9 +65,9 @@ void mca_pml_yalla_init_reqs(void);
|
||||
OBJ_RETAIN(_comm); \
|
||||
}
|
||||
|
||||
/*
 * Reset a PML request: set the state of the given MXM request base to
 * MXM_REQ_NEW, then reset the request data through
 * PML_YALLA_RESET_PML_REQ_DATA.
 *
 *   _pml_req  pointer to the PML request being reset
 *   mxm_base  pointer expression for the MXM request base, e.g.
 *             PML_YALLA_MXM_REQBASE(sreq)
 *
 * The mxm_base argument is parenthesized so that any pointer expression
 * (conditional, arithmetic, ...) binds correctly before `->`.
 * The expansion stays a plain `{ ... }` block so existing call sites keep
 * working unchanged.
 */
#define PML_YALLA_RESET_PML_REQ(_pml_req, mxm_base) \
    { \
        (mxm_base)->state = MXM_REQ_NEW; \
        PML_YALLA_RESET_PML_REQ_DATA(_pml_req); \
    }
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user