1
1

Fix yalla PML: MPI_Recv does not return MPI_ERR_TRUNCATE upon overflow

Signed-off-by: Nadia Derbey <Nadia.Derbey@atos.net>
Этот коммит содержится в:
Nadia Derbey 2017-03-30 14:09:18 +02:00
родитель 7e57075f0d
Коммит b6de94e449
3 изменённых файла: 26 добавлений и 16 удалений

Просмотреть файл

@@ -369,6 +369,7 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s
{
mxm_recv_req_t rreq;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_RECV_REQ(&rreq, buf, count, datatype, src, tag, comm, recv);
PML_YALLA_INIT_BLOCKING_MXM_RECV_REQ(&rreq);
@@ -387,10 +388,10 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s
rreq.completion.sender_imm, rreq.completion.sender_tag,
rreq.tag, rreq.tag_mask,
rreq.completion.actual_len);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc);
PML_YALLA_FREE_BLOCKING_MXM_REQ(&rreq.base);
return OMPI_SUCCESS;
return rc;
}
int mca_pml_yalla_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype,
@@ -531,6 +532,7 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm,
{
mxm_recv_req_t rreq;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm);
@@ -538,7 +540,7 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm,
switch (error) {
case MXM_OK:
*matched = 1;
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc);
return OMPI_SUCCESS;
case MXM_ERR_NO_MESSAGE:
*matched = 0;
@@ -555,13 +557,14 @@ int mca_pml_yalla_probe(int src, int tag, struct ompi_communicator_t* comm,
{
mxm_recv_req_t rreq;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm);
for (;;) {
error = mxm_req_probe(&rreq);
switch (error) {
case MXM_OK:
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc);
return OMPI_SUCCESS;
case MXM_ERR_NO_MESSAGE:
break;
@@ -580,6 +583,7 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm,
mxm_recv_req_t rreq;
mxm_message_h mxm_msg;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm);
@@ -587,7 +591,7 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm,
switch (error) {
case MXM_OK:
*matched = 1;
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc);
PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message);
return OMPI_SUCCESS;
case MXM_ERR_NO_MESSAGE:
@@ -607,13 +611,14 @@ int mca_pml_yalla_mprobe(int src, int tag, struct ompi_communicator_t* comm,
mxm_recv_req_t rreq;
mxm_message_h mxm_msg;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm);
for (;;) {
error = mxm_req_mprobe(&rreq, &mxm_msg);
switch (error) {
case MXM_OK:
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc);
PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message);
return OMPI_SUCCESS;
case MXM_ERR_NO_MESSAGE:
@@ -658,6 +663,7 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype,
{
mxm_recv_req_t rreq;
mxm_error_t error;
int rc;
PML_YALLA_INIT_MXM_RECV_REQ(&rreq, buf, count, datatype, -1, 0, (*message)->comm, recv);
PML_YALLA_INIT_BLOCKING_MXM_RECV_REQ(&rreq);
@@ -678,8 +684,8 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype,
rreq.completion.sender_imm, rreq.completion.sender_tag,
rreq.tag, rreq.tag_mask,
rreq.completion.actual_len);
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status);
return OMPI_SUCCESS;
PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc);
return rc;
}
int mca_pml_yalla_start(size_t count, ompi_request_t** requests)

Просмотреть файл

@@ -195,9 +195,10 @@ static void mca_pml_yalla_bsend_completion_cb(void *context)
static void mca_pml_yalla_recv_completion_cb(void *context)
{
mca_pml_yalla_recv_request_t* rreq = context;
int rc;
PML_YALLA_SET_RECV_STATUS(&rreq->mxm, rreq->mxm.completion.actual_len,
&rreq->super.ompi.req_status);
&rreq->super.ompi.req_status, rc);
PML_YALLA_VERBOSE(8, "receive request %p completed with status %s source %d rtag %d(%d/0x%x) len %zu",
(void *)rreq, mxm_error_string(rreq->mxm.base.error),

Просмотреть файл

@@ -175,25 +175,28 @@ static inline mca_pml_yalla_send_request_t* MCA_PML_YALLA_SREQ_INIT(void *_buf,
} \
}
#define PML_YALLA_SET_RECV_STATUS(_rreq, _length, _mpi_status) \
#define PML_YALLA_SET_RECV_STATUS(_rreq, _length, _mpi_status, rc) \
{ \
if ((_mpi_status) != MPI_STATUS_IGNORE) { \
switch ((_rreq)->base.error) { \
case MXM_OK: \
(_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \
(rc) = OMPI_SUCCESS; \
break; \
case MXM_ERR_CANCELED: \
(_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \
(_mpi_status)->_cancelled = true; \
(rc) = OMPI_SUCCESS; \
break; \
case MXM_ERR_MESSAGE_TRUNCATED: \
(_mpi_status)->MPI_ERROR = MPI_ERR_TRUNCATE; \
(rc) = MPI_ERR_TRUNCATE; \
break; \
default: \
(_mpi_status)->MPI_ERROR = MPI_ERR_INTERN; \
(rc) = MPI_ERR_INTERN; \
break; \
} \
\
if ((_mpi_status) != MPI_STATUS_IGNORE) { \
(_mpi_status)->MPI_ERROR = (rc); \
if (MXM_ERR_CANCELED == (_rreq)->base.error) { \
(_mpi_status)->_cancelled = true; \
} \
(_mpi_status)->MPI_TAG = (_rreq)->completion.sender_tag; \
(_mpi_status)->MPI_SOURCE = (_rreq)->completion.sender_imm; \
(_mpi_status)->_ucount = (_length); \