1
1

pml/ob1: Fix data corruption of MPI_BSEND

Data transferred by `MPI_BSEND` may corrupt if all of the following
conditions are met.

- The message size is less than the eager limit.
- The `btl_alloc` function in the BTL interface returns `NULL`
  for some reason.
- The MPI program overwrites the send buffer after `MPI_BSEND`
  returns.

The problem is in the way of pending a send request in ob1 PML.
The `mca_pml_ob1_send_request_start_copy` function retruns
`OMPI_ERR_OUT_OF_RESOURCE` if `mca_bml_base_alloc` function returns
`des = NULL`. In this case, the send request is added to the
`send_pending` list and `MPI_BSEND` returns immediately. Next time
the `mca_pml_ob1_send_request_start_copy` function tries sending,
the user buffer may have been overwritten by the MPI program.

Call hierarchy of `MPI_BSEND`:

```
  MPI_Bsend
    mca_pml_ob1_send
      if (MCA_PML_BASE_SEND_BUFFERED == sendmode)
        mca_pml_ob1_isend
          MCA_PML_OB1_SEND_REQUEST_START_W_SEQ
            mca_pml_ob1_send_request_start_seq
              mca_pml_ob1_send_request_start_btl
                if (size <= eager_limit)
                  if (req_send_mode == MCA_PML_BASE_SEND_BUFFERED)
                    mca_pml_ob1_send_request_start_copy
                      mca_bml_base_alloc
                        btl_alloc
              if (OMPI_ERR_OUT_OF_RESOURCE == rc)
                add_request_to_send_pending
        ompi_request_free
```

To solve this problem, we should save the data to the buffer
attached by `MPI_BUFFER_ATTACH` before leaving `MPI_BSEND`.

This problem was introduced by ob1 optimization (commits 2b57f422
and a06e491c) in v1.8 series.

Signed-off-by: KAWASHIMA Takahiro <t-kawashima@jp.fujitsu.com>
Этот коммит содержится в:
KAWASHIMA Takahiro 2018-07-10 20:24:45 +09:00
родитель 34bc77747c
Коммит 0021616984
2 изменённых файлов: 36 добавлений и 22 удалений

Просмотреть файл

@ -18,6 +18,7 @@
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -424,30 +425,32 @@ int mca_pml_ob1_send_request_start_buffered(
des->des_cbfunc = mca_pml_ob1_rndv_completion;
des->des_cbdata = sendreq;
/* buffer the remainder of the message */
rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, des);
return rc;
/* buffer the remainder of the message if it is not buffered yet */
if( OPAL_LIKELY(sendreq->req_send.req_addr == sendreq->req_send.req_base.req_addr) ) {
rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, des);
return rc;
}
iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)sendreq->req_send.req_addr) + max_data);
iov.iov_len = max_data = sendreq->req_send.req_bytes_packed - max_data;
if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor,
&iov,
&iov_count,
&max_data)) < 0) {
mca_bml_base_free(bml_btl, des);
return rc;
}
/* re-init convertor for packed data */
opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor,
&(ompi_mpi_byte.dt.super),
sendreq->req_send.req_bytes_packed,
sendreq->req_send.req_addr );
}
iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)sendreq->req_send.req_addr) + max_data);
iov.iov_len = max_data = sendreq->req_send.req_bytes_packed - max_data;
if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor,
&iov,
&iov_count,
&max_data)) < 0) {
mca_bml_base_free(bml_btl, des);
return rc;
}
/* re-init convertor for packed data */
opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor,
&(ompi_mpi_byte.dt.super),
sendreq->req_send.req_bytes_packed,
sendreq->req_send.req_addr );
/* wait for ack and completion */
sendreq->req_state = 2;

Просмотреть файл

@ -14,6 +14,7 @@
* Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -468,6 +469,16 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
return rc;
}
if(MCA_PML_BASE_SEND_BUFFERED == sendreq->req_send.req_send_mode &&
sendreq->req_send.req_addr == sendreq->req_send.req_base.req_addr) {
/* in the buffered mode, the send buffer must be saved to
* the attached buffer before returning it to the user */
int rc;
rc = mca_pml_base_bsend_request_start((ompi_request_t*)sendreq);
if(OMPI_SUCCESS != rc){
return rc;
}
}
add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true);
return OMPI_SUCCESS;