io/ompio: fix a bug in handling large write/read operations
This is a bug fix based on a problem reported on the mailing list. For very large read/write operations, ompio breaks the operation down into multiple cycles. The problem was that one of the variables required to maintain its values across the different cycles did not do that, and because of that the calculations of the memory offsets was wrong. Fixes issue #4453 Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
родитель
ec6b2e1cf1
Коммит
c9bb049d00
@ -49,7 +49,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fp,
|
|||||||
|
|
||||||
OMPI_DECLSPEC int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
|
OMPI_DECLSPEC int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
|
||||||
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
|
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
|
||||||
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw );
|
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw,
|
||||||
|
size_t *spc );
|
||||||
|
|
||||||
|
|
||||||
OMPI_DECLSPEC int mca_common_ompio_file_read (mca_io_ompio_file_t *fh, void *buf, int count,
|
OMPI_DECLSPEC int mca_common_ompio_file_read (mca_io_ompio_file_t *fh, void *buf, int count,
|
||||||
|
@ -67,6 +67,7 @@ int mca_common_ompio_file_read (mca_io_ompio_file_t *fh,
|
|||||||
struct iovec *decoded_iov = NULL;
|
struct iovec *decoded_iov = NULL;
|
||||||
|
|
||||||
size_t max_data=0, real_bytes_read=0;
|
size_t max_data=0, real_bytes_read=0;
|
||||||
|
size_t spc=0;
|
||||||
ssize_t ret_code=0;
|
ssize_t ret_code=0;
|
||||||
int i = 0; /* index into the decoded iovec of the buffer */
|
int i = 0; /* index into the decoded iovec of the buffer */
|
||||||
int j = 0; /* index into the file vie iovec */
|
int j = 0; /* index into the file vie iovec */
|
||||||
@ -117,7 +118,8 @@ int mca_common_ompio_file_read (mca_io_ompio_file_t *fh,
|
|||||||
decoded_iov,
|
decoded_iov,
|
||||||
&i,
|
&i,
|
||||||
&j,
|
&j,
|
||||||
&total_bytes_read);
|
&total_bytes_read,
|
||||||
|
&spc);
|
||||||
|
|
||||||
if (fh->f_num_of_io_entries) {
|
if (fh->f_num_of_io_entries) {
|
||||||
ret_code = fh->f_fbtl->fbtl_preadv (fh);
|
ret_code = fh->f_fbtl->fbtl_preadv (fh);
|
||||||
@ -181,6 +183,7 @@ int mca_common_ompio_file_iread (mca_io_ompio_file_t *fh,
|
|||||||
{
|
{
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
mca_ompio_request_t *ompio_req=NULL;
|
mca_ompio_request_t *ompio_req=NULL;
|
||||||
|
size_t spc=0;
|
||||||
|
|
||||||
ompio_req = OBJ_NEW(mca_ompio_request_t);
|
ompio_req = OBJ_NEW(mca_ompio_request_t);
|
||||||
ompio_req->req_type = MCA_OMPIO_REQUEST_READ;
|
ompio_req->req_type = MCA_OMPIO_REQUEST_READ;
|
||||||
@ -226,7 +229,8 @@ int mca_common_ompio_file_iread (mca_io_ompio_file_t *fh,
|
|||||||
decoded_iov,
|
decoded_iov,
|
||||||
&i,
|
&i,
|
||||||
&j,
|
&j,
|
||||||
&total_bytes_read);
|
&total_bytes_read,
|
||||||
|
&spc);
|
||||||
|
|
||||||
if (fh->f_num_of_io_entries) {
|
if (fh->f_num_of_io_entries) {
|
||||||
fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
|
fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
|
||||||
|
@ -51,6 +51,7 @@ int mca_common_ompio_file_write (mca_io_ompio_file_t *fh,
|
|||||||
size_t total_bytes_written = 0;
|
size_t total_bytes_written = 0;
|
||||||
size_t max_data=0, real_bytes_written=0;
|
size_t max_data=0, real_bytes_written=0;
|
||||||
ssize_t ret_code=0;
|
ssize_t ret_code=0;
|
||||||
|
size_t spc=0;
|
||||||
int i = 0; /* index into the decoded iovec of the buffer */
|
int i = 0; /* index into the decoded iovec of the buffer */
|
||||||
int j = 0; /* index into the file view iovec */
|
int j = 0; /* index into the file view iovec */
|
||||||
|
|
||||||
@ -92,7 +93,8 @@ int mca_common_ompio_file_write (mca_io_ompio_file_t *fh,
|
|||||||
decoded_iov,
|
decoded_iov,
|
||||||
&i,
|
&i,
|
||||||
&j,
|
&j,
|
||||||
&total_bytes_written);
|
&total_bytes_written,
|
||||||
|
&spc);
|
||||||
|
|
||||||
if (fh->f_num_of_io_entries) {
|
if (fh->f_num_of_io_entries) {
|
||||||
ret_code =fh->f_fbtl->fbtl_pwritev (fh);
|
ret_code =fh->f_fbtl->fbtl_pwritev (fh);
|
||||||
@ -152,6 +154,7 @@ int mca_common_ompio_file_iwrite (mca_io_ompio_file_t *fh,
|
|||||||
{
|
{
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
mca_ompio_request_t *ompio_req=NULL;
|
mca_ompio_request_t *ompio_req=NULL;
|
||||||
|
size_t spc=0;
|
||||||
|
|
||||||
ompio_req = OBJ_NEW(mca_ompio_request_t);
|
ompio_req = OBJ_NEW(mca_ompio_request_t);
|
||||||
ompio_req->req_type = MCA_OMPIO_REQUEST_WRITE;
|
ompio_req->req_type = MCA_OMPIO_REQUEST_WRITE;
|
||||||
@ -195,7 +198,8 @@ int mca_common_ompio_file_iwrite (mca_io_ompio_file_t *fh,
|
|||||||
decoded_iov,
|
decoded_iov,
|
||||||
&i,
|
&i,
|
||||||
&j,
|
&j,
|
||||||
&total_bytes_written);
|
&total_bytes_written,
|
||||||
|
&spc);
|
||||||
|
|
||||||
if (fh->f_num_of_io_entries) {
|
if (fh->f_num_of_io_entries) {
|
||||||
fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
|
fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
|
||||||
@ -327,13 +331,16 @@ int mca_common_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fp,
|
|||||||
|
|
||||||
int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
|
int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
|
||||||
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
|
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
|
||||||
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw )
|
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw,
|
||||||
|
size_t *spc)
|
||||||
{
|
{
|
||||||
ptrdiff_t disp;
|
ptrdiff_t disp;
|
||||||
int block = 1;
|
int block = 1;
|
||||||
size_t total_bytes_written = *tbw; /* total bytes that have been written*/
|
size_t total_bytes_written = *tbw; /* total bytes that have been written*/
|
||||||
size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
|
size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
|
||||||
size_t sum_previous_counts = 0;
|
size_t sum_previous_counts = *spc; /* total bytes used, up to the start
|
||||||
|
of the memory block decoded_iov[*ii];
|
||||||
|
is always less or equal to tbw */
|
||||||
size_t sum_previous_length = 0;
|
size_t sum_previous_length = 0;
|
||||||
int k = 0; /* index into the io_array */
|
int k = 0; /* index into the io_array */
|
||||||
int i = *ii;
|
int i = *ii;
|
||||||
@ -432,16 +439,18 @@ int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cy
|
|||||||
printf("*************************** %d\n", fh->f_num_of_io_entries);
|
printf("*************************** %d\n", fh->f_num_of_io_entries);
|
||||||
|
|
||||||
for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
|
for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
|
||||||
printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n",
|
printf(" ADDRESS: %p OFFSET: %p LENGTH: %d prev_count=%ld prev_length=%ld\n",
|
||||||
fh->f_io_array[d].memory_address,
|
fh->f_io_array[d].memory_address,
|
||||||
fh->f_io_array[d].offset,
|
fh->f_io_array[d].offset,
|
||||||
fh->f_io_array[d].length);
|
fh->f_io_array[d].length,
|
||||||
|
sum_previous_counts, sum_previous_length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
*ii = i;
|
*ii = i;
|
||||||
*jj = j;
|
*jj = j;
|
||||||
*tbw = total_bytes_written;
|
*tbw = total_bytes_written;
|
||||||
|
*spc = sum_previous_counts;
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user