diff --git a/ompi/mca/fbtl/posix/fbtl_posix.c b/ompi/mca/fbtl/posix/fbtl_posix.c index 29dbdb4197..65f01c20d6 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix.c +++ b/ompi/mca/fbtl/posix/fbtl_posix.c @@ -203,6 +203,7 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req) /* all pending operations are finished for this request */ req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; req->req_ompi.req_status._ucount = data->aio_total_len; + mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh ); ret = true; } #endif @@ -215,6 +216,7 @@ void mca_fbtl_posix_request_free ( mca_ompio_request_t *req) /* Free the fbtl specific data structures */ mca_fbtl_posix_request_data_t *data=(mca_fbtl_posix_request_data_t *)req->req_data; if (NULL != data ) { + mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh ); if ( NULL != data->aio_reqs ) { free ( data->aio_reqs); } diff --git a/ompi/mca/fbtl/posix/fbtl_posix_lock.c b/ompi/mca/fbtl/posix/fbtl_posix_lock.c index 30551b4424..31897d5c91 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_lock.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_lock.c @@ -43,6 +43,9 @@ int mca_fbtl_posix_lock ( struct flock *lock, mca_io_ompio_file_t *fh, int op, lock->l_whence = SEEK_SET; lock->l_start =-1; lock->l_len =-1; + if ( 0 == len ) { + return 0; + } if ( fh->f_atomicity || fh->f_flags & OMPIO_LOCK_ALWAYS ) { /* Need to lock the entire region */ @@ -66,7 +69,7 @@ int mca_fbtl_posix_lock ( struct flock *lock, mca_io_ompio_file_t *fh, int op, */ return 0; } - if ( OMPIO_LOCK_ENTIRE_REGION ) { + if ( flags == OMPIO_LOCK_ENTIRE_REGION ) { lock->l_start = (off_t) offset; lock->l_len = len; } @@ -79,13 +82,13 @@ int mca_fbtl_posix_lock ( struct flock *lock, mca_io_ompio_file_t *fh, int op, the two into a single lock. 
*/ bmod = offset % fh->f_fs_block_size; - if ( !bmod ) { + if ( bmod ) { lock->l_start = (off_t) offset; - lock->l_len = fh->f_fs_block_size - bmod; + lock->l_len = bmod; } - lmod = (offset+len-1)%fh->f_fs_block_size; - if ( !lmod ) { - if ( bmod ) { + lmod = (offset+len)%fh->f_fs_block_size; + if ( lmod ) { + if ( !bmod ) { lock->l_start = (offset+len-lmod ); lock->l_len = lmod; } @@ -100,15 +103,28 @@ int mca_fbtl_posix_lock ( struct flock *lock, mca_io_ompio_file_t *fh, int op, } } + +#ifdef OMPIO_DEBUG + printf("%d: acquiring lock for offset %ld length %ld requested offset %ld request len %ld \n", + fh->f_rank, lock->l_start, lock->l_len, offset, len); +#endif return (fcntl ( fh->fd, F_SETLKW, lock)); } int mca_fbtl_posix_unlock ( struct flock *lock, mca_io_ompio_file_t *fh ) { + int ret; if ( -1 == lock->l_start && -1 == lock->l_len ) { return 0; } lock->l_type = F_UNLCK; - return (fcntl ( fh->fd, F_SETLK, lock)); +#ifdef OMPIO_DEBUG + printf("%d: releasing lock for offset %ld length %ld\n", fh->f_rank, lock->l_start, lock->l_len); +#endif + ret = fcntl ( fh->fd, F_SETLK, lock); + lock->l_start = -1; + lock->l_len = -1; + + return ret; } diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index b6360b3d97..3ebc69cfa3 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -37,7 +37,7 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) OMPI_MPI_OFFSET_TYPE iov_offset = 0; ssize_t bytes_read=0, ret_code=0; struct flock lock; - off_t total_length; + off_t total_length, end_offset=0; if (NULL == fh->f_io_array) { return OMPI_ERROR; @@ -55,6 +55,7 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) iov[iov_count].iov_base = fh->f_io_array[i].memory_address; iov[iov_count].iov_len = fh->f_io_array[i].length; iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; + end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length; 
iov_count ++; } @@ -77,12 +78,13 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) iov[iov_count].iov_base = fh->f_io_array[i+1].memory_address; iov[iov_count].iov_len = fh->f_io_array[i+1].length; + end_offset = (off_t)fh->f_io_array[i+1].offset + (off_t)fh->f_io_array[i+1].length; /* locked extent must end with the entry just appended (i+1) */ iov_count ++; continue; } } - total_length = ((off_t)iov[iov_count-1].iov_base + iov[iov_count-1].iov_len - (off_t)iov_offset ); + total_length = (end_offset - (off_t)iov_offset ); mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, iov_offset, total_length, OMPIO_LOCK_SELECTIVE ); #if defined(HAVE_PREADV) ret_code = preadv (fh->fd, iov, iov_count, iov_offset); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index 61d8008355..1c09827bde 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -39,7 +39,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) OMPI_MPI_OFFSET_TYPE iov_offset = 0; ssize_t ret_code=0, bytes_written=0; struct flock lock; - off_t total_length; + off_t total_length, end_offset=0; if (NULL == fh->f_io_array) { return OMPI_ERROR; @@ -57,6 +57,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) iov[iov_count].iov_base = fh->f_io_array[i].memory_address; iov[iov_count].iov_len = fh->f_io_array[i].length; iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; + end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length; iov_count ++; } @@ -78,6 +79,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) (iov_count < IOV_MAX )) { iov[iov_count].iov_base = fh->f_io_array[i+1].memory_address; iov[iov_count].iov_len = fh->f_io_array[i+1].length; + end_offset = (off_t)fh->f_io_array[i+1].offset + (off_t)fh->f_io_array[i+1].length; /* locked extent must end with the entry just appended (i+1) */ iov_count ++; continue; } @@ -95,7 +97,7 @@ ssize_t 
mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) */ - total_length = ((off_t)iov[iov_count-1].iov_base + iov[iov_count-1].iov_len - 
(off_t)iov_offset); + total_length = (end_offset - (off_t)iov_offset); mca_fbtl_posix_lock ( &lock, fh, F_WRLCK, iov_offset, total_length, OMPIO_LOCK_SELECTIVE ); #if defined (HAVE_PWRITEV) ret_code = pwritev (fh->fd, iov, iov_count, iov_offset); diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index 8f0ea650c9..b579720d6a 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -96,6 +96,10 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, fh->f_stripe_size=0; fh->f_stripe_count=1; + /* Need to find a way to determine the file system block size at run time. + 4096 is the most common value, but it might not always be accurate. + */ + fh->f_fs_block_size = 4096; return OMPI_SUCCESS; }