diff --git a/opal/mca/btl/vader/btl_vader_get.c b/opal/mca/btl/vader/btl_vader_get.c index f77a1df821..add6889aa1 100644 --- a/opal/mca/btl/vader/btl_vader_get.c +++ b/opal/mca/btl/vader/btl_vader_get.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +25,7 @@ #include "opal/sys/cma.h" #endif /* OPAL_CMA_NEED_SYSCALL_DEFS */ + #endif /** @@ -71,11 +74,34 @@ int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t * struct iovec dst_iov = {.iov_base = local_address, .iov_len = size}; ssize_t ret; - ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0); - if (ret != (ssize_t)size) { - opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); - return OPAL_ERROR; - } + /* + * According to the man page : + * "On success, process_vm_readv() returns the number of bytes read and + * process_vm_writev() returns the number of bytes written. This return + * value may be less than the total number of requested bytes, if a + * partial read/write occurred. (Partial transfers apply at the + * granularity of iovec elements. These system calls won't perform a + * partial transfer that splits a single iovec element.)". + * So since we use a single iovec element, the returned size should either + * be 0 or size, and the do loop should not be needed here. + * We tried on various Linux kernels with size > 2 GB, and surprisingly, + * the returned value is always 0x7ffff000 (fwiw, it happens to be the size + * of the largest number of pages that fits in a signed 32-bit integer). + * We do not know whether this is a bug from the kernel, the libc or even + * the man page, but for the time being, we do as if process_vm_readv() could + * return any value. 
+ */ + do { + ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0); + if (0 > ret) { + opal_output(0, "Read %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); + return OPAL_ERROR; + } + src_iov.iov_base = (void *)((char *)src_iov.iov_base + ret); + src_iov.iov_len -= ret; + dst_iov.iov_base = (void *)((char *)dst_iov.iov_base + ret); + dst_iov.iov_len -= ret; + } while (0 < src_iov.iov_len); /* always call the callback function */ cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); diff --git a/opal/mca/btl/vader/btl_vader_put.c b/opal/mca/btl/vader/btl_vader_put.c index c3d2112412..ec2690d312 100644 --- a/opal/mca/btl/vader/btl_vader_put.c +++ b/opal/mca/btl/vader/btl_vader_put.c @@ -2,8 +2,8 @@ /* * Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -69,11 +69,18 @@ int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t * struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size}; ssize_t ret; - ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0); - if (ret != (ssize_t)size) { - opal_output(0, "Wrote %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); - return OPAL_ERROR; - } + /* This should not be needed, see the rationale in mca_btl_vader_get_cma() */ + do { + ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0); + if (0 > ret) { + opal_output(0, "Wrote %ld, expected %lu, errno = %d\n", (long)ret, (unsigned long)size, errno); + return OPAL_ERROR; + } + src_iov.iov_base = (void *)((char *)src_iov.iov_base + ret); + src_iov.iov_len -= ret; + dst_iov.iov_base = (void *)((char *)dst_iov.iov_base + ret); + dst_iov.iov_len -= ret; + } while (0 < src_iov.iov_len); /* always call the callback function */ cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);