1
1

Fix issues reported in ticket #3877. Also added additional comments.

This commit was SVN r29641.
Этот коммит содержится в:
Rolf vandeVaart 2013-11-07 20:44:47 +00:00
родитель 2cf7c40ee5
Коммит a6df7bc33a
3 изменённых файлов: 19 добавлений и 9 удалений

Просмотреть файл

@ -1614,7 +1614,13 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
}
#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
/* Check to see if the memory was freed between the time it was stored in
* the registration cache and now. Return true if the memory was previously
* freed. This is indicated by the BUFFER_ID value in the registration cache
* not matching the BUFFER_ID of the buffer we are checking. Return false
* if the registration is still good.
*/
bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
{
int res;
unsigned long long bufID;
@ -1622,17 +1628,20 @@ int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
res = cuFunc.cuPointerGetAttribute(&bufID, CU_POINTER_ATTRIBUTE_BUFFER_ID,
(CUdeviceptr)dbuf);
/* If we cannot determine the BUFFER_ID, then print a message and default
* to forcing the registration to be kicked out. */
if (res != CUDA_SUCCESS) {
opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, res);
return 0;
opal_show_help("help-mpi-common-cuda.txt", "bufferID failed",
true, ompi_process_info.nodename, res);
return true;
}
opal_output_verbose(50, mca_common_cuda_output,
"CUDA: base=%p, bufID=%llu, reg->gpu_bufID=%llu, %s", dbuf, bufID, reg->gpu_bufID,
(reg->gpu_bufID == bufID ? "BUFFER_ID match":"BUFFER_ID do not match"));
if (bufID != reg->gpu_bufID) {
return 1;
return true;
} else {
return 0;
return false;
}
}

Просмотреть файл

@ -76,7 +76,7 @@ OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1,
OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
OMPI_DECLSPEC int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
#endif /* OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT */
/**

Просмотреть файл

@ -167,9 +167,10 @@ An error occurred while trying to map in the address of a function.
CUDA-aware support is disabled.
#
[bufferID failed]
An error occurred while trying to get the BUFFER_ID of a GPU memory regiion. This
could cause incorrect results. Turn of GPU Direct RDMA support by running with
--mca btl_openib_cuda_want_gdr_support 0.
An error occurred while trying to get the BUFFER_ID of a GPU memory
region. This could cause incorrect results. Turn off GPU Direct RDMA
support by running with --mca btl_openib_cuda_want_gdr_support 0.
Hostname: %s
cuPointerGetAttribute return value: %d
Check the cuda.h file for what the return value means.
[cuPointerSetAttribute failed]