1
1

Add a few support functions for future features.

This commit was SVN r29353.
This commit is contained in:
Rolf vandeVaart 2013-10-03 21:06:17 +00:00
parent 61c4baefe0
commit 4dd1c86b36
3 changed files with 94 additions and 1 deletions

View File

@ -1529,3 +1529,66 @@ int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2)
}
return 0;
}
/**
 * Ask the CUDA driver for the base address and size associated with a
 * device address, via cuMemGetAddressRange.
 *
 * @param pbase  [out] Location that receives the base address.  It is handed
 *                     to the driver as a CUdeviceptr *, so it must point at
 *                     storage large enough to hold a CUdeviceptr.
 * @param psize  [out] Location that receives the size reported by the driver.
 * @param base   [in]  Device address to look up.
 *
 * @return 0 on success; OMPI_ERROR (after printing a help message) if the
 *         driver call fails.
 */
int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
{
    CUresult result;

    result = cuFunc.cuMemGetAddressRange((CUdeviceptr *)pbase, psize, (CUdeviceptr)base);
    if (CUDA_SUCCESS != result) {
        opal_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed",
                       true, result, base);
        return OMPI_ERROR;
    }

    /* size_t is not guaranteed to have the same width as unsigned long, so
     * convert explicitly to the type that the %lu conversion expects. */
    opal_output_verbose(10, mca_common_cuda_output,
                        "CUDA: cuMemGetAddressRange passed: addr=%p, pbase=%p, psize=%lu ",
                        base, *(char **)pbase, (unsigned long)*psize);
    return 0;
}
#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
/*
 * Compare the BUFFER_ID that the driver currently reports for reg->base
 * with the BUFFER_ID stored in the registration (reg->gpu_bufID).
 *
 * Returns 1 when the two IDs differ and 0 when they are equal.  If the
 * BUFFER_ID cannot be queried, a help message is printed and 0 is returned.
 */
int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
{
    unsigned char *gpu_addr = reg->base;
    unsigned long long current_id;
    int ids_match;
    int rc;

    rc = cuFunc.cuPointerGetAttribute(&current_id, CU_POINTER_ATTRIBUTE_BUFFER_ID,
                                      (CUdeviceptr)gpu_addr);
    if (CUDA_SUCCESS != rc) {
        opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, rc);
        return 0;
    }

    ids_match = (reg->gpu_bufID == current_id);
    opal_output_verbose(50, mca_common_cuda_output,
                        "CUDA: base=%p, bufID=%llu, reg->gpu_bufID=%llu, %s",
                        gpu_addr, current_id, reg->gpu_bufID,
                        (ids_match ? "BUFFER_ID match":"BUFFER_ID do not match"));

    /* A mismatch is reported to the caller as 1. */
    return ids_match ? 0 : 1;
}
/*
 * Query the BUFFER_ID of the memory at reg->base and record it in
 * reg->gpu_bufID, so that a cached registration can later be checked
 * for staleness.  When the query fails an error is printed; the ID
 * recorded is then whatever the local holds, which starts out as 0.
 */
void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
{
    unsigned long long current_id = 0;
    unsigned char *gpu_addr = reg->base;
    int rc = cuFunc.cuPointerGetAttribute(&current_id, CU_POINTER_ATTRIBUTE_BUFFER_ID,
                                          (CUdeviceptr)gpu_addr);

    if (CUDA_SUCCESS != rc) {
        opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, rc);
    }

    /* Stored unconditionally, on success and on failure alike. */
    reg->gpu_bufID = current_id;
}
#endif /* OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT */

View File

@ -20,6 +20,7 @@
#ifndef OMPI_MCA_COMMON_CUDA_H
#define OMPI_MCA_COMMON_CUDA_H
#include "ompi/mca/btl/btl.h"
#include "opal/datatype/opal_convertor.h"
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
@ -31,6 +32,7 @@ struct mca_mpool_common_cuda_reg_t {
};
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
extern bool mca_common_cuda_enabled;
#define OMPI_GDR_SUPPORT 0
OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);
@ -72,6 +74,27 @@ OMPI_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registratio
OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
/* Look up a device address with cuMemGetAddressRange.  On success the base
 * address is written through pbase, the size through psize, and 0 is
 * returned; on failure OMPI_ERROR is returned. */
OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
/* Return 1 if the BUFFER_ID currently reported for reg->base differs from
 * reg->gpu_bufID; return 0 if they match or if the ID cannot be queried. */
OMPI_DECLSPEC int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
/* Query the BUFFER_ID for reg->base and store it in reg->gpu_bufID. */
OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
#endif /* OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT */
/**
 * Ask opal_convertor_need_buffers() about this convertor with the
 * CONVERTOR_CUDA flag temporarily cleared.  The flag is put back to its
 * previous state before returning, so the convertor's flags are the same
 * on exit as on entry.
 *
 * Return: 0 if no packing is required for sending (the upper layer
 *           can use directly the pointer to the contiguous user
 *           buffer).
 *         1 if data does need to be packed, i.e. heterogeneous peers
 *           (source arch != dest arch) or non contiguous memory
 *           layout.
 */
static inline int32_t opal_convertor_cuda_need_buffers( opal_convertor_t* pConvertor )
{
    /* Remember whether the CUDA bit was set so it can be restored below. */
    const uint32_t saved_cuda_bit = pConvertor->flags & CONVERTOR_CUDA;
    int32_t need_buffers;

    pConvertor->flags &= ~CONVERTOR_CUDA;
    need_buffers = opal_convertor_need_buffers(pConvertor);
    pConvertor->flags |= saved_cuda_bit;

    return need_buffers;
}
#endif /* OMPI_MCA_COMMON_CUDA_H */

View File

@ -165,3 +165,10 @@ An error occurred while trying to map in the address of a function.
Function Name: %s
Error string: %s
CUDA-aware support is disabled.
#
[bufferID failed]
An error occurred while trying to get the BUFFER_ID of a GPU memory region. This
could cause incorrect results. Turn off GPU Direct RDMA support by running with
--mca btl_openib_cuda_want_gdr_support 0.
cuPointerGetAttribute return value: %d
Check the cuda.h file for what the return value means.