1
1
openmpi/ompi/mca/common/cuda/common_cuda.h
Rolf vandeVaart b955dbd6d9 Fix various items discovered by review of ticket #3951.
This commit was SVN r29900.
2013-12-13 21:25:07 +00:00

100 строки
4.7 KiB
C

/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_COMMON_CUDA_H
#define OMPI_MCA_COMMON_CUDA_H
#include "ompi/mca/btl/btl.h"
#include "opal/datatype/opal_convertor.h"
/*
 * Element counts of the two handle arrays embedded in
 * mca_mpool_common_cuda_reg_t.  Each element is a uint64_t, so each
 * array occupies 8 * 8 = 64 bytes.
 * NOTE(review): presumably sized to hold the opaque CUDA IPC memory and
 * event handles - confirm against the CUDA driver headers.
 */
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8

/**
 * Memory-pool registration record carrying CUDA-specific data in
 * addition to the generic mca_mpool_base_registration_t.
 *
 * The generic registration is the first member, so a pointer to this
 * structure and a pointer to its `base` member refer to the same
 * address.
 * NOTE(review): callers presumably rely on that to cast between the two
 * types - confirm before reordering members.
 */
typedef struct mca_mpool_common_cuda_reg_t {
    /* Generic mpool registration; kept as the first member. */
    mca_mpool_base_registration_t base;
    /* Opaque storage, MEMHANDLE_SIZE 64-bit words
     * (presumably a memory handle - TODO confirm). */
    uint64_t memHandle[MEMHANDLE_SIZE];
    /* Opaque storage, EVTHANDLE_SIZE 64-bit words
     * (presumably an event handle - TODO confirm). */
    uint64_t evtHandle[EVTHANDLE_SIZE];
    /* Single 64-bit value (presumably the event tied to this
     * registration - TODO confirm). */
    uint64_t event;
} mca_mpool_common_cuda_reg_t;
/*
 * Global flag and registration entry points.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.
 */

/* Boolean flag defined in another translation unit (presumably true
 * when CUDA support is active at run time - confirm). */
extern bool mca_common_cuda_enabled;

/* Takes no arguments and returns an int status (presumably registers
 * this component's MCA parameters - confirm). */
OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);

/* Operates on the `amount`-byte region starting at `ptr`; `msg` is a
 * caller-supplied string (presumably used to tag diagnostics).
 * Returns nothing, so failures are not reported to the caller. */
OMPI_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);

/* Counterpart of mca_common_cuda_register(); takes the same `ptr` and
 * `msg` but no length.  Returns nothing. */
OMPI_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);

/* Takes the CUDA registration `rget_reg`; presumably makes a stream
 * wait on the event stored in it - confirm.
 * NOTE(review): the name lacks the `cuda_` infix used by its siblings. */
OMPI_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg);
/*
 * Data movement and event bookkeeping.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.
 *
 * Throughout, `msg` is a caller-supplied string (presumably used to tag
 * diagnostics) and `frag` is the BTL descriptor associated with the
 * operation.  Every function returns an int status whose convention is
 * not visible from this header.
 */

/**
 * Copy request for `amount` bytes from `src` to `dst`.
 *
 * @param frag  descriptor associated with the copy
 * @param done  output flag (presumably set non-zero when the copy has
 *              already completed on return - confirm)
 */
OMPI_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
                                         struct mca_btl_base_descriptor_t *frag, int *done);

/*
 * Record an event for `frag` on the IPC, device-to-host or
 * host-to-device path respectively (presumably so that the matching
 * progress_one_cuda_*_event() call can later report completion -
 * confirm).
 */
OMPI_DECLSPEC int mca_common_cuda_record_ipc_event(char *msg,
                                                   struct mca_btl_base_descriptor_t *frag);
OMPI_DECLSPEC int mca_common_cuda_record_dtoh_event(char *msg,
                                                    struct mca_btl_base_descriptor_t *frag);
OMPI_DECLSPEC int mca_common_cuda_record_htod_event(char *msg,
                                                    struct mca_btl_base_descriptor_t *frag);

/*
 * Accessors for the device-to-host and host-to-device streams.  The
 * stream is returned as an untyped pointer, so callers must know the
 * underlying type.
 */
OMPI_DECLSPEC void *mca_common_cuda_get_dtoh_stream(void);
OMPI_DECLSPEC void *mca_common_cuda_get_htod_stream(void);

/*
 * Progress functions, one per event kind.  `frag` is an output
 * parameter (presumably set to the descriptor whose event completed,
 * with the return value telling whether one was found - confirm).
 */
OMPI_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t **frag);
OMPI_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **frag);
OMPI_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag);
/*
 * Handle comparison and event lifetime helpers.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.
 */

/* Compares two CUDA registrations and returns an int (presumably
 * non-zero when their memory handles are equal - confirm). */
OMPI_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg);

/* Both arguments are output parameters: `*event` receives a pointer to
 * a 64-bit event value and `*handle` an untyped handle pointer
 * (presumably the exportable handle for that event - confirm).
 * Returns nothing, so failures are not reported to the caller. */
OMPI_DECLSPEC void mca_common_cuda_construct_event_and_handle(uint64_t **event, void **handle);

/* Counterpart of mca_common_cuda_construct_event_and_handle(); takes
 * the event pointer that call produced. */
OMPI_DECLSPEC void mca_common_cuda_destruct_event(uint64_t *event);
/*
 * Memory-handle operations on mpool registrations.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.
 *
 * The get/open pair share one signature: `base` and `size` describe the
 * memory region, `newreg` is the registration being filled in or used,
 * and `hdrreg` is a second registration (presumably the one received
 * from the peer - confirm).  The unget/close pair take `reg_data` and
 * the registration to release.  Every function returns an int status
 * whose convention is not visible from this header.
 */
OMPI_DECLSPEC int cuda_getmemhandle(void *base, size_t size,
                                    mca_mpool_base_registration_t *newreg,
                                    mca_mpool_base_registration_t *hdrreg);
OMPI_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg);
OMPI_DECLSPEC int cuda_openmemhandle(void *base, size_t size,
                                     mca_mpool_base_registration_t *newreg,
                                     mca_mpool_base_registration_t *hdrreg);
OMPI_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg);
/*
 * Device queries and initialisation.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.  Every function
 * returns an int status whose convention is not visible here.
 */

/* `devicenum` is an output parameter (presumably receives the current
 * device ordinal - confirm). */
OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);

/* `access` is an output parameter; `dev1` and `dev2` identify the two
 * devices being queried. */
OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);

/* Takes no arguments (presumably the first phase of CUDA
 * initialisation - confirm). */
OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);

/* `psize` is an output parameter and `base` the address being queried.
 * NOTE(review): `pbase` is declared as plain void * although its name
 * suggests an output; presumably callers pass the address of a pointer
 * variable - confirm. */
OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
/*
 * Declarations compiled only when OPAL_CUDA_GDR_SUPPORT is non-zero.
 *
 * NOTE(review): only the declarations are visible in this header; the
 * behavioural notes below are inferred from names and signatures and
 * must be confirmed against the implementation.
 */
#if OPAL_CUDA_GDR_SUPPORT
/* Returns a bool for the registration `reg` (presumably true when the
 * memory it describes has since been freed - confirm). */
OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
/* Returns nothing despite the "get" in its name, so any result must be
 * stored through `reg` (presumably a buffer identifier - confirm). */
OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
#endif /* OPAL_CUDA_GDR_SUPPORT */
/**
 * Query opal_convertor_need_buffers() with the CONVERTOR_CUDA flag
 * temporarily masked out of the convertor.
 *
 * The CUDA bit of pConvertor->flags is saved, cleared for the duration
 * of the query, and then OR-ed back in, so on return the flags hold the
 * same CUDA bit they had on entry.  The convertor is modified in place
 * while the query runs.
 *
 * @param pConvertor  convertor to query; its flags are briefly changed
 *
 * @return the value produced by opal_convertor_need_buffers():
 *         0 if no packing is required for sending (the upper layer can
 *           use the pointer to the contiguous user buffer directly);
 *         1 if the data must be packed, i.e. heterogeneous peers
 *           (source arch != dest arch) or a non-contiguous memory
 *           layout.
 */
static inline int32_t opal_convertor_cuda_need_buffers( opal_convertor_t* pConvertor )
{
    /* Remember the CUDA bit (zero if it was not set) so it can be put back. */
    const uint32_t saved_cuda_bit = pConvertor->flags & CONVERTOR_CUDA;
    int32_t need_buffers;

    /* Run the generic query with the CUDA bit hidden. */
    pConvertor->flags &= ~CONVERTOR_CUDA;
    need_buffers = opal_convertor_need_buffers(pConvertor);

    /* OR-ing the saved bit back is a no-op when it was not set on entry. */
    pConvertor->flags |= saved_cuda_bit;

    return need_buffers;
}
#endif /* OMPI_MCA_COMMON_CUDA_H */