b955dbd6d9
This commit was SVN r29900.
100 строки
4.7 KiB
C
100 строки
4.7 KiB
C
/*
|
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef OMPI_MCA_COMMON_CUDA_H
|
|
#define OMPI_MCA_COMMON_CUDA_H
|
|
#include "ompi/mca/btl/btl.h"
|
|
#include "opal/datatype/opal_convertor.h"
|
|
|
|
#define MEMHANDLE_SIZE 8
|
|
#define EVTHANDLE_SIZE 8
|
|
struct mca_mpool_common_cuda_reg_t {
|
|
mca_mpool_base_registration_t base;
|
|
uint64_t memHandle[MEMHANDLE_SIZE];
|
|
uint64_t evtHandle[EVTHANDLE_SIZE];
|
|
uint64_t event;
|
|
};
|
|
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
|
|
extern bool mca_common_cuda_enabled;
|
|
|
|
OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);
|
|
|
|
OMPI_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);
|
|
|
|
OMPI_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
|
|
|
|
OMPI_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg);
|
|
|
|
OMPI_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
|
struct mca_btl_base_descriptor_t *, int *done);
|
|
|
|
OMPI_DECLSPEC int mca_common_cuda_record_ipc_event(char *msg,
|
|
struct mca_btl_base_descriptor_t *frag);
|
|
OMPI_DECLSPEC int mca_common_cuda_record_dtoh_event(char *msg,
|
|
struct mca_btl_base_descriptor_t *frag);
|
|
OMPI_DECLSPEC int mca_common_cuda_record_htod_event(char *msg,
|
|
struct mca_btl_base_descriptor_t *frag);
|
|
|
|
OMPI_DECLSPEC void *mca_common_cuda_get_dtoh_stream(void);
|
|
OMPI_DECLSPEC void *mca_common_cuda_get_htod_stream(void);
|
|
|
|
OMPI_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t **);
|
|
OMPI_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **);
|
|
OMPI_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **);
|
|
|
|
OMPI_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
|
|
mca_mpool_common_cuda_reg_t *old_reg);
|
|
|
|
OMPI_DECLSPEC void mca_common_cuda_construct_event_and_handle(uint64_t **event, void **handle);
|
|
OMPI_DECLSPEC void mca_common_cuda_destruct_event(uint64_t *event);
|
|
|
|
/*
 * Memory-handle operations on generic mpool registrations.  The get/open
 * forms take a base address, a size and two registrations; the unget/close
 * forms take an untyped data pointer and one registration.
 * NOTE(review): the relationship between newreg and hdrreg is not visible
 * from this header -- confirm against the implementation.
 */
OMPI_DECLSPEC int cuda_getmemhandle(
    void *base,
    size_t,
    mca_mpool_base_registration_t *newreg,
    mca_mpool_base_registration_t *hdrreg);
OMPI_DECLSPEC int cuda_ungetmemhandle(
    void *reg_data,
    mca_mpool_base_registration_t *reg);
OMPI_DECLSPEC int cuda_openmemhandle(
    void *base,
    size_t size,
    mca_mpool_base_registration_t *newreg,
    mca_mpool_base_registration_t *hdrreg);
OMPI_DECLSPEC int cuda_closememhandle(
    void *reg_data,
    mca_mpool_base_registration_t *reg);

/* Device queries; results are written through the int pointer arguments. */
OMPI_DECLSPEC int mca_common_cuda_get_device(
    int *devicenum);
OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(
    int *access,
    int dev1,
    int dev2);

/* Takes no arguments and returns an int status. */
OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);

/*
 * Address-range query for the allocation containing base.
 * NOTE(review): pbase is declared void * rather than void **; presumably it
 * is still used as an output location -- confirm against the implementation.
 */
OMPI_DECLSPEC int mca_common_cuda_get_address_range(
    void *pbase,
    size_t *psize,
    void *base);
|
|
#if OPAL_CUDA_GDR_SUPPORT
/*
 * Declared only when OPAL_CUDA_GDR_SUPPORT evaluates to non-zero.  Both
 * functions operate on a generic mpool registration.
 * NOTE(review): what "buffer id" refers to and where it is stored is not
 * visible from this header -- confirm against the implementation.
 */
OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(
    mca_mpool_base_registration_t *reg);
OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(
    mca_mpool_base_registration_t *reg);
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
|
/**
|
|
* Return: 0 if no packing is required for sending (the upper layer
|
|
* can use directly the pointer to the contiguous user
|
|
* buffer).
|
|
* 1 if data does need to be packed, i.e. heterogeneous peers
|
|
* (source arch != dest arch) or non contiguous memory
|
|
* layout.
|
|
*/
|
|
static inline int32_t opal_convertor_cuda_need_buffers( opal_convertor_t* pConvertor )
|
|
{
|
|
int32_t retval;
|
|
uint32_t cudaflag = pConvertor->flags & CONVERTOR_CUDA; /* Save CUDA flag */
|
|
pConvertor->flags &= ~CONVERTOR_CUDA; /* Clear CUDA flag if it exists */
|
|
retval = opal_convertor_need_buffers(pConvertor);
|
|
pConvertor->flags |= cudaflag; /* Restore CUDA flag */
|
|
return retval;
|
|
}
|
|
|
|
#endif /* OMPI_MCA_COMMON_CUDA_H */
|