1
1
openmpi/opal/mca/common/cuda/common_cuda.h

112 строки
5.1 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_MCA_COMMON_CUDA_H
#define OPAL_MCA_COMMON_CUDA_H
#include "opal/mca/btl/btl.h"
#include "opal/datatype/opal_convertor.h"
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
struct mca_mpool_common_cuda_reg_data_t {
uint64_t memHandle[MEMHANDLE_SIZE];
uint64_t evtHandle[EVTHANDLE_SIZE];
uint64_t event;
opal_ptr_t memh_seg_addr;
size_t memh_seg_len;
};
typedef struct mca_mpool_common_cuda_reg_data_t mca_mpool_common_cuda_reg_data_t;
struct mca_mpool_common_cuda_reg_t {
mca_mpool_base_registration_t base;
mca_mpool_common_cuda_reg_data_t data;
};
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
extern bool mca_common_cuda_enabled;
OPAL_DECLSPEC void mca_common_cuda_register_mca_variables(void);
OPAL_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);
OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg);
OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
struct mca_btl_base_descriptor_t *, int *done);
OPAL_DECLSPEC int mca_common_cuda_record_ipc_event(char *msg,
struct mca_btl_base_descriptor_t *frag);
OPAL_DECLSPEC int mca_common_cuda_record_dtoh_event(char *msg,
struct mca_btl_base_descriptor_t *frag);
OPAL_DECLSPEC int mca_common_cuda_record_htod_event(char *msg,
struct mca_btl_base_descriptor_t *frag);
OPAL_DECLSPEC void *mca_common_cuda_get_dtoh_stream(void);
OPAL_DECLSPEC void *mca_common_cuda_get_htod_stream(void);
OPAL_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg);
OPAL_DECLSPEC void mca_common_cuda_construct_event_and_handle(uintptr_t *event, void *handle);
OPAL_DECLSPEC void mca_common_cuda_destruct_event(uintptr_t event);
OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
OPAL_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
OPAL_DECLSPEC int mca_common_cuda_stage_one_init(void);
OPAL_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
OPAL_DECLSPEC void mca_common_cuda_fini(void);
#if OPAL_CUDA_GDR_SUPPORT
OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
#endif /* OPAL_CUDA_GDR_SUPPORT */
/**
* Return: 0 if no packing is required for sending (the upper layer
* can use directly the pointer to the contiguous user
* buffer).
* 1 if data does need to be packed, i.e. heterogeneous peers
* (source arch != dest arch) or non contiguous memory
* layout.
*/
static inline int32_t opal_convertor_cuda_need_buffers( opal_convertor_t* pConvertor )
{
int32_t retval;
uint32_t cudaflag = pConvertor->flags & CONVERTOR_CUDA; /* Save CUDA flag */
pConvertor->flags &= ~CONVERTOR_CUDA; /* Clear CUDA flag if it exists */
retval = opal_convertor_need_buffers(pConvertor);
pConvertor->flags |= cudaflag; /* Restore CUDA flag */
return retval;
}
#endif /* OPAL_MCA_COMMON_CUDA_H */