Remove redundant macro. This was from the review of an earlier ticket.
Fixes trac:3878. Reviewed by jsquyres. This commit was SVN r29581. The following Trac tickets were found above: Ticket 3878 --> https://svn.open-mpi.org/trac/ompi/ticket/3878
Этот коммит содержится в:
родитель
99f9fdee01
Коммит
ee7510b025
@ -127,7 +127,7 @@ mca_btl_openib_la_SOURCES = $(component_sources)
|
||||
mca_btl_openib_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS)
|
||||
mca_btl_openib_la_LIBADD = $(btl_openib_LIBS) \
|
||||
$(top_ompi_builddir)/ompi/mca/common/verbs/libmca_common_verbs.la
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_btl_openib_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -57,10 +57,10 @@
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
@ -1296,14 +1296,14 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
||||
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*) ptr + reserve );
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
/* If the convertor is copying the data asynchronously, then record an event
|
||||
* that will trigger the callback when it completes. Mark descriptor as async.*/
|
||||
if (convertor->flags & CONVERTOR_CUDA_ASYNC) {
|
||||
mca_common_cuda_record_dtoh_event("btl_openib", (mca_btl_base_descriptor_t *)frag);
|
||||
to_base_frag(frag)->base.des_flags = flags | MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
*size = max_data;
|
||||
|
||||
|
@ -311,10 +311,10 @@ struct mca_btl_openib_component_t {
|
||||
size_t memalign_threshold;
|
||||
void* (*previous_malloc_hook)(size_t __size, const void*);
|
||||
#endif
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
bool cuda_async_send;
|
||||
bool cuda_async_recv;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
|
||||
bool rroce_enable;
|
||||
#endif
|
||||
|
@ -113,12 +113,12 @@ static int btl_openib_component_open(void);
|
||||
static int btl_openib_component_close(void);
|
||||
static mca_btl_base_module_t **btl_openib_component_init(int*, bool, bool);
|
||||
static int btl_openib_component_progress(void);
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
mca_btl_openib_endpoint_t *ep,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
int status);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
/*
|
||||
* Local variables
|
||||
*/
|
||||
@ -605,7 +605,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||
"openib_reg_mr: base=%p, bound=%p, size=%d, flags=0x%x", reg->base, reg->bound,
|
||||
(int) (reg->bound - reg->base + 1), reg->flags));
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
|
||||
mca_common_cuda_register(base, size,
|
||||
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
|
||||
@ -631,7 +631,7 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
|
||||
mca_common_cuda_unregister(openib_reg->base.base,
|
||||
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
|
||||
@ -3133,13 +3133,13 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
/* call registered callback */
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/* The COPY_ASYNC flag should not be set */
|
||||
assert(0 == (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC));
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
reg->cbfunc( &openib_btl->super, hdr->tag, des, reg->cbdata );
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
if (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC) {
|
||||
/* Since ASYNC flag is set, we know this descriptor is being used
|
||||
* for asynchronous copy and cannot be freed yet. Therefore, set
|
||||
@ -3149,7 +3149,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
des->des_cbdata = (void *)ep;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
cqp = (hdr->credits >> 11) & 0x0f;
|
||||
hdr->credits &= 0x87ff;
|
||||
@ -3240,7 +3240,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/**
|
||||
* Called by the PML when the copying of the data out of the fragment
|
||||
* is complete.
|
||||
@ -3316,7 +3316,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
send_credits(ep, cqp);
|
||||
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
|
||||
{
|
||||
@ -3800,7 +3800,7 @@ static int btl_openib_component_progress(void)
|
||||
count += progress_one_device(device);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
/* Check to see if there are any outstanding dtoh CUDA events that
|
||||
* have completed. If so, issue the PML callbacks on the fragments.
|
||||
* The only thing that gets completed here are asynchronous copies
|
||||
@ -3819,7 +3819,7 @@ static int btl_openib_component_progress(void)
|
||||
if (count > 0) {
|
||||
OPAL_OUTPUT((-1, "btl_openib: DONE with openib progress, count=%d", count));
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
return count;
|
||||
|
||||
|
@ -564,7 +564,7 @@ int btl_openib_register_mca_params(void)
|
||||
/* Default to bandwidth auto-detection */
|
||||
mca_btl_openib_module.super.btl_bandwidth = 0;
|
||||
mca_btl_openib_module.super.btl_latency = 4;
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/* Default is enabling CUDA asynchronous send copies */
|
||||
CHECK(reg_bool("cuda_async_send", NULL,
|
||||
"Enable or disable CUDA async send copies "
|
||||
@ -580,7 +580,7 @@ int btl_openib_register_mca_params(void)
|
||||
mca_btl_openib_module.super.btl_max_send_size = 128 * 1024;
|
||||
/* Turn of message coalescing - not sure if it works with GPU buffers */
|
||||
mca_btl_openib_component.use_message_coalescing = 0;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
CHECK(mca_btl_base_param_register(
|
||||
&mca_btl_openib_component.super.btl_version,
|
||||
&mca_btl_openib_module.super));
|
||||
@ -727,7 +727,7 @@ int btl_openib_verify_mca_params (void)
|
||||
mca_btl_openib_component.buffer_alignment = 64;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
if (mca_btl_openib_component.cuda_async_send) {
|
||||
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND;
|
||||
} else {
|
||||
|
@ -51,7 +51,7 @@ mca_btl_smcuda_la_LDFLAGS = -module -avoid-version
|
||||
mca_btl_smcuda_la_LIBADD = \
|
||||
$(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la
|
||||
mca_btl_smcuda_la_CPPFLAGS = $(btl_smcuda_CPPFLAGS)
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_btl_smcuda_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -51,9 +51,9 @@
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/sm/mpool_sm.h"
|
||||
|
||||
@ -91,11 +91,11 @@ mca_btl_smcuda_t mca_btl_smcuda = {
|
||||
mca_btl_smcuda_alloc,
|
||||
mca_btl_smcuda_free,
|
||||
mca_btl_smcuda_prepare_src,
|
||||
#if OMPI_CUDA_SUPPORT || OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
|
||||
#if OPAL_CUDA_SUPPORT || OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
|
||||
mca_btl_smcuda_prepare_dst,
|
||||
#else
|
||||
NULL,
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
mca_btl_smcuda_send,
|
||||
mca_btl_smcuda_sendi,
|
||||
NULL, /* put */
|
||||
@ -107,10 +107,10 @@ mca_btl_smcuda_t mca_btl_smcuda = {
|
||||
}
|
||||
};
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/*
|
||||
* calculate offset of an address from the beginning of a shared memory segment
|
||||
@ -341,7 +341,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Create a local memory pool that sends handles to the remote
|
||||
* side. Note that the res argument is not really used, but
|
||||
* needed to satisfy function signature. */
|
||||
@ -351,7 +351,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
|
||||
if (NULL == smcuda_btl->super.btl_mpool) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/* it is now safe to free the mpool resources */
|
||||
free(res);
|
||||
@ -478,7 +478,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
{
|
||||
mca_mpool_base_resources_t resources; /* unused, but needed */
|
||||
|
||||
@ -489,7 +489,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
|
||||
NULL,
|
||||
&resources);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
return ep;
|
||||
}
|
||||
|
||||
@ -543,11 +543,11 @@ int mca_btl_smcuda_add_procs(
|
||||
return_code = OMPI_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
peers[proc]->proc_ompi = procs[proc];
|
||||
peers[proc]->ipcstate = IPC_INIT;
|
||||
peers[proc]->ipctries = 0;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
n_local_procs++;
|
||||
|
||||
@ -795,9 +795,9 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (0 != reserve) {
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
if ( reserve + max_data <= mca_btl_smcuda_component.eager_limit ) {
|
||||
MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag);
|
||||
} else {
|
||||
@ -820,7 +820,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.base.seg_len = reserve + max_data;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
} else {
|
||||
/* Normally, we are here because we have a GPU buffer and we are preparing
|
||||
* to send it. However, we can also be there because we have received a
|
||||
@ -851,7 +851,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
|
||||
frag->segment.memh_seg_len = registration->bound - registration->base + 1;
|
||||
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
frag->base.des_src = &(frag->segment.base);
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.order = MCA_BTL_NO_ORDER;
|
||||
@ -919,12 +919,12 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_smcuda_component_progress();
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Initiate setting up CUDA IPC support. */
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/* this check should be unnecessary... turn into an assertion? */
|
||||
if( length < mca_btl_smcuda_component.eager_limit ) {
|
||||
@ -1004,12 +1004,12 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_smcuda_component_progress();
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Initiate setting up CUDA IPC support */
|
||||
if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
|
||||
mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/* available header space */
|
||||
frag->hdr->len = frag->segment.base.seg_len;
|
||||
@ -1036,7 +1036,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
@ -1071,10 +1071,10 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
|
||||
frag->base.des_flags = flags;
|
||||
return &frag->base;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* descriptor)
|
||||
@ -1242,7 +1242,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b
|
||||
|
||||
}
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -202,12 +202,12 @@ struct mca_btl_smcuda_component_t {
|
||||
char *sm_mpool_rndv_file_name;
|
||||
char *sm_ctl_file_name;
|
||||
char *sm_rndv_file_name;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int cuda_ipc_verbose;
|
||||
int cuda_ipc_output;
|
||||
int use_cuda_ipc;
|
||||
int use_cuda_ipc_same_gpu;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
};
|
||||
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
|
||||
@ -478,7 +478,7 @@ extern int mca_btl_smcuda_send(
|
||||
mca_btl_base_tag_t tag
|
||||
);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/**
|
||||
* Remote get using device memory.
|
||||
*/
|
||||
@ -519,7 +519,7 @@ enum ipcState {
|
||||
IPC_BAD
|
||||
};
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
|
||||
extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,
|
||||
|
@ -53,10 +53,10 @@
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
@ -163,7 +163,7 @@ static int smcuda_register(void)
|
||||
/* default number of extra procs to allow for future growth */
|
||||
mca_btl_smcuda_param_register_int("sm_extra_procs", 0, &mca_btl_smcuda_component.sm_extra_procs);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Lower priority when CUDA support is not requested */
|
||||
if (ompi_mpi_cuda_support) {
|
||||
mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
||||
@ -175,9 +175,9 @@ static int smcuda_register(void)
|
||||
mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||
mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
|
||||
#else /* OMPI_CUDA_SUPPORT */
|
||||
#else /* OPAL_CUDA_SUPPORT */
|
||||
mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
mca_btl_smcuda.super.btl_eager_limit = 4*1024;
|
||||
mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024;
|
||||
mca_btl_smcuda.super.btl_max_send_size = 32*1024;
|
||||
@ -619,7 +619,7 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
|
||||
/**
|
||||
* Send a CUDA IPC ACK or NOTREADY message back to the peer.
|
||||
@ -827,7 +827,7 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/*
|
||||
* SM component initialization
|
||||
@ -931,13 +931,13 @@ mca_btl_smcuda_component_init(int *num_btls,
|
||||
/* set flag indicating btl not inited */
|
||||
mca_btl_smcuda.btl_inited = false;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Assume CUDA GET works. */
|
||||
mca_btl_smcuda.super.btl_get = mca_btl_smcuda_get_cuda;
|
||||
/* Register a smcuda control function to help setup IPC support */
|
||||
mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbfunc = btl_smcuda_control;
|
||||
mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbdata = NULL;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
|
||||
return btls;
|
||||
@ -1065,9 +1065,9 @@ int mca_btl_smcuda_component_progress(void)
|
||||
seg.seg_len = hdr->len;
|
||||
Frag.base.des_dst_cnt = 1;
|
||||
Frag.base.des_dst = &seg;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
Frag.hdr = hdr; /* needed for peer rank in control messages */
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base),
|
||||
reg->cbdata);
|
||||
/* return the fragment */
|
||||
@ -1126,7 +1126,7 @@ int mca_btl_smcuda_component_progress(void)
|
||||
}
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Check to see if there are any outstanding CUDA events that have
|
||||
* completed. If so, issue the PML callbacks on the fragments.
|
||||
*/
|
||||
@ -1149,6 +1149,6 @@ int mca_btl_smcuda_component_progress(void)
|
||||
}
|
||||
nevents++;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
return nevents;
|
||||
}
|
||||
|
@ -34,9 +34,9 @@ struct mca_btl_base_endpoint_t {
|
||||
* SMP specfic data structures. */
|
||||
int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing
|
||||
* SMP specfic data structures. */
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_mpool_base_module_t *mpool; /**< mpool for remotely registered memory */
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */
|
||||
#endif
|
||||
@ -45,11 +45,11 @@ struct mca_btl_base_endpoint_t {
|
||||
/** lock for concurrent access to endpoint state */
|
||||
opal_mutex_t endpoint_lock;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
ompi_proc_t *proc_ompi; /**< Needed for adding CUDA IPC support dynamically */
|
||||
enum ipcState ipcstate; /**< CUDA IPC connection status */
|
||||
int ipctries; /**< Number of times CUDA IPC connect was sent */
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
};
|
||||
|
||||
void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep);
|
||||
|
@ -37,9 +37,9 @@ static inline void mca_btl_smcuda_frag_common_constructor(mca_btl_smcuda_frag_t*
|
||||
frag->base.des_dst = &frag->segment.base;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_flags = 0;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
frag->registration = NULL;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
}
|
||||
|
||||
static void mca_btl_smcuda_frag1_constructor(mca_btl_smcuda_frag_t* frag)
|
||||
|
@ -48,13 +48,13 @@ typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t;
|
||||
|
||||
struct mca_btl_smcuda_segment_t {
|
||||
mca_btl_base_segment_t base;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
uint8_t key[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */
|
||||
/** Address of the entire memory handle */
|
||||
ompi_ptr_t memh_seg_addr;
|
||||
/** Length in bytes of entire memory handle */
|
||||
uint32_t memh_seg_len;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
};
|
||||
typedef struct mca_btl_smcuda_segment_t mca_btl_smcuda_segment_t;
|
||||
|
||||
@ -65,9 +65,9 @@ struct mca_btl_smcuda_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_smcuda_segment_t segment;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
struct mca_mpool_base_registration_t *registration;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
size_t size;
|
||||
/* pointer written to the FIFO, this is the base of the shared memory region */
|
||||
mca_btl_smcuda_hdr_t *hdr;
|
||||
|
@ -55,7 +55,7 @@ mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component)
|
||||
mca_btl_tcp_la_SOURCES = $(component_sources)
|
||||
mca_btl_tcp_la_LDFLAGS = -module -avoid-version
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_btl_tcp_la_LIBADD = \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -69,9 +69,9 @@
|
||||
#include "btl_tcp_proc.h"
|
||||
#include "btl_tcp_frag.h"
|
||||
#include "btl_tcp_endpoint.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
|
||||
/*
|
||||
@ -1084,9 +1084,9 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_common_cuda_stage_one_init();
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
memcpy(btls, mca_btl_tcp_component.tcp_btls, mca_btl_tcp_component.tcp_num_btls*sizeof(mca_btl_tcp_module_t*));
|
||||
*num_btl_modules = mca_btl_tcp_component.tcp_num_btls;
|
||||
|
@ -21,7 +21,7 @@
|
||||
* This file contains various support functions for doing CUDA
|
||||
* operations. Some of the features are only available in CUDA 4.1
|
||||
* and later, so some code is conditionalized around the
|
||||
* OMPI_CUDA_SUPPORT_41 macro.
|
||||
* OPAL_CUDA_SUPPORT_41 macro.
|
||||
*/
|
||||
#include "ompi_config.h"
|
||||
|
||||
@ -81,13 +81,13 @@ struct cudaFunctionTable {
|
||||
int (*cuEventDestroy)(CUevent);
|
||||
int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
|
||||
int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
|
||||
int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
|
||||
int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
|
||||
int (*cuIpcCloseMemHandle)(CUdeviceptr);
|
||||
int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
int (*cuCtxGetDevice)(CUdevice *);
|
||||
int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
|
||||
int (*cuDeviceGet)(CUdevice *, int);
|
||||
@ -132,7 +132,7 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
static int mca_common_cuda_async = 1;
|
||||
|
||||
/* Array of CUDA events to be queried for IPC stream, sending side and
|
||||
@ -185,7 +185,7 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
|
||||
#define CUDA_DUMP_EVTHANDLE(a)
|
||||
#endif /* OPAL_ENABLE_DEBUG */
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
|
||||
|
||||
/**
|
||||
@ -269,7 +269,7 @@ int mca_common_cuda_stage_one_init(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_common_cuda_warning);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
/* Use this flag to test async vs sync copies */
|
||||
mca_common_cuda_async = 1;
|
||||
(void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
|
||||
@ -287,7 +287,7 @@ int mca_common_cuda_stage_one_init(void)
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&cuda_event_max);
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
|
||||
mca_common_cuda_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
|
||||
@ -439,13 +439,13 @@ int mca_common_cuda_stage_one_init(void)
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
|
||||
OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
|
||||
@ -527,7 +527,7 @@ static int mca_common_cuda_stage_three_init(void)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
if (true == mca_common_cuda_enabled) {
|
||||
/* Set up an array to store outstanding IPC async copy events */
|
||||
cuda_event_ipc_array = NULL;
|
||||
@ -564,7 +564,7 @@ static int mca_common_cuda_stage_three_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
if (true == mca_common_cuda_enabled) {
|
||||
/* Set up an array to store outstanding async dtoh events. Used on the
|
||||
* sending side for asynchronous copies. */
|
||||
@ -782,7 +782,7 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
|
||||
}
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
/*
|
||||
* Get the memory handle of a local section of memory that can be sent
|
||||
* to the remote size so it can access the memory. This is the
|
||||
@ -1467,7 +1467,7 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
|
||||
}
|
||||
#endif /* CUDA_COMMON_TIMING */
|
||||
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
|
||||
/* Routines that get plugged into the opal datatype code */
|
||||
static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
|
||||
@ -1613,7 +1613,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
|
||||
int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
int res;
|
||||
@ -1657,5 +1657,5 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
|
||||
reg->gpu_bufID = bufID;
|
||||
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT_60 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_60 */
|
||||
|
||||
|
@ -75,10 +75,10 @@ OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
|
||||
OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
|
||||
OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
|
||||
OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
|
||||
#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
|
||||
OMPI_DECLSPEC int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
|
||||
OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
|
||||
#endif /* OMPI_CUDA_SUPPORT_60 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_60 */
|
||||
/**
|
||||
* Return: 0 if no packing is required for sending (the upper layer
|
||||
* can use directly the pointer to the contiguous user
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
#
|
||||
# If CUDA support was requested, then build the CUDA support library.
|
||||
# This code checks the variable CUDA_SUPPORT which was set earlier in
|
||||
# the configure sequence by the opal_configure_options.m4 code.
|
||||
# This code checks just makes sure the check was done earlier by the
|
||||
# opal_check_cuda.m4 code.
|
||||
#
|
||||
|
||||
AC_DEFUN([MCA_ompi_common_cuda_CONFIG],[
|
||||
@ -20,24 +20,10 @@ AC_DEFUN([MCA_ompi_common_cuda_CONFIG],[
|
||||
# make sure that CUDA-aware checks have been done
|
||||
AC_REQUIRE([OPAL_CHECK_CUDA])
|
||||
|
||||
# Use CUDA_SUPPORT which was filled in by the opal configure code.
|
||||
AM_CONDITIONAL([MCA_ompi_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
|
||||
AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT],$CUDA_SUPPORT,
|
||||
[Whether we want cuda memory registration support in OMPI code])
|
||||
AS_IF([test "x$CUDA_SUPPORT" = "x1"],
|
||||
AS_IF([test "x$OPAL_CUDA_SUPPORT" = "x1"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
# Check to see if we have features of CUDA 4.1 available as well.
|
||||
AM_CONDITIONAL([MCA_ompi_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
|
||||
AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
|
||||
[Whether we want support CUDA 4.1 features])
|
||||
|
||||
# Check to see if we have features of CUDA 6.0 available as well.
|
||||
AM_CONDITIONAL([MCA_ompi_cuda_support_60], [test "x$CUDA_SUPPORT_60" = "x1"])
|
||||
AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT_60],$CUDA_SUPPORT_60,
|
||||
[Whether we want support CUDA 6.0 features])
|
||||
|
||||
# Copy over the includes needed to build CUDA
|
||||
common_cuda_CPPFLAGS=$opal_datatype_cuda_CPPFLAGS
|
||||
AC_SUBST([common_cuda_CPPFLAGS])
|
||||
|
@ -46,7 +46,7 @@ mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_gpusm_la_SOURCES = $(sources)
|
||||
mca_mpool_gpusm_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS)
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_mpool_gpusm_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -131,7 +131,7 @@ void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
if(0 == align)
|
||||
align = mca_mpool_base_page_size;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* CUDA cannot handle registering overlapping regions, so make
|
||||
* sure each region is page sized and page aligned. */
|
||||
align = mca_mpool_base_page_size;
|
||||
|
@ -46,7 +46,7 @@ mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_rgpusm_la_SOURCES = $(sources)
|
||||
mca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS)
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_mpool_rgpusm_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -44,7 +44,7 @@ mca_mpool_sm_la_SOURCES = $(sources)
|
||||
mca_mpool_sm_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_sm_la_LIBADD = \
|
||||
$(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
mca_mpool_sm_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
@ -53,7 +53,7 @@ bfo_sources = \
|
||||
pml_bfo_start.c
|
||||
|
||||
# If we have CUDA support requested, build the CUDA file also
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
bfo_sources += \
|
||||
pml_bfo_cuda.c
|
||||
endif
|
||||
|
@ -50,7 +50,7 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size) {
|
||||
int rc;
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
||||
if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
||||
unsigned char *base;
|
||||
@ -84,7 +84,7 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
|
||||
#else
|
||||
/* Just do the rendezvous but set initial data to be sent to zero */
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -36,10 +36,10 @@
|
||||
#include "opal/util/arch.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_pml_bfo_cuda_need_buffers(mca_pml_bfo_recv_request_t* recvreq,
|
||||
mca_btl_base_module_t* btl);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
#if PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
#endif /* PML_BFO */
|
||||
@ -540,15 +540,15 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
||||
* sender side is already registered. We need to be smarter here, perhaps
|
||||
* do couple of RDMA reads */
|
||||
if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (mca_pml_bfo_cuda_need_buffers(recvreq, btl)) {
|
||||
mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
|
||||
return;
|
||||
}
|
||||
#else /* OMPI_CUDA_SUPPORT */
|
||||
#else /* OPAL_CUDA_SUPPORT */
|
||||
mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
|
||||
return;
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
}
|
||||
|
||||
MCA_PML_BFO_RDMA_FRAG_ALLOC(frag);
|
||||
@ -583,7 +583,7 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
||||
frag->rdma_btl = btl;
|
||||
#endif /* PML_BFO */
|
||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
||||
if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
|
||||
/* Check to see if this is a CUDA get */
|
||||
@ -600,12 +600,12 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else /* OMPI_CUDA_SUPPORT */
|
||||
#else /* OPAL_CUDA_SUPPORT */
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
||||
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
frag->rdma_hdr.hdr_rget = *hdr;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
|
@ -319,12 +319,12 @@ mca_pml_bfo_send_request_schedule(mca_pml_bfo_send_request_t* sendreq)
|
||||
mca_pml_bfo_send_request_schedule_exclusive(sendreq);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_pml_bfo_send_request_start_cuda(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/**
|
||||
* Start the specified request
|
||||
@ -410,11 +410,11 @@ mca_pml_bfo_send_request_start_btl( mca_pml_bfo_send_request_t* sendreq,
|
||||
MCA_PML_BFO_HDR_FLAGS_CONTIG);
|
||||
}
|
||||
} else {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) {
|
||||
return mca_pml_bfo_send_request_start_cuda(sendreq, bml_btl, size);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0);
|
||||
}
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ ob1_sources = \
|
||||
pml_ob1_start.c
|
||||
|
||||
# If we have CUDA support requested, build the CUDA file also
|
||||
if MCA_ompi_cuda_support
|
||||
if OPAL_cuda_support
|
||||
ob1_sources += \
|
||||
pml_ob1_cuda.c
|
||||
endif
|
||||
|
@ -79,11 +79,11 @@ mca_pml_ob1_t mca_pml_ob1 = {
|
||||
}
|
||||
};
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl,
|
||||
int32_t flags, ompi_proc_t* errproc,
|
||||
char* btlinfo);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl,
|
||||
int32_t flags, ompi_proc_t* errproc,
|
||||
@ -732,12 +732,12 @@ void mca_pml_ob1_process_pending_rdma(void)
|
||||
void mca_pml_ob1_error_handler(
|
||||
struct mca_btl_base_module_t* btl, int32_t flags,
|
||||
ompi_proc_t* errproc, char* btlinfo ) {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (flags & MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC) {
|
||||
mca_pml_ob1_cuda_add_ipc_support(btl, flags, errproc, btlinfo);
|
||||
return;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size) {
|
||||
int rc;
|
||||
#if OMPI_CUDA_SUPPORT_41
|
||||
#if OPAL_CUDA_SUPPORT_41
|
||||
sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
||||
if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
||||
unsigned char *base;
|
||||
@ -87,7 +87,7 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
|
||||
#else
|
||||
/* Just do the rendezvous but set initial data to be sent to zero */
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
||||
#endif /* OMPI_CUDA_SUPPORT_41 */
|
||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -22,12 +22,12 @@
|
||||
#include "pml_ob1.h"
|
||||
#include "pml_ob1_sendreq.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#include "pml_ob1_recvreq.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
static void mca_pml_ob1_process_pending_cuda_async_copies(void);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
int mca_pml_ob1_progress(void)
|
||||
{
|
||||
@ -35,9 +35,9 @@ int mca_pml_ob1_progress(void)
|
||||
int j, completed_requests = 0;
|
||||
bool send_succedded;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_pml_ob1_process_pending_cuda_async_copies();
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
if( OPAL_LIKELY(0 == queue_length) )
|
||||
return 0;
|
||||
@ -87,7 +87,7 @@ int mca_pml_ob1_progress(void)
|
||||
return completed_requests;
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
static void mca_pml_ob1_process_pending_cuda_async_copies(void)
|
||||
{
|
||||
mca_btl_base_descriptor_t *frag;
|
||||
@ -106,4 +106,4 @@ static void mca_pml_ob1_process_pending_cuda_async_copies(void)
|
||||
/* Consider progressing dtoh events here in future */
|
||||
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
@ -44,10 +44,10 @@
|
||||
#include "pml_ob1_recvreq.h"
|
||||
#include "pml_ob1_sendreq.h"
|
||||
#include "pml_ob1_hdr.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_ob1_buffer_t,
|
||||
ompi_free_list_item_t,
|
||||
@ -334,7 +334,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND)) {
|
||||
/* The user's buffer is GPU and this BTL can support asynchronous copies,
|
||||
@ -343,7 +343,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
void *strm = mca_common_cuda_get_dtoh_stream();
|
||||
opal_cuda_set_copy_function_async(&sendreq->req_send.req_base.req_convertor, strm);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
if(send_request_pml_complete_check(sendreq) == false)
|
||||
mca_pml_ob1_send_request_schedule(sendreq);
|
||||
@ -364,7 +364,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
}
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FRAG);
|
||||
recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/* If data is destined for GPU buffer and convertor was set up for asynchronous
|
||||
* copies, then start the copy and return. The copy completion will trigger
|
||||
* the next phase. */
|
||||
@ -379,7 +379,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
|
||||
return;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
mca_pml_ob1_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
|
||||
|
||||
return;
|
||||
|
@ -37,15 +37,15 @@
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "ompi/memchecker.h"
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_pml_ob1_cuda_need_buffers(mca_pml_ob1_recv_request_t* recvreq,
|
||||
mca_btl_base_module_t* btl);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
void mca_pml_ob1_recv_request_process_pending(void)
|
||||
{
|
||||
@ -530,7 +530,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
|
||||
}
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/**
|
||||
* This function is basically the first half of the code in the
|
||||
* mca_pml_ob1_recv_request_progress_frag function. This fires off
|
||||
@ -607,7 +607,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, NULL);
|
||||
}
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/*
|
||||
* Update the recv request status to reflect the number of bytes
|
||||
@ -638,9 +638,9 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
* sender side is already registered. We need to be smarter here, perhaps
|
||||
* do couple of RDMA reads */
|
||||
if (opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (mca_pml_ob1_cuda_need_buffers(recvreq, btl))
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
{
|
||||
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
|
||||
return;
|
||||
@ -651,7 +651,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (OPAL_UNLIKELY(NULL == rdma_bml)) {
|
||||
if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
@ -666,7 +666,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == rdma_bml)) {
|
||||
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||
@ -786,7 +786,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, NULL);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
/* If BTL supports it and this is a CUDA buffer being received into,
|
||||
* have all subsequent FRAGS copied in asynchronously. */
|
||||
if ((recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
|
||||
|
@ -304,7 +304,7 @@ void mca_pml_ob1_recv_request_progress_frag(
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
void mca_pml_ob1_recv_request_frag_copy_start(
|
||||
mca_pml_ob1_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
@ -316,7 +316,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* b
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status );
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -331,7 +331,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
/**
|
||||
* This function is called when the copy of the frag from the GPU buffer
|
||||
* to the internal buffer is complete. Used to support asynchronous
|
||||
@ -362,7 +362,7 @@ mca_pml_ob1_copy_frag_completion( mca_btl_base_module_t* btl,
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
}
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/**
|
||||
* Buffer the entire message and mark as complete.
|
||||
@ -1062,7 +1062,7 @@ cannot_pack:
|
||||
&(sendreq->req_send.req_base), size, PERUSE_SEND);
|
||||
#endif /* OMPI_WANT_PERUSE */
|
||||
|
||||
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
/* At this point, check to see if the BTL is doing an asynchronous
|
||||
* copy. This would have been initiated in the mca_bml_base_prepare_src
|
||||
* called above. The flag is checked here as we let the hdr be
|
||||
@ -1086,7 +1086,7 @@ cannot_pack:
|
||||
}
|
||||
continue;
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/* initiate send - note that this may complete before the call returns */
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG);
|
||||
|
@ -320,12 +320,12 @@ mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
|
||||
mca_pml_ob1_send_request_schedule_exclusive(sendreq);
|
||||
}
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_pml_ob1_send_request_start_cuda(
|
||||
mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/**
|
||||
* Start the specified request
|
||||
@ -411,11 +411,11 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
|
||||
MCA_PML_OB1_HDR_FLAGS_CONTIG);
|
||||
}
|
||||
} else {
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) {
|
||||
return mca_pml_ob1_send_request_start_cuda(sendreq, bml_btl, size);
|
||||
}
|
||||
#endif /* OMPI_CUDA_SUPPORT */
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0);
|
||||
}
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ int ompi_mpi_leave_pinned = -1;
|
||||
bool ompi_mpi_leave_pinned_pipeline = false;
|
||||
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
|
||||
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
|
||||
bool ompi_mpi_built_with_cuda_support = OPAL_INT_TO_BOOL(OMPI_CUDA_SUPPORT);
|
||||
bool ompi_mpi_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
|
||||
bool ompi_mpi_cuda_support;
|
||||
|
||||
uint32_t ompi_hostname_cutoff = UINT32_MAX;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user