
Remove the redundant OMPI_CUDA_SUPPORT macros in favor of the OPAL_CUDA_SUPPORT equivalents. This came out of the review of an earlier ticket.

Fixes trac:3878.  Reviewed by jsquyres.

This commit was SVN r29581.

The following Trac tickets were found above:
  Ticket 3878 --> https://svn.open-mpi.org/trac/ompi/ticket/3878
This commit is contained in:
Rolf vandeVaart 2013-11-01 12:19:40 +00:00
parent 99f9fdee01
commit ee7510b025
35 changed files: 159 additions and 173 deletions
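The rename is mechanical: compile-time CUDA guards throughout the OMPI tree now key off the OPAL-level macros (OPAL_CUDA_SUPPORT, OPAL_CUDA_SUPPORT_41, OPAL_CUDA_SUPPORT_60) that the opal configure code already defines, rather than duplicate OMPI_* copies of the same values. A minimal sketch of the resulting guard pattern follows; register_gpu_memory is a made-up illustration, not a function from this commit, and opal_config.h as the home of the define is an assumption:

    #include <stddef.h>        /* size_t */
    #include "opal_config.h"   /* assumed to provide OPAL_CUDA_SUPPORT (0 or 1) */
    #if OPAL_CUDA_SUPPORT      /* was: OMPI_CUDA_SUPPORT */
    #include "ompi/mca/common/cuda/common_cuda.h"
    #endif /* OPAL_CUDA_SUPPORT */

    static void register_gpu_memory(void *base, size_t size, char *pool_name)
    {
    #if OPAL_CUDA_SUPPORT
        /* Compiled in only when configure detected CUDA support; this is
         * the same call the openib registration hooks make in the diff below. */
        mca_common_cuda_register(base, size, pool_name);
    #endif /* OPAL_CUDA_SUPPORT */
    }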

View file

@@ -127,7 +127,7 @@ mca_btl_openib_la_SOURCES = $(component_sources)
 mca_btl_openib_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS)
 mca_btl_openib_la_LIBADD = $(btl_openib_LIBS) \
 $(top_ompi_builddir)/ompi/mca/common/verbs/libmca_common_verbs.la
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_btl_openib_la_LIBADD += \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -57,10 +57,10 @@
 #include "ompi/mca/mpool/mpool.h"
 #include "ompi/mca/mpool/grdma/mpool_grdma.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "opal/datatype/opal_datatype_cuda.h"
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #include <errno.h>
 #include <sys/types.h>
@@ -1296,14 +1296,14 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
 iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*) ptr + reserve );
 rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 /* If the convertor is copying the data asynchronously, then record an event
 * that will trigger the callback when it completes. Mark descriptor as async.*/
 if (convertor->flags & CONVERTOR_CUDA_ASYNC) {
 mca_common_cuda_record_dtoh_event("btl_openib", (mca_btl_base_descriptor_t *)frag);
 to_base_frag(frag)->base.des_flags = flags | MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 *size = max_data;

View file

@@ -311,10 +311,10 @@ struct mca_btl_openib_component_t {
 size_t memalign_threshold;
 void* (*previous_malloc_hook)(size_t __size, const void*);
 #endif
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 bool cuda_async_send;
 bool cuda_async_recv;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
 bool rroce_enable;
 #endif

View file

@@ -113,12 +113,12 @@ static int btl_openib_component_open(void);
 static int btl_openib_component_close(void);
 static mca_btl_base_module_t **btl_openib_component_init(int*, bool, bool);
 static int btl_openib_component_progress(void);
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
 mca_btl_openib_endpoint_t *ep,
 mca_btl_base_descriptor_t* des,
 int status);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /*
 * Local variables
 */
@@ -605,7 +605,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
 "openib_reg_mr: base=%p, bound=%p, size=%d, flags=0x%x", reg->base, reg->bound,
 (int) (reg->bound - reg->base + 1), reg->flags));
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
 mca_common_cuda_register(base, size,
 openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
@@ -631,7 +631,7 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
 return OMPI_ERROR;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
 mca_common_cuda_unregister(openib_reg->base.base,
 openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
@@ -3133,13 +3133,13 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
 /* call registered callback */
 mca_btl_active_message_callback_t* reg;
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /* The COPY_ASYNC flag should not be set */
 assert(0 == (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC));
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 reg = mca_btl_base_active_message_trigger + hdr->tag;
 reg->cbfunc( &openib_btl->super, hdr->tag, des, reg->cbdata );
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 if (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC) {
 /* Since ASYNC flag is set, we know this descriptor is being used
 * for asynchronous copy and cannot be freed yet. Therefore, set
@@ -3149,7 +3149,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
 des->des_cbdata = (void *)ep;
 return OMPI_SUCCESS;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
 cqp = (hdr->credits >> 11) & 0x0f;
 hdr->credits &= 0x87ff;
@@ -3240,7 +3240,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
 return OMPI_SUCCESS;
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /**
 * Called by the PML when the copying of the data out of the fragment
 * is complete.
@@ -3316,7 +3316,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
 send_credits(ep, cqp);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
 {
@@ -3800,7 +3800,7 @@ static int btl_openib_component_progress(void)
 count += progress_one_device(device);
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 /* Check to see if there are any outstanding dtoh CUDA events that
 * have completed. If so, issue the PML callbacks on the fragments.
 * The only thing that gets completed here are asynchronous copies
@@ -3819,7 +3819,7 @@ static int btl_openib_component_progress(void)
 if (count > 0) {
 OPAL_OUTPUT((-1, "btl_openib: DONE with openib progress, count=%d", count));
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 return count;

View file

@@ -564,7 +564,7 @@ int btl_openib_register_mca_params(void)
 /* Default to bandwidth auto-detection */
 mca_btl_openib_module.super.btl_bandwidth = 0;
 mca_btl_openib_module.super.btl_latency = 4;
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /* Default is enabling CUDA asynchronous send copies */
 CHECK(reg_bool("cuda_async_send", NULL,
 "Enable or disable CUDA async send copies "
@@ -580,7 +580,7 @@ int btl_openib_register_mca_params(void)
 mca_btl_openib_module.super.btl_max_send_size = 128 * 1024;
 /* Turn of message coalescing - not sure if it works with GPU buffers */
 mca_btl_openib_component.use_message_coalescing = 0;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 CHECK(mca_btl_base_param_register(
 &mca_btl_openib_component.super.btl_version,
 &mca_btl_openib_module.super));
@@ -727,7 +727,7 @@ int btl_openib_verify_mca_params (void)
 mca_btl_openib_component.buffer_alignment = 64;
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 if (mca_btl_openib_component.cuda_async_send) {
 mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND;
 } else {

View file

@@ -51,7 +51,7 @@ mca_btl_smcuda_la_LDFLAGS = -module -avoid-version
 mca_btl_smcuda_la_LIBADD = \
 $(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la
 mca_btl_smcuda_la_CPPFLAGS = $(btl_smcuda_CPPFLAGS)
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_btl_smcuda_la_LIBADD += \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -51,9 +51,9 @@
 #include "ompi/class/ompi_free_list.h"
 #include "ompi/runtime/ompi_module_exchange.h"
 #include "ompi/mca/btl/btl.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #include "ompi/mca/mpool/base/base.h"
 #include "ompi/mca/mpool/sm/mpool_sm.h"
@@ -91,11 +91,11 @@ mca_btl_smcuda_t mca_btl_smcuda = {
 mca_btl_smcuda_alloc,
 mca_btl_smcuda_free,
 mca_btl_smcuda_prepare_src,
-#if OMPI_CUDA_SUPPORT || OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
+#if OPAL_CUDA_SUPPORT || OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
 mca_btl_smcuda_prepare_dst,
 #else
 NULL,
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 mca_btl_smcuda_send,
 mca_btl_smcuda_sendi,
 NULL, /* put */
@@ -107,10 +107,10 @@ mca_btl_smcuda_t mca_btl_smcuda = {
 }
 };
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* btl,
 struct mca_btl_base_endpoint_t* endpoint);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /*
 * calculate offset of an address from the beginning of a shared memory segment
@@ -341,7 +341,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
 return rc;
 }
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Create a local memory pool that sends handles to the remote
 * side. Note that the res argument is not really used, but
 * needed to satisfy function signature. */
@@ -351,7 +351,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
 if (NULL == smcuda_btl->super.btl_mpool) {
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /* it is now safe to free the mpool resources */
 free(res);
@@ -478,7 +478,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
 return NULL;
 }
 #endif
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 {
 mca_mpool_base_resources_t resources; /* unused, but needed */
@@ -489,7 +489,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
 NULL,
 &resources);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 return ep;
 }
@@ -543,11 +543,11 @@ int mca_btl_smcuda_add_procs(
 return_code = OMPI_ERROR;
 goto CLEANUP;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 peers[proc]->proc_ompi = procs[proc];
 peers[proc]->ipcstate = IPC_INIT;
 peers[proc]->ipctries = 0;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 n_local_procs++;
@@ -795,9 +795,9 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
 size_t max_data = *size;
 int rc;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (0 != reserve) {
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 if ( reserve + max_data <= mca_btl_smcuda_component.eager_limit ) {
 MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag);
 } else {
@@ -820,7 +820,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
 return NULL;
 }
 frag->segment.base.seg_len = reserve + max_data;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 } else {
 /* Normally, we are here because we have a GPU buffer and we are preparing
 * to send it. However, we can also be there because we have received a
@@ -851,7 +851,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
 frag->segment.memh_seg_len = registration->bound - registration->base + 1;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 frag->base.des_src = &(frag->segment.base);
 frag->base.des_src_cnt = 1;
 frag->base.order = MCA_BTL_NO_ORDER;
@@ -919,12 +919,12 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
 mca_btl_smcuda_component_progress();
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Initiate setting up CUDA IPC support. */
 if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
 mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /* this check should be unnecessary... turn into an assertion? */
 if( length < mca_btl_smcuda_component.eager_limit ) {
@@ -1004,12 +1004,12 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
 mca_btl_smcuda_component_progress();
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Initiate setting up CUDA IPC support */
 if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
 mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /* available header space */
 frag->hdr->len = frag->segment.base.seg_len;
@@ -1036,7 +1036,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
 */
 return 0;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
 struct mca_btl_base_module_t* btl,
 struct mca_btl_base_endpoint_t* endpoint,
@@ -1071,10 +1071,10 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
 frag->base.des_flags = flags;
 return &frag->base;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
 struct mca_btl_base_endpoint_t* ep,
 struct mca_btl_base_descriptor_t* descriptor)
@@ -1242,7 +1242,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /**
 *

View file

@@ -202,12 +202,12 @@ struct mca_btl_smcuda_component_t {
 char *sm_mpool_rndv_file_name;
 char *sm_ctl_file_name;
 char *sm_rndv_file_name;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int cuda_ipc_verbose;
 int cuda_ipc_output;
 int use_cuda_ipc;
 int use_cuda_ipc_same_gpu;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 };
 typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
 OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
@@ -478,7 +478,7 @@ extern int mca_btl_smcuda_send(
 mca_btl_base_tag_t tag
 );
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /**
 * Remote get using device memory.
 */
@@ -519,7 +519,7 @@ enum ipcState {
 IPC_BAD
 };
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,

View file

@@ -53,10 +53,10 @@
 #include "ompi/mca/common/sm/common_sm.h"
 #include "ompi/mca/btl/base/btl_base_error.h"
 #include "ompi/mca/rte/rte.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "ompi/runtime/params.h"
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #if OPAL_ENABLE_FT_CR == 1
 #include "opal/runtime/opal_cr.h"
@@ -163,7 +163,7 @@ static int smcuda_register(void)
 /* default number of extra procs to allow for future growth */
 mca_btl_smcuda_param_register_int("sm_extra_procs", 0, &mca_btl_smcuda_component.sm_extra_procs);
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Lower priority when CUDA support is not requested */
 if (ompi_mpi_cuda_support) {
 mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
@@ -175,9 +175,9 @@ static int smcuda_register(void)
 mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
 mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
 opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
-#else /* OMPI_CUDA_SUPPORT */
+#else /* OPAL_CUDA_SUPPORT */
 mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 mca_btl_smcuda.super.btl_eager_limit = 4*1024;
 mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024;
 mca_btl_smcuda.super.btl_max_send_size = 32*1024;
@@ -619,7 +619,7 @@ out:
 return rc;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /**
 * Send a CUDA IPC ACK or NOTREADY message back to the peer.
@@ -827,7 +827,7 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
 }
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /*
 * SM component initialization
@@ -931,13 +931,13 @@ mca_btl_smcuda_component_init(int *num_btls,
 /* set flag indicating btl not inited */
 mca_btl_smcuda.btl_inited = false;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Assume CUDA GET works. */
 mca_btl_smcuda.super.btl_get = mca_btl_smcuda_get_cuda;
 /* Register a smcuda control function to help setup IPC support */
 mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbfunc = btl_smcuda_control;
 mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbdata = NULL;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 return btls;
@@ -1065,9 +1065,9 @@ int mca_btl_smcuda_component_progress(void)
 seg.seg_len = hdr->len;
 Frag.base.des_dst_cnt = 1;
 Frag.base.des_dst = &seg;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 Frag.hdr = hdr; /* needed for peer rank in control messages */
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base),
 reg->cbdata);
 /* return the fragment */
@@ -1126,7 +1126,7 @@ int mca_btl_smcuda_component_progress(void)
 }
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* Check to see if there are any outstanding CUDA events that have
 * completed. If so, issue the PML callbacks on the fragments.
 */
@@ -1149,6 +1149,6 @@ int mca_btl_smcuda_component_progress(void)
 }
 nevents++;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 return nevents;
 }

View file

@@ -34,9 +34,9 @@ struct mca_btl_base_endpoint_t {
 * SMP specfic data structures. */
 int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing
 * SMP specfic data structures. */
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 mca_mpool_base_module_t *mpool; /**< mpool for remotely registered memory */
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
 int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */
 #endif
@@ -45,11 +45,11 @@ struct mca_btl_base_endpoint_t {
 /** lock for concurrent access to endpoint state */
 opal_mutex_t endpoint_lock;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 ompi_proc_t *proc_ompi; /**< Needed for adding CUDA IPC support dynamically */
 enum ipcState ipcstate; /**< CUDA IPC connection status */
 int ipctries; /**< Number of times CUDA IPC connect was sent */
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 };
 void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep);

View file

@@ -37,9 +37,9 @@ static inline void mca_btl_smcuda_frag_common_constructor(mca_btl_smcuda_frag_t*
 frag->base.des_dst = &frag->segment.base;
 frag->base.des_dst_cnt = 1;
 frag->base.des_flags = 0;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 frag->registration = NULL;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 }
 static void mca_btl_smcuda_frag1_constructor(mca_btl_smcuda_frag_t* frag)

View file

@@ -48,13 +48,13 @@ typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t;
 struct mca_btl_smcuda_segment_t {
 mca_btl_base_segment_t base;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 uint8_t key[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */
 /** Address of the entire memory handle */
 ompi_ptr_t memh_seg_addr;
 /** Length in bytes of entire memory handle */
 uint32_t memh_seg_len;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 };
 typedef struct mca_btl_smcuda_segment_t mca_btl_smcuda_segment_t;
@@ -65,9 +65,9 @@ struct mca_btl_smcuda_frag_t {
 mca_btl_base_descriptor_t base;
 mca_btl_smcuda_segment_t segment;
 struct mca_btl_base_endpoint_t *endpoint;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 struct mca_mpool_base_registration_t *registration;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 size_t size;
 /* pointer written to the FIFO, this is the base of the shared memory region */
 mca_btl_smcuda_hdr_t *hdr;

View file

@@ -55,7 +55,7 @@ mcacomponentdir = $(pkglibdir)
 mcacomponent_LTLIBRARIES = $(component)
 mca_btl_tcp_la_SOURCES = $(component_sources)
 mca_btl_tcp_la_LDFLAGS = -module -avoid-version
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_btl_tcp_la_LIBADD = \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -69,9 +69,9 @@
 #include "btl_tcp_proc.h"
 #include "btl_tcp_frag.h"
 #include "btl_tcp_endpoint.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /*
@@ -1084,9 +1084,9 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
 return NULL;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 mca_common_cuda_stage_one_init();
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 memcpy(btls, mca_btl_tcp_component.tcp_btls, mca_btl_tcp_component.tcp_num_btls*sizeof(mca_btl_tcp_module_t*));
 *num_btl_modules = mca_btl_tcp_component.tcp_num_btls;

View file

@@ -21,7 +21,7 @@
 * This file contains various support functions for doing CUDA
 * operations. Some of the features are only available in CUDA 4.1
 * and later, so some code is conditionalized around the
- * OMPI_CUDA_SUPPORT_41 macro.
+ * OPAL_CUDA_SUPPORT_41 macro.
 */
 #include "ompi_config.h"
@@ -81,13 +81,13 @@ struct cudaFunctionTable {
 int (*cuEventDestroy)(CUevent);
 int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
 int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
 int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
 int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
 int (*cuIpcCloseMemHandle)(CUdeviceptr);
 int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 int (*cuCtxGetDevice)(CUdevice *);
 int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
 int (*cuDeviceGet)(CUdevice *, int);
@@ -132,7 +132,7 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
 NULL,
 NULL);
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
 /* Array of CUDA events to be queried for IPC stream, sending side and
@@ -185,7 +185,7 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 #define CUDA_DUMP_EVTHANDLE(a)
 #endif /* OPAL_ENABLE_DEBUG */
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 /**
@@ -269,7 +269,7 @@ int mca_common_cuda_stage_one_init(void)
 MCA_BASE_VAR_SCOPE_READONLY,
 &mca_common_cuda_warning);
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 /* Use this flag to test async vs sync copies */
 mca_common_cuda_async = 1;
 (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
@@ -287,7 +287,7 @@ int mca_common_cuda_stage_one_init(void)
 OPAL_INFO_LVL_9,
 MCA_BASE_VAR_SCOPE_READONLY,
 &cuda_event_max);
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 mca_common_cuda_output = opal_output_open(NULL);
 opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
@@ -439,13 +439,13 @@ int mca_common_cuda_stage_one_init(void)
 OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
 OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
 OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
 OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
 OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
 OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
 OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
 OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
 OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
@@ -527,7 +527,7 @@ static int mca_common_cuda_stage_three_init(void)
 return OMPI_ERROR;
 }
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 if (true == mca_common_cuda_enabled) {
 /* Set up an array to store outstanding IPC async copy events */
 cuda_event_ipc_array = NULL;
@@ -564,7 +564,7 @@ static int mca_common_cuda_stage_three_init(void)
 }
 }
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 if (true == mca_common_cuda_enabled) {
 /* Set up an array to store outstanding async dtoh events. Used on the
 * sending side for asynchronous copies. */
@@ -782,7 +782,7 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
 }
 }
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 /*
 * Get the memory handle of a local section of memory that can be sent
 * to the remote size so it can access the memory. This is the
@@ -1467,7 +1467,7 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
 }
 #endif /* CUDA_COMMON_TIMING */
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 /* Routines that get plugged into the opal datatype code */
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
@@ -1613,7 +1613,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
 return 0;
 }
-#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
 int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
 {
 int res;
@@ -1657,5 +1657,5 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
 reg->gpu_bufID = bufID;
 }
-#endif /* OMPI_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SUPPORT_60 */

View file

@@ -75,10 +75,10 @@ OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
 OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
 OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
 OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
-#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
 OMPI_DECLSPEC int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
 OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
-#endif /* OMPI_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SUPPORT_60 */
 /**
 * Return: 0 if no packing is required for sending (the upper layer
 * can use directly the pointer to the contiguous user

View file

@@ -10,8 +10,8 @@
 #
 # If CUDA support was requested, then build the CUDA support library.
-# This code checks the variable CUDA_SUPPORT which was set earlier in
-# the configure sequence by the opal_configure_options.m4 code.
+# This code checks just makes sure the check was done earlier by the
+# opal_check_cuda.m4 code.
 #
 AC_DEFUN([MCA_ompi_common_cuda_CONFIG],[
@@ -20,24 +20,10 @@ AC_DEFUN([MCA_ompi_common_cuda_CONFIG],[
 # make sure that CUDA-aware checks have been done
 AC_REQUIRE([OPAL_CHECK_CUDA])
-# Use CUDA_SUPPORT which was filled in by the opal configure code.
-AM_CONDITIONAL([MCA_ompi_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
-AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT],$CUDA_SUPPORT,
-[Whether we want cuda memory registration support in OMPI code])
-AS_IF([test "x$CUDA_SUPPORT" = "x1"],
+AS_IF([test "x$OPAL_CUDA_SUPPORT" = "x1"],
 [$1],
 [$2])
-# Check to see if we have features of CUDA 4.1 available as well.
-AM_CONDITIONAL([MCA_ompi_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
-AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
-[Whether we want support CUDA 4.1 features])
-# Check to see if we have features of CUDA 6.0 available as well.
-AM_CONDITIONAL([MCA_ompi_cuda_support_60], [test "x$CUDA_SUPPORT_60" = "x1"])
-AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT_60],$CUDA_SUPPORT_60,
-[Whether we want support CUDA 6.0 features])
 # Copy over the includes needed to build CUDA
 common_cuda_CPPFLAGS=$opal_datatype_cuda_CPPFLAGS
 AC_SUBST([common_cuda_CPPFLAGS])
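The Makefile.am fragments in this commit test the OPAL_cuda_support Automake conditional, and the C guards test OPAL_CUDA_SUPPORT; both are presumably declared once by the OPAL-side configury that OPAL_CHECK_CUDA pulls in (opal_check_cuda.m4). A hedged sketch of those assumed declarations, not code from this commit:

    # Assumed OPAL-side declarations that the simplified m4 above relies on.
    AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
    AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT], [$CUDA_SUPPORT],
                       [Whether we have CUDA support available in OPAL])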

View file

@@ -46,7 +46,7 @@ mcacomponent_LTLIBRARIES = $(component_install)
 mca_mpool_gpusm_la_SOURCES = $(sources)
 mca_mpool_gpusm_la_LDFLAGS = -module -avoid-version
 mca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS)
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_mpool_gpusm_la_LIBADD += \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -131,7 +131,7 @@ void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
 if(0 == align)
 align = mca_mpool_base_page_size;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 /* CUDA cannot handle registering overlapping regions, so make
 * sure each region is page sized and page aligned. */
 align = mca_mpool_base_page_size;

View file

@@ -46,7 +46,7 @@ mcacomponent_LTLIBRARIES = $(component_install)
 mca_mpool_rgpusm_la_SOURCES = $(sources)
 mca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version
 mca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS)
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_mpool_rgpusm_la_LIBADD += \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -44,7 +44,7 @@ mca_mpool_sm_la_SOURCES = $(sources)
 mca_mpool_sm_la_LDFLAGS = -module -avoid-version
 mca_mpool_sm_la_LIBADD = \
 $(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 mca_mpool_sm_la_LIBADD += \
 $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
 endif

View file

@@ -53,7 +53,7 @@ bfo_sources = \
 pml_bfo_start.c
 # If we have CUDA support requested, build the CUDA file also
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 bfo_sources += \
 pml_bfo_cuda.c
 endif

View file

@@ -50,7 +50,7 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
 mca_bml_base_btl_t* bml_btl,
 size_t size) {
 int rc;
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
 if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
 unsigned char *base;
@@ -84,7 +84,7 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
 #else
 /* Just do the rendezvous but set initial data to be sent to zero */
 rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 return rc;
 }

View file

@@ -36,10 +36,10 @@
 #include "opal/util/arch.h"
 #include "ompi/memchecker.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int mca_pml_bfo_cuda_need_buffers(mca_pml_bfo_recv_request_t* recvreq,
 mca_btl_base_module_t* btl);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 #if PML_BFO
 #include "pml_bfo_failover.h"
 #endif /* PML_BFO */
@@ -540,15 +540,15 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
 * sender side is already registered. We need to be smarter here, perhaps
 * do couple of RDMA reads */
 if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (mca_pml_bfo_cuda_need_buffers(recvreq, btl)) {
 mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
 return;
 }
-#else /* OMPI_CUDA_SUPPORT */
+#else /* OPAL_CUDA_SUPPORT */
 mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
 return;
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 }
 MCA_PML_BFO_RDMA_FRAG_ALLOC(frag);
@@ -583,7 +583,7 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
 frag->rdma_btl = btl;
 #endif /* PML_BFO */
 frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
 if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
 /* Check to see if this is a CUDA get */
@@ -600,12 +600,12 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
 return;
 }
 }
-#else /* OMPI_CUDA_SUPPORT */
+#else /* OPAL_CUDA_SUPPORT */
 if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
 opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
 ompi_rte_abort(-1, NULL);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 frag->rdma_hdr.hdr_rget = *hdr;
 frag->rdma_req = recvreq;
 frag->rdma_ep = bml_endpoint;

View file

@@ -319,12 +319,12 @@ mca_pml_bfo_send_request_schedule(mca_pml_bfo_send_request_t* sendreq)
 mca_pml_bfo_send_request_schedule_exclusive(sendreq);
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int mca_pml_bfo_send_request_start_cuda(
 mca_pml_bfo_send_request_t* sendreq,
 mca_bml_base_btl_t* bml_btl,
 size_t size);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /**
 * Start the specified request
@@ -410,11 +410,11 @@ mca_pml_bfo_send_request_start_btl( mca_pml_bfo_send_request_t* sendreq,
 MCA_PML_BFO_HDR_FLAGS_CONTIG);
 }
 } else {
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) {
 return mca_pml_bfo_send_request_start_cuda(sendreq, bml_btl, size);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0);
 }
 }

View file

@@ -49,7 +49,7 @@ ob1_sources = \
 pml_ob1_start.c
 # If we have CUDA support requested, build the CUDA file also
-if MCA_ompi_cuda_support
+if OPAL_cuda_support
 ob1_sources += \
 pml_ob1_cuda.c
 endif

View file

@@ -79,11 +79,11 @@ mca_pml_ob1_t mca_pml_ob1 = {
 }
 };
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl,
 int32_t flags, ompi_proc_t* errproc,
 char* btlinfo);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl,
 int32_t flags, ompi_proc_t* errproc,
@@ -732,12 +732,12 @@ void mca_pml_ob1_process_pending_rdma(void)
 void mca_pml_ob1_error_handler(
 struct mca_btl_base_module_t* btl, int32_t flags,
 ompi_proc_t* errproc, char* btlinfo ) {
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (flags & MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC) {
 mca_pml_ob1_cuda_add_ipc_support(btl, flags, errproc, btlinfo);
 return;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 ompi_rte_abort(-1, NULL);
 }

View file

@@ -53,7 +53,7 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
 mca_bml_base_btl_t* bml_btl,
 size_t size) {
 int rc;
-#if OMPI_CUDA_SUPPORT_41
+#if OPAL_CUDA_SUPPORT_41
 sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
 if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
 unsigned char *base;
@@ -87,7 +87,7 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
 #else
 /* Just do the rendezvous but set initial data to be sent to zero */
 rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OMPI_CUDA_SUPPORT_41 */
+#endif /* OPAL_CUDA_SUPPORT_41 */
 return rc;
 }

View file

@@ -22,12 +22,12 @@
 #include "pml_ob1.h"
 #include "pml_ob1_sendreq.h"
 #include "ompi/mca/bml/base/base.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "ompi/mca/common/cuda/common_cuda.h"
 #include "pml_ob1_recvreq.h"
 #include "ompi/runtime/params.h"
 static void mca_pml_ob1_process_pending_cuda_async_copies(void);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 int mca_pml_ob1_progress(void)
 {
@@ -35,9 +35,9 @@ int mca_pml_ob1_progress(void)
 int j, completed_requests = 0;
 bool send_succedded;
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 mca_pml_ob1_process_pending_cuda_async_copies();
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 if( OPAL_LIKELY(0 == queue_length) )
 return 0;
@@ -87,7 +87,7 @@ int mca_pml_ob1_progress(void)
 return completed_requests;
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 static void mca_pml_ob1_process_pending_cuda_async_copies(void)
 {
 mca_btl_base_descriptor_t *frag;
@@ -106,4 +106,4 @@ static void mca_pml_ob1_process_pending_cuda_async_copies(void)
 /* Consider progressing dtoh events here in future */
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */

View file

@@ -44,10 +44,10 @@
 #include "pml_ob1_recvreq.h"
 #include "pml_ob1_sendreq.h"
 #include "pml_ob1_hdr.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "opal/datatype/opal_datatype_cuda.h"
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 OBJ_CLASS_INSTANCE( mca_pml_ob1_buffer_t,
 ompi_free_list_item_t,
@@ -334,7 +334,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
 OPAL_THREAD_ADD32(&sendreq->req_state, -1);
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
 (btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND)) {
 /* The user's buffer is GPU and this BTL can support asynchronous copies,
@@ -343,7 +343,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
 void *strm = mca_common_cuda_get_dtoh_stream();
 opal_cuda_set_copy_function_async(&sendreq->req_send.req_base.req_convertor, strm);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 if(send_request_pml_complete_check(sendreq) == false)
 mca_pml_ob1_send_request_schedule(sendreq);
@@ -364,7 +364,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
 }
 ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FRAG);
 recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /* If data is destined for GPU buffer and convertor was set up for asynchronous
 * copies, then start the copy and return. The copy completion will trigger
 * the next phase. */
@@ -379,7 +379,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
 return;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 mca_pml_ob1_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
 return;

View file

@@ -37,15 +37,15 @@
 #include "ompi/mca/bml/base/base.h"
 #include "opal/util/arch.h"
 #include "ompi/memchecker.h"
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 #include "opal/datatype/opal_datatype_cuda.h"
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int mca_pml_ob1_cuda_need_buffers(mca_pml_ob1_recv_request_t* recvreq,
 mca_btl_base_module_t* btl);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 void mca_pml_ob1_recv_request_process_pending(void)
 {
@@ -530,7 +530,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
 }
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /**
 * This function is basically the first half of the code in the
 * mca_pml_ob1_recv_request_progress_frag function. This fires off
@@ -607,7 +607,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
 mca_pml_ob1_recv_request_schedule(recvreq, NULL);
 }
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /*
 * Update the recv request status to reflect the number of bytes
@@ -638,9 +638,9 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
 * sender side is already registered. We need to be smarter here, perhaps
 * do couple of RDMA reads */
 if (opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (mca_pml_ob1_cuda_need_buffers(recvreq, btl))
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 {
 mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
 return;
@@ -651,7 +651,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
 bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
 rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (OPAL_UNLIKELY(NULL == rdma_bml)) {
 if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
 mca_bml_base_btl_t *bml_btl;
@@ -666,7 +666,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
 return;
 }
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 if (OPAL_UNLIKELY(NULL == rdma_bml)) {
 opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
@@ -786,7 +786,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
 mca_pml_ob1_recv_request_schedule(recvreq, NULL);
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
 /* If BTL supports it and this is a CUDA buffer being received into,
 * have all subsequent FRAGS copied in asynchronously. */
 if ((recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) &&

View file

@@ -304,7 +304,7 @@ void mca_pml_ob1_recv_request_progress_frag(
 mca_btl_base_segment_t* segments,
 size_t num_segments);
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 void mca_pml_ob1_recv_request_frag_copy_start(
 mca_pml_ob1_recv_request_t* req,
 struct mca_btl_base_module_t* btl,
@@ -316,7 +316,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* b
 struct mca_btl_base_endpoint_t* ep,
 struct mca_btl_base_descriptor_t* des,
 int status );
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /**
 *
 */

View file

@@ -331,7 +331,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
 MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
 }
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 /**
 * This function is called when the copy of the frag from the GPU buffer
 * to the internal buffer is complete. Used to support asynchronous
@@ -362,7 +362,7 @@ mca_pml_ob1_copy_frag_completion( mca_btl_base_module_t* btl,
 orte_errmgr.abort(-1, NULL);
 }
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /**
 * Buffer the entire message and mark as complete.
@@ -1062,7 +1062,7 @@ cannot_pack:
 &(sendreq->req_send.req_base), size, PERUSE_SEND);
 #endif /* OMPI_WANT_PERUSE */
-#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
+#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
 /* At this point, check to see if the BTL is doing an asynchronous
 * copy. This would have been initiated in the mca_bml_base_prepare_src
 * called above. The flag is checked here as we let the hdr be
@@ -1086,7 +1086,7 @@ cannot_pack:
 }
 continue;
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /* initiate send - note that this may complete before the call returns */
 rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_FRAG);

View file

@@ -320,12 +320,12 @@ mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
 mca_pml_ob1_send_request_schedule_exclusive(sendreq);
 }
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 int mca_pml_ob1_send_request_start_cuda(
 mca_pml_ob1_send_request_t* sendreq,
 mca_bml_base_btl_t* bml_btl,
 size_t size);
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 /**
 * Start the specified request
@@ -411,11 +411,11 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
 MCA_PML_OB1_HDR_FLAGS_CONTIG);
 }
 } else {
-#if OMPI_CUDA_SUPPORT
+#if OPAL_CUDA_SUPPORT
 if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) {
 return mca_pml_ob1_send_request_start_cuda(sendreq, bml_btl, size);
 }
-#endif /* OMPI_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_SUPPORT */
 rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0);
 }
 }

View file

@@ -63,7 +63,7 @@ int ompi_mpi_leave_pinned = -1;
 bool ompi_mpi_leave_pinned_pipeline = false;
 bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
 bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
-bool ompi_mpi_built_with_cuda_support = OPAL_INT_TO_BOOL(OMPI_CUDA_SUPPORT);
+bool ompi_mpi_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
 bool ompi_mpi_cuda_support;
 uint32_t ompi_hostname_cutoff = UINT32_MAX;