osc/rdma: add support for network AMOs
This commit adds support for using network AMOs for MPI_Accumulate, MPI_Fetch_and_op, and MPI_Compare_and_swap. This support is only enabled if the ompi_single_intrinsic info key is specified or the acc_single_intrinsic MCA variable is set. This configuration indicates to this implementation that no long accumulates will be performed since these do not currently mix with the AMO implementation. This commit also cleans up the code somewhat. This includes removing unnecessary struct keywords where the type is also typedef'd. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
184d53a018
Коммит
1ce5847e8b
@ -8,7 +8,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||
@ -86,6 +86,12 @@ struct ompi_osc_rdma_component_t {
|
||||
/** Default value of the no_locks info key for new windows */
|
||||
bool no_locks;
|
||||
|
||||
/** Accumulate operations will only operate on a single intrinsic datatype */
|
||||
bool acc_single_intrinsic;
|
||||
|
||||
/** Use network AMOs when available */
|
||||
bool acc_use_amo;
|
||||
|
||||
/** Priority of the osc/rdma component */
|
||||
unsigned int priority;
|
||||
|
||||
@ -121,12 +127,13 @@ struct ompi_osc_rdma_module_t {
|
||||
/** value of same_size info key for this window */
|
||||
bool same_size;
|
||||
|
||||
/** window should have accumulate ordering... */
|
||||
bool accumulate_ordering;
|
||||
|
||||
/** passive-target synchronization will not be used in this window */
|
||||
bool no_locks;
|
||||
|
||||
bool acc_single_intrinsic;
|
||||
|
||||
bool acc_use_amo;
|
||||
|
||||
/** flavor of this window */
|
||||
int flavor;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -25,7 +25,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
do {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local regions");
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local region(s)");
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
@ -70,7 +70,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void *compare_buffer, void *result_buffer,
|
||||
static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
|
||||
ompi_osc_rdma_module_t *module)
|
||||
@ -79,10 +79,10 @@ static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void
|
||||
|
||||
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
|
||||
memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size);
|
||||
memcpy (result_addr, (void *) (uintptr_t) target_address, datatype->super.size);
|
||||
|
||||
if (0 == memcmp (compare_buffer, result_buffer, datatype->super.size)) {
|
||||
memcpy ((void *) (uintptr_t) target_address, source_buffer, datatype->super.size);
|
||||
if (0 == memcmp (compare_addr, result_addr, datatype->super.size)) {
|
||||
memcpy ((void *) (uintptr_t) target_address, source_addr, datatype->super.size);
|
||||
}
|
||||
|
||||
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
@ -258,15 +258,19 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation faile with opal error code %d", ret);
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation failed with opal error code %d", ret);
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
|
||||
ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_buffer, int source_count,
|
||||
ompi_datatype_t *source_datatype, void *result_buffer, int result_count,
|
||||
static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_addr, int source_count,
|
||||
ompi_datatype_t *source_datatype, void *result_addr, int result_count,
|
||||
ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, int target_count,
|
||||
ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
|
||||
@ -304,15 +308,15 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
|
||||
if (source_datatype) {
|
||||
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
|
||||
source_buffer = (void *)((intptr_t) source_buffer + lb);
|
||||
source_addr = (void *)((intptr_t) source_addr + lb);
|
||||
}
|
||||
|
||||
if (result_datatype) {
|
||||
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
|
||||
result_buffer = (void *)((intptr_t) result_buffer + lb);
|
||||
result_addr = (void *)((intptr_t) result_addr + lb);
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_gacc_contig (sync, source_buffer, source_count, source_datatype, result_buffer,
|
||||
ret = ompi_osc_rdma_gacc_contig (sync, source_addr, source_count, source_datatype, result_addr,
|
||||
result_count, result_datatype, peer, target_address,
|
||||
target_handle, target_count, target_datatype, op,
|
||||
request);
|
||||
@ -323,12 +327,12 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
if (source_datatype) {
|
||||
/* the convertors will handle the lb */
|
||||
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
|
||||
source_buffer = (void *)((intptr_t) source_buffer - lb);
|
||||
source_addr = (void *)((intptr_t) source_addr - lb);
|
||||
}
|
||||
|
||||
if (result_datatype) {
|
||||
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
|
||||
result_buffer = (void *)((intptr_t) result_buffer - lb);
|
||||
result_addr = (void *)((intptr_t) result_addr - lb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -362,7 +366,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
/* the source may be NULL if using MPI_OP_NO_OP with MPI_Get_accumulate */
|
||||
if (source_datatype) {
|
||||
OBJ_CONSTRUCT(&source_convertor, opal_convertor_t);
|
||||
ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_buffer,
|
||||
ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_addr,
|
||||
0, &source_convertor);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
@ -427,7 +431,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
if (result_datatype) {
|
||||
/* prepare a convertor for this part of the result */
|
||||
opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count,
|
||||
result_buffer, 0, &subreq->convertor);
|
||||
result_addr, 0, &subreq->convertor);
|
||||
opal_convertor_set_position (&subreq->convertor, &result_position);
|
||||
subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC;
|
||||
} else {
|
||||
@ -478,41 +482,206 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *data, int status)
|
||||
{
|
||||
ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context;
|
||||
ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data;
|
||||
void *result_buffer = (void *)(intptr_t) ((int64_t *) local_address)[1];
|
||||
void *result_addr = (void *)(intptr_t) ((int64_t *) local_address)[1];
|
||||
size_t size = ((int64_t *) local_address)[2];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete. result: 0x%" PRIx64,
|
||||
*((int64_t *) local_address));
|
||||
|
||||
/* copy the result */
|
||||
memcpy (result_buffer, local_address, 8);
|
||||
memcpy (result_addr, local_address, size);
|
||||
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer,
|
||||
void *result_buffer, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr,
|
||||
void *result_addr, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
const size_t size = datatype->super.size;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
int64_t compare, source;
|
||||
int ret, flags;
|
||||
char *ptr;
|
||||
int ret;
|
||||
|
||||
/* XXX -- TODO -- Update the BTL interface to allow for other CAS sizes */
|
||||
if (datatype->super.size != 8) {
|
||||
if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->selected_btl->btl_flags))) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using 64-bit btl atomics. compare: 0x%"
|
||||
PRIx64 ", origin: 0x%" PRIx64, *((int64_t *) compare_buffer), *((int64_t *) source_buffer));
|
||||
compare = (8 == size) ? ((int64_t *) compare_addr)[0] : ((int32_t *) compare_addr)[0];
|
||||
source = (8 == size) ? ((int64_t *) source_addr)[0] : ((int32_t *) source_addr)[0];
|
||||
flags = (4 == size) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using %d-bit btl atomics. compare: 0x%"
|
||||
PRIx64 ", origin: 0x%" PRIx64, size * 8, *((int64_t *) compare_addr), *((int64_t *) source_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 24, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* store the destination and size in the temporary buffer */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_addr;
|
||||
((int64_t *) ptr)[2] = size;
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, compare, source, flags, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_atomic_complete, sync, frag);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
|
||||
if (1 == ret) {
|
||||
memcpy (result_addr, ptr, size);
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Completion callback for a btl atomic fetch-and-op issued by
 * ompi_osc_rdma_fetch_and_op_atomic.
 *
 * The buffer at local_address is read as an array of 64-bit slots:
 *   [0] the value returned by the atomic,
 *   [1] the user's result address (may be 0, in which case nothing is copied),
 *   [2] the associated request pointer (may be 0),
 *   [3] the number of bytes to copy to the result address.
 *
 * @param[in] btl            btl module (not used here)
 * @param[in] endpoint       btl endpoint (not used here)
 * @param[in] local_address  scratch buffer laid out as described above
 * @param[in] local_handle   registration handle for the scratch buffer (not used here)
 * @param[in] context        the ompi_osc_rdma_sync_t the operation was counted against
 * @param[in] data           the ompi_osc_rdma_frag_t that provided the scratch buffer
 * @param[in] status         completion status, forwarded to the request (if any)
 */
static inline void ompi_osc_rdma_fetch_and_op_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                               void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                               void *context, void *data, int status)
{
    int64_t *slots = (int64_t *) local_address;
    ompi_osc_rdma_frag_t *scratch_frag = (ompi_osc_rdma_frag_t *) data;
    ompi_osc_rdma_sync_t *owner = (ompi_osc_rdma_sync_t *) context;

    /* pull all bookkeeping out of the scratch buffer up front, before the
     * fragment is handed back below */
    void *destination = (void *)(intptr_t) slots[1];
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) (intptr_t) slots[2];
    size_t nbytes = slots[3];

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic fetch-and-op complete. result: 0x%" PRIx64,
                     slots[0]);

    /* hand the fetched value to the caller when a result buffer was supplied */
    if (NULL != destination) {
        memcpy (destination, local_address, nbytes);
    }

    ompi_osc_rdma_sync_rdma_dec (owner);
    ompi_osc_rdma_frag_complete (scratch_frag);

    if (NULL != pending) {
        ompi_osc_rdma_request_complete (pending, status);
    }
}
|
||||
|
||||
static int ompi_osc_rdma_op_mapping[OMPI_OP_NUM_OF_TYPES] = {
|
||||
[OMPI_OP_MAX] = MCA_BTL_ATOMIC_MAX,
|
||||
[OMPI_OP_MIN] = MCA_BTL_ATOMIC_MIN,
|
||||
[OMPI_OP_SUM] = MCA_BTL_ATOMIC_ADD,
|
||||
[OMPI_OP_BAND] = MCA_BTL_ATOMIC_AND,
|
||||
[OMPI_OP_BOR] = MCA_BTL_ATOMIC_OR,
|
||||
[OMPI_OP_BXOR] = MCA_BTL_ATOMIC_XOR,
|
||||
[OMPI_OP_LAND] = MCA_BTL_ATOMIC_LAND,
|
||||
[OMPI_OP_LOR] = MCA_BTL_ATOMIC_LOR,
|
||||
[OMPI_OP_LXOR] = MCA_BTL_ATOMIC_LXOR,
|
||||
[OMPI_OP_REPLACE] = MCA_BTL_ATOMIC_SWAP,
|
||||
};
|
||||
|
||||
static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
|
||||
ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
int ret, btl_op, flags;
|
||||
char *ptr = NULL;
|
||||
int64_t origin;
|
||||
|
||||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
|
||||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
|
||||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
|
||||
flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
|
||||
}
|
||||
|
||||
btl_op = ompi_osc_rdma_op_mapping[op->op_type];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using %d-bit btl atomics. origin: 0x%" PRIx64,
|
||||
(4 == extent) ? 32 : 64, *((int64_t *) origin_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 32, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
origin = (8 == extent) ? ((int64_t *) origin_addr)[0] : ((int32_t *) origin_addr)[0];
|
||||
|
||||
/* store the destination, request, and extent in the temporary buffer for the callback */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_addr;
|
||||
((int64_t *) ptr)[2] = (intptr_t) req;
|
||||
((int64_t *) ptr)[3] = extent;
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, btl_op, origin, flags,
|
||||
MCA_BTL_NO_ORDER, ompi_osc_rdma_fetch_and_op_atomic_complete,
|
||||
sync, frag);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
|
||||
if (OPAL_LIKELY(1 == ret)) {
|
||||
memcpy (result_addr, ptr, extent);
|
||||
if (req) {
|
||||
ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
|
||||
}
|
||||
ret = OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
|
||||
ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
uint64_t address, offset;
|
||||
char *ptr = NULL;
|
||||
int ret, btl_op;
|
||||
|
||||
if (extent > 8) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* align the address. the user should not call with an unaligned address so don't need to range check here */
|
||||
address = target_address & ~7;
|
||||
offset = target_address & ~address;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using compare-and-swap. origin: 0x%" PRIx64,
|
||||
*((int64_t *) origin_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
@ -520,27 +689,124 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
|
||||
}
|
||||
|
||||
/* store the destination in the temporary buffer */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_buffer;
|
||||
do {
|
||||
bool complete = false;
|
||||
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, ((int64_t *)compare_buffer)[0],
|
||||
*((int64_t *) source_buffer), 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_atomic_complete, module, frag);
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
return ret;
|
||||
ret = ompi_osc_get_data_blocking (module, peer->data_endpoint, address, target_handle, ptr, 8);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
((int64_t *) ptr)[1] = ((int64_t *) ptr)[0];
|
||||
|
||||
if (&ompi_mpi_op_no_op.op == op) {
|
||||
memcpy (ptr + offset, origin_addr, extent);
|
||||
} else {
|
||||
ompi_op_reduce (op, (void *) origin_addr, ptr + offset, 1, dt);
|
||||
}
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, address,
|
||||
frag->handle, target_handle, ((int64_t *) ptr)[1],
|
||||
((int64_t *) ptr)[0], 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_atomic_complete, (void *) &complete, NULL);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
while (!complete) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
|
||||
if (((int64_t *) ptr)[1] == ((int64_t *) ptr)[0]) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (result_addr) {
|
||||
memcpy (result_addr, ptr + 8 + offset, extent);
|
||||
}
|
||||
|
||||
if (1 != ret) {
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
} else {
|
||||
memcpy (result_buffer, ptr, 8);
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Completion callback for a btl atomic op issued by
 * ompi_osc_rdma_acc_single_atomic.
 *
 * @param[in] btl            btl module (not used here)
 * @param[in] endpoint       btl endpoint (not used here)
 * @param[in] local_address  local buffer (not used here)
 * @param[in] local_handle   local registration handle (not used here)
 * @param[in] context        the ompi_osc_rdma_sync_t the operation was counted against
 * @param[in] data           the ompi_osc_rdma_request_t to complete, or NULL
 * @param[in] status         completion status, forwarded to the request (if any)
 */
static void ompi_osc_rdma_acc_single_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                      void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                      void *context, void *data, int status)
{
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) data;
    ompi_osc_rdma_sync_t *owner = (ompi_osc_rdma_sync_t *) context;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic accumulate complete");

    /* one fewer outstanding operation on this sync object */
    ompi_osc_rdma_sync_rdma_dec (owner);

    if (NULL != pending) {
        ompi_osc_rdma_request_complete (pending, status);
    }
}
|
||||
|
||||
static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, ompi_datatype_t *dt, ptrdiff_t extent,
|
||||
ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
|
||||
ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
int ret, btl_op, flags;
|
||||
int64_t origin;
|
||||
|
||||
if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
|
||||
/* btl put atomics not supported or disabled. fall back on fetch-and-op */
|
||||
return ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, NULL, dt, extent, peer, target_address, target_handle, op, req);
|
||||
}
|
||||
|
||||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
|
||||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
|
||||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
origin = (8 == extent) ? ((uint64_t *) origin_addr)[0] : ((uint32_t *) origin_addr)[0];
|
||||
|
||||
/* set the appropriate flags for this atomic */
|
||||
flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
|
||||
flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
|
||||
}
|
||||
|
||||
btl_op = ompi_osc_rdma_op_mapping[op->op_type];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate using 64-bit btl atomics. origin: 0x%" PRIx64,
|
||||
*((int64_t *) origin_addr));
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->data_endpoint, target_address,
|
||||
target_handle, btl_op, origin, flags, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_acc_single_atomic_complete, sync, req);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
if (1 == ret) {
|
||||
if (req) {
|
||||
ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
|
||||
}
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* ompi_osc_rdma_cas_get_complete:
|
||||
@ -561,45 +827,49 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. "
|
||||
"status %d", (void *) sync, status);
|
||||
|
||||
if (OMPI_SUCCESS == status) {
|
||||
/* copy data to the user buffer (for gacc) */
|
||||
memcpy (request->result_addr, (void *) source, request->len);
|
||||
|
||||
if (0 == memcmp ((void *) source, request->compare_addr, request->len)) {
|
||||
/* the target and compare buffers match so write the source to the target */
|
||||
memcpy ((void *) source, request->origin_addr, request->len);
|
||||
|
||||
ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address,
|
||||
request->target_address, local_handle,
|
||||
(mca_btl_base_registration_handle_t *) request->ctx,
|
||||
request->len, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_acc_put_complete, request, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "could not start put to complete accumulate operation. opal return code "
|
||||
"%d", ret);
|
||||
}
|
||||
|
||||
/* TODO -- we can do better. probably should queue up the next step and handle it in progress */
|
||||
assert (OPAL_SUCCESS == ret);
|
||||
} else {
|
||||
/* this is a no-op. nothing more to do except release the accumulate lock */
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, request->peer,
|
||||
offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
|
||||
/* the request is now complete and the outstanding rdma operation is complete */
|
||||
ompi_osc_rdma_request_complete (request, status);
|
||||
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
|
||||
}
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* copy data to the user buffer (for gacc) */
|
||||
memcpy (request->result_addr, (void *) source, request->len);
|
||||
|
||||
if (0 == memcmp ((void *) source, request->compare_addr, request->len)) {
|
||||
/* the target and compare buffers match. write the source to the target */
|
||||
memcpy ((void *) source, request->origin_addr, request->len);
|
||||
|
||||
ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address,
|
||||
request->target_address, local_handle,
|
||||
(mca_btl_base_registration_handle_t *) request->ctx,
|
||||
request->len, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_acc_put_complete, request, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "could not start put to complete accumulate operation. opal return code "
|
||||
"%d", ret);
|
||||
}
|
||||
|
||||
/* TODO -- we can do better. probably should queue up the next step and handle it in progress */
|
||||
assert (OPAL_SUCCESS == ret);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* this is a no-op. nothing more to do except release the accumulate lock */
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, request->peer,
|
||||
offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
|
||||
/* the request is now complete and the outstanding rdma operation is complete */
|
||||
ompi_osc_rdma_request_complete (request, status);
|
||||
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
|
||||
}
|
||||
|
||||
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer, void *result_buffer,
|
||||
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle)
|
||||
{
|
||||
@ -649,10 +919,10 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
|
||||
/* set up the request */
|
||||
request->frag = frag;
|
||||
request->origin_addr = (void *) source_buffer;
|
||||
request->origin_addr = (void *) source_addr;
|
||||
request->ctx = (void *) target_handle;
|
||||
request->result_addr = result_buffer;
|
||||
request->compare_addr = compare_buffer;
|
||||
request->result_addr = result_addr;
|
||||
request->compare_addr = compare_addr;
|
||||
request->result_dt = datatype;
|
||||
request->offset = (ptrdiff_t) offset;
|
||||
request->target_address = target_address;
|
||||
@ -670,6 +940,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) {
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
@ -684,8 +957,8 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
|
||||
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
|
||||
struct ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_win_t *win)
|
||||
ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -708,8 +981,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (MCA_OSC_RDMA_SAME_OP <= module->accumulate_ops) {
|
||||
if (win->w_acc_ops <= OMPI_WIN_ACCUMULATE_OPS_SAME_OP) {
|
||||
/* the user has indicated that they will only use the same op (or same op and no op)
|
||||
* for operations on overlapping memory ranges. that indicates it is safe to go ahead
|
||||
* and use network atomic operations. */
|
||||
@ -718,8 +990,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ompi_osc_rdma_peer_local_base (peer)) {
|
||||
return ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt,
|
||||
@ -733,15 +1004,16 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
|
||||
static inline
|
||||
int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
|
||||
ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
int target_rank, MPI_Aint target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *target_handle;
|
||||
uint64_t target_address;
|
||||
ptrdiff_t lb, extent;
|
||||
int ret;
|
||||
|
||||
/* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
|
||||
@ -753,12 +1025,35 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count,
|
||||
&target_address, &target_handle);
|
||||
(void) ompi_datatype_get_extent (origin_datatype, &lb, &extent);
|
||||
|
||||
ret = osc_rdma_get_remote_segment (module, peer, target_disp, extent * target_count, &target_address, &target_handle);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (module->acc_single_intrinsic && extent <= 8) {
|
||||
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
|
||||
if (NULL == result_addr) {
|
||||
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
} else {
|
||||
ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if (ompi_osc_rdma_peer_local_base (peer)) {
|
||||
/* local/self optimization */
|
||||
return ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
|
||||
@ -771,13 +1066,10 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
target_datatype, op, request);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win)
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -801,14 +1093,10 @@ int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
}
|
||||
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win, ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -842,31 +1130,9 @@ int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, struct ompi_op_t *op, struct ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name, target_rank,
|
||||
(unsigned long) target_disp, op->o_name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer, target_rank,
|
||||
target_disp, 1, dt, op, NULL);
|
||||
}
|
||||
|
||||
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
struct ompi_win_t *win, struct ompi_request_t **request)
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win, ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -898,11 +1164,9 @@ int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
struct ompi_win_t *win)
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -921,3 +1185,24 @@ int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
NULL, peer, target_rank, target_disp, target_count, target_datatype,
|
||||
op, NULL);
|
||||
}
|
||||
|
||||
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name,
|
||||
target_rank, (unsigned long) target_disp, op->o_name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer,
|
||||
target_rank, target_disp, 1, dt, op, NULL);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -14,44 +14,30 @@
|
||||
|
||||
#include "osc_rdma.h"
|
||||
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr,
|
||||
void *result_addr, struct ompi_datatype_t *dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_dt,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr,
|
||||
struct ompi_datatype_t *dt, int target,
|
||||
OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_dt,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win, ompi_request_t **request);
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win, ompi_request_t **request);
|
||||
|
||||
|
||||
#endif /* OSC_RDMA_ACCUMULATE_H */
|
||||
|
@ -772,9 +772,9 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request)
|
||||
ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *target_handle;
|
||||
@ -807,9 +807,9 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi
|
||||
ompi_osc_rdma_put_contig, false);
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request)
|
||||
ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *source_handle;
|
||||
@ -841,9 +841,9 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori
|
||||
source_handle, source_count, source_datatype, request,
|
||||
module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true);
|
||||
}
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, ompi_win_t *win)
|
||||
ompi_datatype_t *target_datatype, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -862,10 +862,10 @@ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_da
|
||||
target_count, target_datatype, NULL);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request)
|
||||
ompi_datatype_t *target_datatype, ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -897,9 +897,9 @@ int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_d
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win)
|
||||
ompi_datatype_t *source_datatype, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -918,10 +918,10 @@ int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype
|
||||
source_disp, source_count, source_datatype, NULL);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request)
|
||||
ompi_datatype_t *source_datatype, ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
|
@ -96,23 +96,23 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
|
||||
|
||||
/* prototypes for implementations of MPI RMA window functions. these will be called from the
|
||||
* mpi interface (ompi/mpi/c) */
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win,
|
||||
ompi_request_t **request);
|
||||
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win,
|
||||
ompi_request_t **request);
|
||||
|
||||
/**
|
||||
* @brief read data from a remote memory region (blocking)
|
||||
|
@ -173,6 +173,20 @@ static int ompi_osc_rdma_component_register (void)
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.no_locks);
|
||||
|
||||
mca_osc_rdma_component.acc_single_intrinsic = false;
|
||||
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_single_intrinsic",
|
||||
"Enable optimizations for MPI_Fetch_and_op, MPI_Accumulate, etc for codes "
|
||||
"that will not use anything more than a single predefined datatype (default: false)",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_single_intrinsic);
|
||||
|
||||
mca_osc_rdma_component.acc_use_amo = true;
|
||||
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_use_amo",
|
||||
"Enable the use of network atomic memory operations when using single "
|
||||
"intrinsic optimizations. If not set network compare-and-swap will be "
|
||||
"used instread (default: true)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_use_amo);
|
||||
|
||||
mca_osc_rdma_component.buffer_size = 32768;
|
||||
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "buffer_size",
|
||||
"Size of temporary buffers (default: 32k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
|
||||
@ -585,7 +599,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
}
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) {
|
||||
if (MPI_WIN_FLAVOR_CREATE == module->flavor) {
|
||||
ret = ompi_osc_rdma_initialize_region (module, base, size);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
break;
|
||||
@ -600,6 +614,20 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions;
|
||||
module->state->disp_unit = module->disp_unit;
|
||||
module->state->region_count = 1;
|
||||
region->base = state_region->base + my_base_offset;
|
||||
region->len = size;
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
memcpy (region->btl_handle_data, state_region->btl_handle_data, module->selected_btl->btl_registration_handle_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* barrier to make sure all ranks have attached */
|
||||
shared_comm->c_coll.coll_barrier(shared_comm, shared_comm->c_coll.coll_barrier_module);
|
||||
|
||||
offset = data_base;
|
||||
for (int i = 0 ; i < local_size ; ++i) {
|
||||
ompi_osc_rdma_peer_extended_t *ex_peer;
|
||||
@ -646,21 +674,18 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
if (temp[i].size) {
|
||||
ex_peer->super.base = (uint64_t) (uintptr_t) module->segment_base + offset;
|
||||
ex_peer->super.base = state_region->base + offset;
|
||||
offset += temp[i].size;
|
||||
} else {
|
||||
ex_peer->super.base = 0;
|
||||
}
|
||||
}
|
||||
|
||||
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
|
||||
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
|
||||
|
||||
offset += temp[i].size;
|
||||
} else {
|
||||
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
|
||||
|
||||
ex_peer->super.base = peer_region->base;
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
|
||||
}
|
||||
ex_peer->super.base = peer_region->base;
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1020,6 +1045,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
module->same_disp_unit = check_config_value_bool ("same_disp_unit", info);
|
||||
module->same_size = check_config_value_bool ("same_size", info);
|
||||
module->no_locks = check_config_value_bool ("no_locks", info);
|
||||
module->acc_single_intrinsic = check_config_value_bool ("ompi_single_accumulate", info);
|
||||
module->acc_use_amo = mca_osc_rdma_component.acc_use_amo;
|
||||
|
||||
module->all_sync.module = module;
|
||||
|
||||
@ -1047,14 +1074,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
}
|
||||
}
|
||||
|
||||
/* options */
|
||||
/* FIX ME: should actually check this value... */
|
||||
#if 1
|
||||
module->accumulate_ordering = 1;
|
||||
#else
|
||||
ompi_osc_base_config_value_equal("accumulate_ordering", info, "none");
|
||||
#endif
|
||||
|
||||
ret = ompi_comm_dup(comm, &module->comm);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
ompi_osc_rdma_free (win);
|
||||
@ -1132,17 +1151,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
}
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_share_data (module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* for now the leader is always rank 0 in the communicator */
|
||||
module->leader = ompi_osc_rdma_module_peer (module, 0);
|
||||
|
||||
/* lock data */
|
||||
if (module->no_locks) {
|
||||
win->w_flags |= OMPI_WIN_NO_LOCKS;
|
||||
@ -1177,20 +1185,19 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
/* sync memory - make sure all initialization completed */
|
||||
opal_atomic_mb();
|
||||
|
||||
/* barrier to prevent arrival of lock requests before we're
|
||||
fully created */
|
||||
ret = module->comm->c_coll.coll_barrier(module->comm,
|
||||
module->comm->c_coll.coll_barrier_module);
|
||||
ret = ompi_osc_rdma_share_data (module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
} else {
|
||||
/* for now the leader is always rank 0 in the communicator */
|
||||
module->leader = ompi_osc_rdma_module_peer (module, 0);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d",
|
||||
ompi_comm_get_cid(module->comm));
|
||||
}
|
||||
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d",
|
||||
ompi_comm_get_cid(module->comm));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
@ -43,12 +43,6 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name);
|
||||
|
||||
if (ompi_comm_rank (module->comm) == target) {
|
||||
/* nothing to flush. call one round of progress */
|
||||
ompi_osc_rdma_progress (module);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer);
|
||||
|
@ -218,6 +218,10 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
|
||||
|
||||
memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size);
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -75,6 +75,9 @@ struct ompi_osc_rdma_peer_basic_t {
|
||||
/** remote peer's base pointer */
|
||||
osc_rdma_base_t base;
|
||||
|
||||
/** local pointer to peer's base */
|
||||
osc_rdma_base_t local_base;
|
||||
|
||||
/** registration handle associated with the base */
|
||||
mca_btl_base_registration_handle_t *base_handle;
|
||||
};
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user