Merge pull request #2045 from hjelmn/osc_rdma_atomics
osc/rdma: add support for network AMOs
Этот коммит содержится в:
Коммит
7c8e7691a7
@ -8,7 +8,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||
@ -86,6 +86,12 @@ struct ompi_osc_rdma_component_t {
|
||||
/** Default value of the no_locks info key for new windows */
|
||||
bool no_locks;
|
||||
|
||||
/** Accumulate operations will only operate on a single intrinsic datatype */
|
||||
bool acc_single_intrinsic;
|
||||
|
||||
/** Use network AMOs when available */
|
||||
bool acc_use_amo;
|
||||
|
||||
/** Priority of the osc/rdma component */
|
||||
unsigned int priority;
|
||||
|
||||
@ -121,12 +127,13 @@ struct ompi_osc_rdma_module_t {
|
||||
/** value of same_size info key for this window */
|
||||
bool same_size;
|
||||
|
||||
/** window should have accumulate ordering... */
|
||||
bool accumulate_ordering;
|
||||
|
||||
/** passive-target synchronization will not be used in this window */
|
||||
bool no_locks;
|
||||
|
||||
bool acc_single_intrinsic;
|
||||
|
||||
bool acc_use_amo;
|
||||
|
||||
/** flavor of this window */
|
||||
int flavor;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -25,7 +25,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
do {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local regions");
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local region(s)");
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
@ -70,7 +70,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void *compare_buffer, void *result_buffer,
|
||||
static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
|
||||
ompi_osc_rdma_module_t *module)
|
||||
@ -79,10 +79,10 @@ static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void
|
||||
|
||||
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
|
||||
memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size);
|
||||
memcpy (result_addr, (void *) (uintptr_t) target_address, datatype->super.size);
|
||||
|
||||
if (0 == memcmp (compare_buffer, result_buffer, datatype->super.size)) {
|
||||
memcpy ((void *) (uintptr_t) target_address, source_buffer, datatype->super.size);
|
||||
if (0 == memcmp (compare_addr, result_addr, datatype->super.size)) {
|
||||
memcpy ((void *) (uintptr_t) target_address, source_addr, datatype->super.size);
|
||||
}
|
||||
|
||||
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
@ -258,15 +258,19 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation faile with opal error code %d", ret);
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation failed with opal error code %d", ret);
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
|
||||
ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_buffer, int source_count,
|
||||
ompi_datatype_t *source_datatype, void *result_buffer, int result_count,
|
||||
static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_addr, int source_count,
|
||||
ompi_datatype_t *source_datatype, void *result_addr, int result_count,
|
||||
ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, int target_count,
|
||||
ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
|
||||
@ -304,15 +308,15 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
|
||||
if (source_datatype) {
|
||||
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
|
||||
source_buffer = (void *)((intptr_t) source_buffer + lb);
|
||||
source_addr = (void *)((intptr_t) source_addr + lb);
|
||||
}
|
||||
|
||||
if (result_datatype) {
|
||||
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
|
||||
result_buffer = (void *)((intptr_t) result_buffer + lb);
|
||||
result_addr = (void *)((intptr_t) result_addr + lb);
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_gacc_contig (sync, source_buffer, source_count, source_datatype, result_buffer,
|
||||
ret = ompi_osc_rdma_gacc_contig (sync, source_addr, source_count, source_datatype, result_addr,
|
||||
result_count, result_datatype, peer, target_address,
|
||||
target_handle, target_count, target_datatype, op,
|
||||
request);
|
||||
@ -323,12 +327,12 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
if (source_datatype) {
|
||||
/* the convertors will handle the lb */
|
||||
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
|
||||
source_buffer = (void *)((intptr_t) source_buffer - lb);
|
||||
source_addr = (void *)((intptr_t) source_addr - lb);
|
||||
}
|
||||
|
||||
if (result_datatype) {
|
||||
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
|
||||
result_buffer = (void *)((intptr_t) result_buffer - lb);
|
||||
result_addr = (void *)((intptr_t) result_addr - lb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -362,7 +366,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
/* the source may be NULL if using MPI_OP_NO_OP with MPI_Get_accumulate */
|
||||
if (source_datatype) {
|
||||
OBJ_CONSTRUCT(&source_convertor, opal_convertor_t);
|
||||
ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_buffer,
|
||||
ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_addr,
|
||||
0, &source_convertor);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
@ -427,7 +431,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
if (result_datatype) {
|
||||
/* prepare a convertor for this part of the result */
|
||||
opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count,
|
||||
result_buffer, 0, &subreq->convertor);
|
||||
result_addr, 0, &subreq->convertor);
|
||||
opal_convertor_set_position (&subreq->convertor, &result_position);
|
||||
subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC;
|
||||
} else {
|
||||
@ -478,41 +482,206 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/**
 * Completion callback for a BTL atomic compare-and-swap.
 *
 * The initiator stores bookkeeping in the 64-bit slots that follow the value
 * slot of the temporary fragment: slot 1 holds the user's result address and
 * slot 2 holds the operand size in bytes. Slot 0 is the buffer handed to the
 * BTL as the local (result) address.
 *
 * @param btl            BTL module that ran the operation (unused)
 * @param endpoint       BTL endpoint of the target (unused)
 * @param local_address  start of the temporary fragment described above
 * @param local_handle   registration handle for local_address (unused)
 * @param context        the ompi_osc_rdma_sync_t the operation was counted against
 * @param data           the ompi_osc_rdma_frag_t that owns local_address
 * @param status         completion status reported by the BTL (unused)
 */
static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                               void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                               void *context, void *data, int status)
{
    const int64_t *scratch = (const int64_t *) local_address;
    ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data;
    ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context;

    /* pull the bookkeeping out of the fragment before it is released below */
    void *destination = (void *)(intptr_t) scratch[1];
    size_t nbytes = scratch[2];

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete. result: 0x%" PRIx64,
                     scratch[0]);

    /* hand the fetched value back to the caller's result buffer */
    memcpy (destination, local_address, nbytes);

    /* the operation is no longer outstanding; drop the count and release the fragment */
    ompi_osc_rdma_sync_rdma_dec (sync);
    ompi_osc_rdma_frag_complete (frag);
}
|
||||
|
||||
static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer,
|
||||
void *result_buffer, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr,
|
||||
void *result_addr, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
|
||||
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
const size_t size = datatype->super.size;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
int64_t compare, source;
|
||||
int ret, flags;
|
||||
char *ptr;
|
||||
int ret;
|
||||
|
||||
/* XXX -- TODO -- Update the BTL interface to allow for other CAS sizes */
|
||||
if (datatype->super.size != 8) {
|
||||
if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->selected_btl->btl_flags))) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using 64-bit btl atomics. compare: 0x%"
|
||||
PRIx64 ", origin: 0x%" PRIx64, *((int64_t *) compare_buffer), *((int64_t *) source_buffer));
|
||||
compare = (8 == size) ? ((int64_t *) compare_addr)[0] : ((int32_t *) compare_addr)[0];
|
||||
source = (8 == size) ? ((int64_t *) source_addr)[0] : ((int32_t *) source_addr)[0];
|
||||
flags = (4 == size) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using %d-bit btl atomics. compare: 0x%"
|
||||
PRIx64 ", origin: 0x%" PRIx64, size * 8, *((int64_t *) compare_addr), *((int64_t *) source_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 24, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* store the destination and size in the temporary buffer */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_addr;
|
||||
((int64_t *) ptr)[2] = size;
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, compare, source, flags, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_atomic_complete, sync, frag);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
|
||||
if (1 == ret) {
|
||||
memcpy (result_addr, ptr, size);
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Completion callback for a BTL atomic fetch-and-op.
 *
 * The temporary fragment is laid out as 64-bit slots: slot 0 is the buffer
 * given to the BTL as the local (result) address, slot 1 the user's result
 * address (may be NULL), slot 2 the request to complete (may be NULL) and
 * slot 3 the operand size in bytes.
 *
 * @param btl            BTL module that ran the operation (unused)
 * @param endpoint       BTL endpoint of the target (unused)
 * @param local_address  start of the temporary fragment described above
 * @param local_handle   registration handle for local_address (unused)
 * @param context        the ompi_osc_rdma_sync_t the operation was counted against
 * @param data           the ompi_osc_rdma_frag_t that owns local_address
 * @param status         completion status, forwarded to the request if there is one
 */
static inline void ompi_osc_rdma_fetch_and_op_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                               void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                               void *context, void *data, int status)
{
    const int64_t *scratch = (const int64_t *) local_address;
    ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data;
    ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context;

    /* read all bookkeeping up front; the fragment is released further down */
    void *destination = (void *)(intptr_t) scratch[1];
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) (intptr_t) scratch[2];
    size_t nbytes = scratch[3];

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic fetch-and-op complete. result: 0x%" PRIx64,
                     scratch[0]);

    /* the caller may not want the fetched value (no result buffer) */
    if (NULL != destination) {
        memcpy (destination, local_address, nbytes);
    }

    ompi_osc_rdma_sync_rdma_dec (sync);
    ompi_osc_rdma_frag_complete (frag);

    if (NULL != pending) {
        ompi_osc_rdma_request_complete (pending, status);
    }
}
|
||||
|
||||
static int ompi_osc_rdma_op_mapping[OMPI_OP_NUM_OF_TYPES] = {
|
||||
[OMPI_OP_MAX] = MCA_BTL_ATOMIC_MAX,
|
||||
[OMPI_OP_MIN] = MCA_BTL_ATOMIC_MIN,
|
||||
[OMPI_OP_SUM] = MCA_BTL_ATOMIC_ADD,
|
||||
[OMPI_OP_BAND] = MCA_BTL_ATOMIC_AND,
|
||||
[OMPI_OP_BOR] = MCA_BTL_ATOMIC_OR,
|
||||
[OMPI_OP_BXOR] = MCA_BTL_ATOMIC_XOR,
|
||||
[OMPI_OP_LAND] = MCA_BTL_ATOMIC_LAND,
|
||||
[OMPI_OP_LOR] = MCA_BTL_ATOMIC_LOR,
|
||||
[OMPI_OP_LXOR] = MCA_BTL_ATOMIC_LXOR,
|
||||
[OMPI_OP_REPLACE] = MCA_BTL_ATOMIC_SWAP,
|
||||
};
|
||||
|
||||
static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
|
||||
ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
int ret, btl_op, flags;
|
||||
char *ptr = NULL;
|
||||
int64_t origin;
|
||||
|
||||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
|
||||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
|
||||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
|
||||
flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
|
||||
}
|
||||
|
||||
btl_op = ompi_osc_rdma_op_mapping[op->op_type];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using %d-bit btl atomics. origin: 0x%" PRIx64,
|
||||
(4 == extent) ? 32 : 64, *((int64_t *) origin_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 32, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
origin = (8 == extent) ? ((int64_t *) origin_addr)[0] : ((int32_t *) origin_addr)[0];
|
||||
|
||||
/* store the destination, request, and extent in the temporary buffer for the callback */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_addr;
|
||||
((int64_t *) ptr)[2] = (intptr_t) req;
|
||||
((int64_t *) ptr)[3] = extent;
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, btl_op, origin, flags,
|
||||
MCA_BTL_NO_ORDER, ompi_osc_rdma_fetch_and_op_atomic_complete,
|
||||
sync, frag);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
|
||||
if (OPAL_LIKELY(1 == ret)) {
|
||||
memcpy (result_addr, ptr, extent);
|
||||
if (req) {
|
||||
ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
|
||||
}
|
||||
ret = OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
|
||||
ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
uint64_t address, offset;
|
||||
char *ptr = NULL;
|
||||
int ret, btl_op;
|
||||
|
||||
if (extent > 8) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* align the address. the user should not call with an unaligned address so don't need to range check here */
|
||||
address = target_address & ~7;
|
||||
offset = target_address & ~address;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using compare-and-swap. origin: 0x%" PRIx64,
|
||||
*((int64_t *) origin_addr));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
@ -520,27 +689,124 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
|
||||
}
|
||||
|
||||
/* store the destination in the temporary buffer */
|
||||
((int64_t *) ptr)[1] = (intptr_t) result_buffer;
|
||||
do {
|
||||
bool complete = false;
|
||||
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
|
||||
frag->handle, target_handle, ((int64_t *)compare_buffer)[0],
|
||||
*((int64_t *) source_buffer), 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_atomic_complete, module, frag);
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
ret = ompi_osc_get_data_blocking (module, peer->data_endpoint, address, target_handle, ptr, 8);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (1 != ret) {
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
} else {
|
||||
memcpy (result_buffer, ptr, 8);
|
||||
((int64_t *) ptr)[1] = ((int64_t *) ptr)[0];
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
if (&ompi_mpi_op_no_op.op == op) {
|
||||
memcpy (ptr + offset, origin_addr, extent);
|
||||
} else {
|
||||
ompi_op_reduce (op, (void *) origin_addr, ptr + offset, 1, dt);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, address,
|
||||
frag->handle, target_handle, ((int64_t *) ptr)[1],
|
||||
((int64_t *) ptr)[0], 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_atomic_complete, (void *) &complete, NULL);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
while (!complete) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
|
||||
if (((int64_t *) ptr)[1] == ((int64_t *) ptr)[0]) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (result_addr) {
|
||||
memcpy (result_addr, ptr + 8 + offset, extent);
|
||||
}
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Completion callback for a BTL atomic op issued on behalf of a
 * single-element accumulate.
 *
 * @param btl            BTL module that ran the operation (unused)
 * @param endpoint       BTL endpoint of the target (unused)
 * @param local_address  unused
 * @param local_handle   unused
 * @param context        the ompi_osc_rdma_sync_t the operation was counted against
 * @param data           the ompi_osc_rdma_request_t to complete, or NULL
 * @param status         completion status, forwarded to the request if there is one
 */
static void ompi_osc_rdma_acc_single_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                      void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                      void *context, void *data, int status)
{
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) data;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic accumulate complete");

    /* the operation is no longer outstanding on this sync object */
    ompi_osc_rdma_sync_rdma_dec ((ompi_osc_rdma_sync_t *) context);

    if (NULL == pending) {
        /* nothing was waiting on this operation */
        return;
    }

    ompi_osc_rdma_request_complete (pending, status);
}
|
||||
|
||||
static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, ompi_datatype_t *dt, ptrdiff_t extent,
|
||||
ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
|
||||
ompi_op_t *op, ompi_osc_rdma_request_t *req)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
|
||||
int ret, btl_op, flags;
|
||||
int64_t origin;
|
||||
|
||||
if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
|
||||
/* btl put atomics not supported or disabled. fall back on fetch-and-op */
|
||||
return ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, NULL, dt, extent, peer, target_address, target_handle, op, req);
|
||||
}
|
||||
|
||||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
|
||||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
|
||||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
origin = (8 == extent) ? ((uint64_t *) origin_addr)[0] : ((uint32_t *) origin_addr)[0];
|
||||
|
||||
/* set the appropriate flags for this atomic */
|
||||
flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
|
||||
if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
|
||||
flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
|
||||
}
|
||||
|
||||
btl_op = ompi_osc_rdma_op_mapping[op->op_type];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate using 64-bit btl atomics. origin: 0x%" PRIx64,
|
||||
*((int64_t *) origin_addr));
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->data_endpoint, target_address,
|
||||
target_handle, btl_op, origin, flags, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_acc_single_atomic_complete, sync, req);
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
if (1 == ret) {
|
||||
if (req) {
|
||||
ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
|
||||
}
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* ompi_osc_rdma_cas_get_complete:
|
||||
@ -561,12 +827,15 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. "
|
||||
"status %d", (void *) sync, status);
|
||||
|
||||
if (OMPI_SUCCESS == status) {
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* copy data to the user buffer (for gacc) */
|
||||
memcpy (request->result_addr, (void *) source, request->len);
|
||||
|
||||
if (0 == memcmp ((void *) source, request->compare_addr, request->len)) {
|
||||
/* the target and compare buffers match so write the source to the target */
|
||||
/* the target and compare buffers match. write the source to the target */
|
||||
memcpy ((void *) source, request->origin_addr, request->len);
|
||||
|
||||
ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address,
|
||||
@ -581,7 +850,10 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
|
||||
/* TODO -- we can do better. probably should queue up the next step and handle it in progress */
|
||||
assert (OPAL_SUCCESS == ret);
|
||||
} else {
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* this is a no-op. nothing more to do except release the accumulate lock */
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
|
||||
@ -595,11 +867,9 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer, void *result_buffer,
|
||||
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
|
||||
mca_btl_base_registration_handle_t *target_handle)
|
||||
{
|
||||
@ -649,10 +919,10 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
|
||||
/* set up the request */
|
||||
request->frag = frag;
|
||||
request->origin_addr = (void *) source_buffer;
|
||||
request->origin_addr = (void *) source_addr;
|
||||
request->ctx = (void *) target_handle;
|
||||
request->result_addr = result_buffer;
|
||||
request->compare_addr = compare_buffer;
|
||||
request->result_addr = result_addr;
|
||||
request->compare_addr = compare_addr;
|
||||
request->result_dt = datatype;
|
||||
request->offset = (ptrdiff_t) offset;
|
||||
request->target_address = target_address;
|
||||
@ -670,6 +940,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) {
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
@ -684,8 +957,8 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
|
||||
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
|
||||
struct ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_win_t *win)
|
||||
ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -708,8 +981,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (MCA_OSC_RDMA_SAME_OP <= module->accumulate_ops) {
|
||||
if (win->w_acc_ops <= OMPI_WIN_ACCUMULATE_OPS_SAME_OP) {
|
||||
/* the user has indicated that they will only use the same op (or same op and no op)
|
||||
* for operations on overlapping memory ranges. that indicates it is safe to go ahead
|
||||
* and use network atomic operations. */
|
||||
@ -718,8 +990,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ompi_osc_rdma_peer_local_base (peer)) {
|
||||
return ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt,
|
||||
@ -733,15 +1004,16 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
|
||||
static inline
|
||||
int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
|
||||
ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
int target_rank, MPI_Aint target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *target_handle;
|
||||
uint64_t target_address;
|
||||
ptrdiff_t lb, extent;
|
||||
int ret;
|
||||
|
||||
/* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
|
||||
@ -753,12 +1025,35 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count,
|
||||
&target_address, &target_handle);
|
||||
(void) ompi_datatype_get_extent (origin_datatype, &lb, &extent);
|
||||
|
||||
ret = osc_rdma_get_remote_segment (module, peer, target_disp, extent * target_count, &target_address, &target_handle);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (module->acc_single_intrinsic && extent <= 8) {
|
||||
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
|
||||
if (NULL == result_addr) {
|
||||
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
} else {
|
||||
ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
|
||||
target_handle, op, request);
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if (ompi_osc_rdma_peer_local_base (peer)) {
|
||||
/* local/self optimization */
|
||||
return ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
|
||||
@ -771,13 +1066,10 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
target_datatype, op, request);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win)
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -801,14 +1093,10 @@ int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
}
|
||||
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win, ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -842,31 +1130,9 @@ int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Fetch-and-op on a single element of type dt at (target_rank, target_disp).
 *
 * NOTE(review): this copy uses the `struct ompi_datatype_t *` style signature; a second
 * copy of the same function with typedef'd parameter types appears later in this listing.
 * Presumably this is the pre-change side of the diff -- confirm before relying on it.
 *
 * The function looks up the synchronization object (and peer) for target_rank and then
 * forwards to ompi_osc_rdma_rget_accumulate_internal with a count of 1 and the same
 * datatype for the origin, result and target, passing NULL for the request argument.
 *
 * @param origin_addr  origin buffer, forwarded unchanged
 * @param result_addr  result buffer, forwarded unchanged (also printed in the trace message)
 * @param dt           datatype used for the origin, result and target
 * @param target_rank  rank used for the sync/peer lookup and forwarded to the internal call
 * @param target_disp  displacement forwarded to the internal call
 * @param op           operation forwarded to the internal call
 * @param win          window from which the module is obtained via GET_MODULE
 *
 * @return OMPI_ERR_RMA_SYNC if no sync object is found for target_rank, otherwise the
 *         return value of ompi_osc_rdma_rget_accumulate_internal.
 */
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, struct ompi_op_t *op, struct ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
/* trace-level log of the call arguments */
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name, target_rank,
|
||||
(unsigned long) target_disp, op->o_name, win->w_name);
|
||||
|
||||
/* the lookup returns the sync object and fills in peer; a NULL sync is reported as a sync error */
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
/* single element: counts are all 1 and dt is used three times; the trailing NULL is the request argument */
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer, target_rank,
|
||||
target_disp, 1, dt, op, NULL);
|
||||
}
|
||||
|
||||
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
struct ompi_win_t *win, struct ompi_request_t **request)
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win, ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -898,11 +1164,9 @@ int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
|
||||
struct ompi_win_t *win)
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -921,3 +1185,24 @@ int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
NULL, peer, target_rank, target_disp, target_count, target_datatype,
|
||||
op, NULL);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Fetch-and-op on a single element of type dt at (target_rank, target_disp).
 *
 * The function looks up the synchronization object (and peer) for target_rank and then
 * forwards to ompi_osc_rdma_rget_accumulate_internal with a count of 1 and the same
 * datatype for the origin, result and target, passing NULL for the request argument.
 * NOTE(review): presumably this backs MPI_Fetch_and_op -- confirm against the module's
 * function table, which is not visible here.
 *
 * @param origin_addr  origin buffer, forwarded unchanged
 * @param result_addr  result buffer, forwarded unchanged (also printed in the trace message)
 * @param dt           datatype used for the origin, result and target
 * @param target_rank  rank used for the sync/peer lookup and forwarded to the internal call
 * @param target_disp  displacement forwarded to the internal call
 * @param op           operation forwarded to the internal call
 * @param win          window from which the module is obtained via GET_MODULE
 *
 * @return OMPI_ERR_RMA_SYNC if no sync object is found for target_rank, otherwise the
 *         return value of ompi_osc_rdma_rget_accumulate_internal.
 */
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
/* NOTE(review): ret is declared but never referenced in the visible body -- candidate for removal */
int ret;
|
||||
|
||||
/* trace-level log of the call arguments */
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name,
|
||||
target_rank, (unsigned long) target_disp, op->o_name, win->w_name);
|
||||
|
||||
/* the lookup returns the sync object and fills in peer; a NULL sync is reported as a sync error */
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
/* single element: counts are all 1 and dt is used three times; the trailing NULL is the request argument */
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer,
|
||||
target_rank, target_disp, 1, dt, op, NULL);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -14,44 +14,30 @@
|
||||
|
||||
#include "osc_rdma.h"
|
||||
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr,
|
||||
void *result_addr, struct ompi_datatype_t *dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
|
||||
ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
|
||||
ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_dt,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr,
|
||||
struct ompi_datatype_t *dt, int target,
|
||||
OPAL_PTRDIFF_TYPE target_disp,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win);
|
||||
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_dt,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count,
|
||||
struct ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp,
|
||||
int target_count, struct ompi_datatype_t *target_datatype,
|
||||
struct ompi_op_t *op, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
|
||||
ompi_win_t *win, ompi_request_t **request);
|
||||
|
||||
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
void *result_addr, int result_count, ompi_datatype_t *result_datatype,
|
||||
int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
|
||||
ompi_op_t *op, ompi_win_t *win, ompi_request_t **request);
|
||||
|
||||
|
||||
#endif /* OSC_RDMA_ACCUMULATE_H */
|
||||
|
@ -772,9 +772,9 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
|
||||
struct ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer,
|
||||
OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request)
|
||||
ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *target_handle;
|
||||
@ -807,9 +807,9 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi
|
||||
ompi_osc_rdma_put_contig, false);
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request)
|
||||
ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
mca_btl_base_registration_handle_t *source_handle;
|
||||
@ -841,9 +841,9 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori
|
||||
source_handle, source_count, source_datatype, request,
|
||||
module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true);
|
||||
}
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, ompi_win_t *win)
|
||||
ompi_datatype_t *target_datatype, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -862,10 +862,10 @@ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_da
|
||||
target_count, target_datatype, NULL);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_datatype, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request)
|
||||
ompi_datatype_t *target_datatype, ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -897,9 +897,9 @@ int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_d
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win)
|
||||
ompi_datatype_t *source_datatype, ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
@ -918,10 +918,10 @@ int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype
|
||||
source_disp, source_count, source_datatype, NULL);
|
||||
}
|
||||
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype,
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
|
||||
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
|
||||
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request)
|
||||
ompi_datatype_t *source_datatype, ompi_win_t *win,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
|
@ -96,23 +96,23 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
|
||||
|
||||
/* prototypes for implementations of MPI RMA window functions. these will be called from the
|
||||
* mpi interface (ompi/mpi/c) */
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win);
|
||||
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win,
|
||||
ompi_request_t **request);
|
||||
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
|
||||
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
|
||||
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
|
||||
struct ompi_datatype_t *target_dt, struct ompi_win_t *win,
|
||||
struct ompi_request_t **request);
|
||||
ompi_datatype_t *target_dt, ompi_win_t *win,
|
||||
ompi_request_t **request);
|
||||
|
||||
/**
|
||||
* @brief read data from a remote memory region (blocking)
|
||||
|
@ -173,6 +173,20 @@ static int ompi_osc_rdma_component_register (void)
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.no_locks);
|
||||
|
||||
mca_osc_rdma_component.acc_single_intrinsic = false;
|
||||
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_single_intrinsic",
|
||||
"Enable optimizations for MPI_Fetch_and_op, MPI_Accumulate, etc for codes "
|
||||
"that will not use anything more than a single predefined datatype (default: false)",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_single_intrinsic);
|
||||
|
||||
mca_osc_rdma_component.acc_use_amo = true;
|
||||
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_use_amo",
|
||||
"Enable the use of network atomic memory operations when using single "
|
||||
"intrinsic optimizations. If not set network compare-and-swap will be "
|
||||
"used instread (default: true)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_use_amo);
|
||||
|
||||
mca_osc_rdma_component.buffer_size = 32768;
|
||||
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "buffer_size",
|
||||
"Size of temporary buffers (default: 32k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
|
||||
@ -585,7 +599,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
}
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) {
|
||||
if (MPI_WIN_FLAVOR_CREATE == module->flavor) {
|
||||
ret = ompi_osc_rdma_initialize_region (module, base, size);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
break;
|
||||
@ -600,6 +614,20 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions;
|
||||
module->state->disp_unit = module->disp_unit;
|
||||
module->state->region_count = 1;
|
||||
region->base = state_region->base + my_base_offset;
|
||||
region->len = size;
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
memcpy (region->btl_handle_data, state_region->btl_handle_data, module->selected_btl->btl_registration_handle_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* barrier to make sure all ranks have attached */
|
||||
shared_comm->c_coll.coll_barrier(shared_comm, shared_comm->c_coll.coll_barrier_module);
|
||||
|
||||
offset = data_base;
|
||||
for (int i = 0 ; i < local_size ; ++i) {
|
||||
ompi_osc_rdma_peer_extended_t *ex_peer;
|
||||
@ -646,15 +674,13 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
if (temp[i].size) {
|
||||
ex_peer->super.base = (uint64_t) (uintptr_t) module->segment_base + offset;
|
||||
ex_peer->super.base = state_region->base + offset;
|
||||
offset += temp[i].size;
|
||||
} else {
|
||||
ex_peer->super.base = 0;
|
||||
}
|
||||
}
|
||||
|
||||
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
|
||||
|
||||
offset += temp[i].size;
|
||||
} else {
|
||||
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
|
||||
|
||||
ex_peer->super.base = peer_region->base;
|
||||
@ -662,7 +688,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ompi_osc_module_add_peer (module, peer);
|
||||
}
|
||||
@ -1020,6 +1045,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
module->same_disp_unit = check_config_value_bool ("same_disp_unit", info);
|
||||
module->same_size = check_config_value_bool ("same_size", info);
|
||||
module->no_locks = check_config_value_bool ("no_locks", info);
|
||||
module->acc_single_intrinsic = check_config_value_bool ("ompi_single_accumulate", info);
|
||||
module->acc_use_amo = mca_osc_rdma_component.acc_use_amo;
|
||||
|
||||
module->all_sync.module = module;
|
||||
|
||||
@ -1047,14 +1074,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
}
|
||||
}
|
||||
|
||||
/* options */
|
||||
/* FIX ME: should actually check this value... */
|
||||
#if 1
|
||||
module->accumulate_ordering = 1;
|
||||
#else
|
||||
ompi_osc_base_config_value_equal("accumulate_ordering", info, "none");
|
||||
#endif
|
||||
|
||||
ret = ompi_comm_dup(comm, &module->comm);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
ompi_osc_rdma_free (win);
|
||||
@ -1132,17 +1151,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
}
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_share_data (module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* for now the leader is always rank 0 in the communicator */
|
||||
module->leader = ompi_osc_rdma_module_peer (module, 0);
|
||||
|
||||
/* lock data */
|
||||
if (module->no_locks) {
|
||||
win->w_flags |= OMPI_WIN_NO_LOCKS;
|
||||
@ -1177,20 +1185,19 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
/* sync memory - make sure all initialization completed */
|
||||
opal_atomic_mb();
|
||||
|
||||
/* barrier to prevent arrival of lock requests before we're
|
||||
fully created */
|
||||
ret = module->comm->c_coll.coll_barrier(module->comm,
|
||||
module->comm->c_coll.coll_barrier_module);
|
||||
ret = ompi_osc_rdma_share_data (module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* for now the leader is always rank 0 in the communicator */
|
||||
module->leader = ompi_osc_rdma_module_peer (module, 0);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d",
|
||||
ompi_comm_get_cid(module->comm));
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
@ -43,12 +43,6 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name);
|
||||
|
||||
if (ompi_comm_rank (module->comm) == target) {
|
||||
/* nothing to flush. call one round of progress */
|
||||
ompi_osc_rdma_progress (module);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer);
|
||||
|
@ -218,6 +218,10 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
|
||||
|
||||
memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size);
|
||||
}
|
||||
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -75,6 +75,9 @@ struct ompi_osc_rdma_peer_basic_t {
|
||||
/** remote peer's base pointer */
|
||||
osc_rdma_base_t base;
|
||||
|
||||
/** local pointer to peer's base */
|
||||
osc_rdma_base_t local_base;
|
||||
|
||||
/** registration handle associated with the base */
|
||||
mca_btl_base_registration_handle_t *base_handle;
|
||||
};
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user