1
1

Merge pull request #2045 from hjelmn/osc_rdma_atomics

osc/rdma: add support for network AMOs
Этот коммит содержится в:
Nathan Hjelm 2016-09-08 11:21:49 -06:00 коммит произвёл GitHub
родитель e84b35217f 1ce5847e8b
Коммит 7c8e7691a7
9 изменённых файлов: 532 добавлений и 246 удалений

Просмотреть файл

@ -8,7 +8,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
@ -86,6 +86,12 @@ struct ompi_osc_rdma_component_t {
/** Default value of the no_locks info key for new windows */ /** Default value of the no_locks info key for new windows */
bool no_locks; bool no_locks;
/** Accumulate operations will only operate on a single intrinsic datatype */
bool acc_single_intrinsic;
/** Use network AMOs when available */
bool acc_use_amo;
/** Priority of the osc/rdma component */ /** Priority of the osc/rdma component */
unsigned int priority; unsigned int priority;
@ -121,12 +127,13 @@ struct ompi_osc_rdma_module_t {
/** value of same_size info key for this window */ /** value of same_size info key for this window */
bool same_size; bool same_size;
/** window should have accumulate ordering... */
bool accumulate_ordering;
/** passive-target synchronization will not be used in this window */ /** passive-target synchronization will not be used in this window */
bool no_locks; bool no_locks;
bool acc_single_intrinsic;
bool acc_use_amo;
/** flavor of this window */ /** flavor of this window */
int flavor; int flavor;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -25,7 +25,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
int ret = OMPI_SUCCESS; int ret = OMPI_SUCCESS;
do { do {
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local regions"); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local region(s)");
if (!ompi_osc_rdma_peer_is_exclusive (peer)) { if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
@ -70,7 +70,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
return ret; return ret;
} }
static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void *compare_buffer, void *result_buffer, static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void *compare_addr, void *result_addr,
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
ompi_osc_rdma_module_t *module) ompi_osc_rdma_module_t *module)
@ -79,10 +79,10 @@ static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size); memcpy (result_addr, (void *) (uintptr_t) target_address, datatype->super.size);
if (0 == memcmp (compare_buffer, result_buffer, datatype->super.size)) { if (0 == memcmp (compare_addr, result_addr, datatype->super.size)) {
memcpy ((void *) (uintptr_t) target_address, source_buffer, datatype->super.size); memcpy ((void *) (uintptr_t) target_address, source_addr, datatype->super.size);
} }
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
@ -258,15 +258,19 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation faile with opal error code %d", ret); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation failed with opal error code %d", ret);
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
}
ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL); ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL);
return ret; return ret;
} }
static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_buffer, int source_count, static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_addr, int source_count,
ompi_datatype_t *source_datatype, void *result_buffer, int result_count, ompi_datatype_t *source_datatype, void *result_addr, int result_count,
ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address, ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
mca_btl_base_registration_handle_t *target_handle, int target_count, mca_btl_base_registration_handle_t *target_handle, int target_count,
ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request) ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
@ -304,15 +308,15 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (source_datatype) { if (source_datatype) {
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent); (void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
source_buffer = (void *)((intptr_t) source_buffer + lb); source_addr = (void *)((intptr_t) source_addr + lb);
} }
if (result_datatype) { if (result_datatype) {
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent); (void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
result_buffer = (void *)((intptr_t) result_buffer + lb); result_addr = (void *)((intptr_t) result_addr + lb);
} }
ret = ompi_osc_rdma_gacc_contig (sync, source_buffer, source_count, source_datatype, result_buffer, ret = ompi_osc_rdma_gacc_contig (sync, source_addr, source_count, source_datatype, result_addr,
result_count, result_datatype, peer, target_address, result_count, result_datatype, peer, target_address,
target_handle, target_count, target_datatype, op, target_handle, target_count, target_datatype, op,
request); request);
@ -323,12 +327,12 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (source_datatype) { if (source_datatype) {
/* the convertors will handle the lb */ /* the convertors will handle the lb */
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent); (void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
source_buffer = (void *)((intptr_t) source_buffer - lb); source_addr = (void *)((intptr_t) source_addr - lb);
} }
if (result_datatype) { if (result_datatype) {
(void) ompi_datatype_get_extent (result_datatype, &lb, &extent); (void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
result_buffer = (void *)((intptr_t) result_buffer - lb); result_addr = (void *)((intptr_t) result_addr - lb);
} }
} }
@ -362,7 +366,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
/* the source may be NULL if using MPI_OP_NO_OP with MPI_Get_accumulate */ /* the source may be NULL if using MPI_OP_NO_OP with MPI_Get_accumulate */
if (source_datatype) { if (source_datatype) {
OBJ_CONSTRUCT(&source_convertor, opal_convertor_t); OBJ_CONSTRUCT(&source_convertor, opal_convertor_t);
ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_buffer, ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_addr,
0, &source_convertor); 0, &source_convertor);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
@ -427,7 +431,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (result_datatype) { if (result_datatype) {
/* prepare a convertor for this part of the result */ /* prepare a convertor for this part of the result */
opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count, opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count,
result_buffer, 0, &subreq->convertor); result_addr, 0, &subreq->convertor);
opal_convertor_set_position (&subreq->convertor, &result_position); opal_convertor_set_position (&subreq->convertor, &result_position);
subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC; subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC;
} else { } else {
@ -478,41 +482,206 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
#if 0
static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, mca_btl_base_registration_handle_t *local_handle, void *local_address, mca_btl_base_registration_handle_t *local_handle,
void *context, void *data, int status) void *context, void *data, int status)
{ {
ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context; ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context;
ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data; ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data;
void *result_buffer = (void *)(intptr_t) ((int64_t *) local_address)[1]; void *result_addr = (void *)(intptr_t) ((int64_t *) local_address)[1];
size_t size = ((int64_t *) local_address)[2];
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete. result: 0x%" PRIx64, OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete. result: 0x%" PRIx64,
*((int64_t *) local_address)); *((int64_t *) local_address));
/* copy the result */ /* copy the result */
memcpy (result_buffer, local_address, 8); memcpy (result_addr, local_address, size);
ompi_osc_rdma_sync_rdma_dec (sync); ompi_osc_rdma_sync_rdma_dec (sync);
ompi_osc_rdma_frag_complete (frag); ompi_osc_rdma_frag_complete (frag);
} }
static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer, static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr,
void *result_buffer, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, void *result_addr, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle) uint64_t target_address, mca_btl_base_registration_handle_t *target_handle)
{ {
ompi_osc_rdma_module_t *module = sync->module; ompi_osc_rdma_module_t *module = sync->module;
const size_t size = datatype->super.size;
ompi_osc_rdma_frag_t *frag = NULL; ompi_osc_rdma_frag_t *frag = NULL;
int64_t compare, source;
int ret, flags;
char *ptr; char *ptr;
int ret;
/* XXX -- TODO -- Update the BTL interface to allow for other CAS sizes */ if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->selected_btl->btl_flags))) {
if (datatype->super.size != 8) {
return OMPI_ERR_NOT_SUPPORTED; return OMPI_ERR_NOT_SUPPORTED;
} }
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using 64-bit btl atomics. compare: 0x%" compare = (8 == size) ? ((int64_t *) compare_addr)[0] : ((int32_t *) compare_addr)[0];
PRIx64 ", origin: 0x%" PRIx64, *((int64_t *) compare_buffer), *((int64_t *) source_buffer)); source = (8 == size) ? ((int64_t *) source_addr)[0] : ((int32_t *) source_addr)[0];
flags = (4 == size) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using %d-bit btl atomics. compare: 0x%"
PRIx64 ", origin: 0x%" PRIx64, size * 8, *((int64_t *) compare_addr), *((int64_t *) source_addr));
ret = ompi_osc_rdma_frag_alloc (module, 24, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
/* store the destination and size in the temporary buffer */
((int64_t *) ptr)[1] = (intptr_t) result_addr;
((int64_t *) ptr)[2] = size;
ompi_osc_rdma_sync_rdma_inc (sync);
do {
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
frag->handle, target_handle, compare, source, flags, MCA_BTL_NO_ORDER,
ompi_osc_rdma_cas_atomic_complete, sync, frag);
ompi_osc_rdma_progress (module);
} while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
if (OPAL_SUCCESS != ret) {
ompi_osc_rdma_sync_rdma_dec (sync);
if (1 == ret) {
memcpy (result_addr, ptr, size);
ret = OMPI_SUCCESS;
}
ompi_osc_rdma_frag_complete (frag);
}
return ret;
}
/**
 * Completion callback for a BTL atomic fetch-and-op.
 *
 * The scratch buffer at local_address is read as four 64-bit slots:
 *   [0] value fetched by the atomic operation
 *   [1] user result address (may be 0, in which case nothing is copied)
 *   [2] request pointer (may be 0, in which case no request is completed)
 *   [3] number of bytes to copy into the user result buffer
 *
 * @param btl            unused here
 * @param endpoint       unused here
 * @param local_address  scratch buffer described above
 * @param local_handle   unused here
 * @param context        the ompi_osc_rdma_sync_t this operation was counted against
 * @param data           the ompi_osc_rdma_frag_t to hand back once done
 * @param status         completion status, forwarded to the request (if any)
 */
static inline void ompi_osc_rdma_fetch_and_op_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                               void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                               void *context, void *data, int status)
{
    int64_t *slots = (int64_t *) local_address;
    ompi_osc_rdma_sync_t *owner_sync = (ompi_osc_rdma_sync_t *) context;
    ompi_osc_rdma_frag_t *scratch_frag = (ompi_osc_rdma_frag_t *) data;

    /* pull everything out of the scratch buffer before the fragment is handed back */
    void *user_result = (void *)(intptr_t) slots[1];
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) (intptr_t) slots[2];
    size_t nbytes = slots[3];

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic fetch-and-op complete. result: 0x%" PRIx64,
                     slots[0]);

    /* hand the fetched value to the caller when one was requested */
    if (NULL != user_result) {
        memcpy (user_result, local_address, nbytes);
    }

    ompi_osc_rdma_sync_rdma_dec (owner_sync);
    ompi_osc_rdma_frag_complete (scratch_frag);

    if (NULL != pending) {
        ompi_osc_rdma_request_complete (pending, status);
    }
}
/**
 * Lookup table from an MPI operation type (used as the index) to the BTL
 * atomic operation code that implements it.
 *
 * Only the operations listed below have an entry; every other slot is
 * implicitly zero-initialized. The atomic paths in this file test for
 * `0 == ompi_osc_rdma_op_mapping[op->op_type]` and report
 * OMPI_ERR_NOT_SUPPORTED in that case, so zero acts as the "no BTL atomic
 * for this op" marker.
 *
 * NOTE(review): this relies on none of the MCA_BTL_ATOMIC_* codes used here
 * being 0 -- confirm against the BTL header.
 */
static int ompi_osc_rdma_op_mapping[OMPI_OP_NUM_OF_TYPES] = {
[OMPI_OP_MAX] = MCA_BTL_ATOMIC_MAX,
[OMPI_OP_MIN] = MCA_BTL_ATOMIC_MIN,
[OMPI_OP_SUM] = MCA_BTL_ATOMIC_ADD,
[OMPI_OP_BAND] = MCA_BTL_ATOMIC_AND,
[OMPI_OP_BOR] = MCA_BTL_ATOMIC_OR,
[OMPI_OP_BXOR] = MCA_BTL_ATOMIC_XOR,
[OMPI_OP_LAND] = MCA_BTL_ATOMIC_LAND,
[OMPI_OP_LOR] = MCA_BTL_ATOMIC_LOR,
[OMPI_OP_LXOR] = MCA_BTL_ATOMIC_LXOR,
/* MPI_REPLACE is expressed as an atomic swap */
[OMPI_OP_REPLACE] = MCA_BTL_ATOMIC_SWAP,
};
/**
 * Try to perform a single-element fetch-and-op with the selected BTL's
 * atomic fetch-and-op.
 *
 * @param sync            synchronization object the operation is counted against
 * @param origin_addr     operand; exactly `extent` bytes are read from it
 * @param result_addr     where to store the fetched value, or NULL when the
 *                        caller does not want it (accumulate-only fallback)
 * @param dt              datatype of the operand
 * @param extent          operand size in bytes; only 8, or 4 when the BTL
 *                        advertises 32-bit atomics, is accepted
 * @param peer            target peer
 * @param target_address  remote address of the target element
 * @param target_handle   registration handle covering target_address
 * @param op              MPI operation; must be intrinsic and have an entry
 *                        in ompi_osc_rdma_op_mapping
 * @param req             request to complete when the operation finishes, or NULL
 *
 * @return OMPI_ERR_NOT_SUPPORTED when the size/datatype/op combination cannot
 *         be mapped onto a BTL atomic, the fragment allocation error if the
 *         scratch buffer cannot be obtained, the BTL error code on failure,
 *         and OPAL_SUCCESS otherwise.
 */
static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
                                              ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                                              mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
{
    ompi_osc_rdma_module_t *module = sync->module;
    int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
    ompi_osc_rdma_frag_t *frag = NULL;
    int ret, btl_op, flags;
    char *ptr = NULL;
    int64_t origin;

    if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
        (!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
        !ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
    if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
        flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
    }

    btl_op = ompi_osc_rdma_op_mapping[op->op_type];

    /* load the operand with the correct width first so that tracing never
     * reads 8 bytes from a 4-byte origin buffer */
    origin = (8 == extent) ? ((const int64_t *) origin_addr)[0] : ((const int32_t *) origin_addr)[0];

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using %d-bit btl atomics. origin: 0x%" PRIx64,
                     (4 == extent) ? 32 : 64, (uint64_t) origin);

    ret = ompi_osc_rdma_frag_alloc (module, 32, &frag, &ptr);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    /* store the destination, request, and extent in the temporary buffer for the callback.
     * slot 0 is left for the value fetched by the BTL. */
    ((int64_t *) ptr)[1] = (intptr_t) result_addr;
    ((int64_t *) ptr)[2] = (intptr_t) req;
    ((int64_t *) ptr)[3] = extent;

    ompi_osc_rdma_sync_rdma_inc (sync);

    /* retry while the BTL reports a transient lack of resources */
    do {
        ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->data_endpoint, ptr, target_address,
                                                    frag->handle, target_handle, btl_op, origin, flags,
                                                    MCA_BTL_NO_ORDER, ompi_osc_rdma_fetch_and_op_atomic_complete,
                                                    sync, frag);

        ompi_osc_rdma_progress (module);
    } while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));

    if (OPAL_SUCCESS != ret) {
        /* the callback will not run: undo the accounting done above */
        ompi_osc_rdma_sync_rdma_dec (sync);

        if (OPAL_LIKELY(1 == ret)) {
            /* a return of 1 is handled as "completed immediately": the fetched
             * value is already in the scratch buffer. result_addr is NULL when
             * the caller only wants the accumulate side effect, so guard the
             * copy exactly like the asynchronous completion callback does. */
            if (NULL != result_addr) {
                memcpy (result_addr, ptr, extent);
            }

            if (req) {
                ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
            }

            ret = OPAL_SUCCESS;
        }

        ompi_osc_rdma_frag_complete (frag);
    }

    return ret;
}
static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
{
ompi_osc_rdma_module_t *module = sync->module;
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
ompi_osc_rdma_frag_t *frag = NULL;
uint64_t address, offset;
char *ptr = NULL;
int ret, btl_op;
if (extent > 8) {
return OMPI_ERR_NOT_SUPPORTED;
}
/* align the address. the user should not call with an unaligned address so don't need to range check here */
address = target_address & ~7;
offset = target_address & ~address;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using compare-and-swap. origin: 0x%" PRIx64,
*((int64_t *) origin_addr));
ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr); ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -520,27 +689,124 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
} }
/* store the destination in the temporary buffer */ /* store the destination in the temporary buffer */
((int64_t *) ptr)[1] = (intptr_t) result_buffer; do {
bool complete = false;
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address, ret = ompi_osc_get_data_blocking (module, peer->data_endpoint, address, target_handle, ptr, 8);
frag->handle, target_handle, ((int64_t *)compare_buffer)[0], if (OMPI_SUCCESS != ret) {
*((int64_t *) source_buffer), 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_frag_complete (frag);
ompi_osc_rdma_cas_atomic_complete, module, frag);
if (OPAL_UNLIKELY(0 > ret)) {
return ret; return ret;
} }
if (1 != ret) { ((int64_t *) ptr)[1] = ((int64_t *) ptr)[0];
ompi_osc_rdma_sync_rdma_inc (sync);
} else {
memcpy (result_buffer, ptr, 8);
ompi_osc_rdma_frag_complete (frag); if (&ompi_mpi_op_no_op.op == op) {
memcpy (ptr + offset, origin_addr, extent);
} else {
ompi_op_reduce (op, (void *) origin_addr, ptr + offset, 1, dt);
} }
return OMPI_SUCCESS; do {
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, address,
frag->handle, target_handle, ((int64_t *) ptr)[1],
((int64_t *) ptr)[0], 0, MCA_BTL_NO_ORDER,
ompi_osc_rdma_atomic_complete, (void *) &complete, NULL);
ompi_osc_rdma_progress (module);
} while (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
break;
}
while (!complete) {
ompi_osc_rdma_progress (module);
}
if (((int64_t *) ptr)[1] == ((int64_t *) ptr)[0]) {
break;
}
} while (1);
if (result_addr) {
memcpy (result_addr, ptr + 8 + offset, extent);
}
ompi_osc_rdma_frag_complete (frag);
return ret;
}
/**
 * Completion callback for a BTL atomic (non-fetching) accumulate.
 *
 * Drops the outstanding-operation count on the sync object passed as
 * `context` and, when a request was supplied through `data`, completes it
 * with the reported status. The remaining arguments are not used.
 */
static void ompi_osc_rdma_acc_single_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                                      void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                                      void *context, void *data, int status)
{
    ompi_osc_rdma_sync_t *owner_sync = (ompi_osc_rdma_sync_t *) context;
    ompi_osc_rdma_request_t *pending = (ompi_osc_rdma_request_t *) data;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic accumulate complete");

    ompi_osc_rdma_sync_rdma_dec (owner_sync);

    /* nothing further to signal when the caller did not attach a request */
    if (NULL == pending) {
        return;
    }

    ompi_osc_rdma_request_complete (pending, status);
}
/**
 * Try to perform a single-element accumulate with the selected BTL's
 * non-fetching atomic operation.
 *
 * @param sync            synchronization object the operation is counted against
 * @param origin_addr     operand; exactly `extent` bytes are read from it
 * @param dt              datatype of the operand
 * @param extent          operand size in bytes; only 8, or 4 when the BTL
 *                        advertises 32-bit atomics, is accepted
 * @param peer            target peer
 * @param target_address  remote address of the target element
 * @param target_handle   registration handle covering target_address
 * @param op              MPI operation; must be intrinsic and have an entry
 *                        in ompi_osc_rdma_op_mapping
 * @param req             request to complete when the operation finishes, or NULL
 *
 * @return OMPI_ERR_NOT_SUPPORTED when the size/datatype/op combination cannot
 *         be mapped onto a BTL atomic, the BTL error code on failure, and
 *         OMPI_SUCCESS otherwise.
 */
static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, ompi_datatype_t *dt, ptrdiff_t extent,
                                            ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
                                            ompi_op_t *op, ompi_osc_rdma_request_t *req)
{
    ompi_osc_rdma_module_t *module = sync->module;
    int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
    int ret, btl_op, flags;
    int64_t origin;

    if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
        /* btl put atomics not supported or disabled. fall back on fetch-and-op.
         * NOTE(review): the fallback is given a NULL result address; the callee
         * has to tolerate that on every completion path. */
        return ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, NULL, dt, extent, peer, target_address, target_handle, op, req);
    }

    if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
        (!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
        !ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* load the operand using its real width */
    origin = (8 == extent) ? ((const uint64_t *) origin_addr)[0] : ((const uint32_t *) origin_addr)[0];

    /* set the appropriate flags for this atomic */
    flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
    if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
        flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
    }

    btl_op = ompi_osc_rdma_op_mapping[op->op_type];

    /* trace the already-loaded operand: dereferencing origin_addr as a 64-bit
     * value here would read past a 4-byte origin buffer, and the width in the
     * message has to reflect the atomic actually issued */
    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate using %d-bit btl atomics. origin: 0x%" PRIx64,
                     (4 == extent) ? 32 : 64, (uint64_t) origin);

    ompi_osc_rdma_sync_rdma_inc (sync);

    /* retry while the BTL reports a transient lack of resources */
    do {
        ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->data_endpoint, target_address,
                                                   target_handle, btl_op, origin, flags, MCA_BTL_NO_ORDER,
                                                   ompi_osc_rdma_acc_single_atomic_complete, sync, req);

        ompi_osc_rdma_progress (module);
    } while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));

    if (OPAL_SUCCESS != ret) {
        /* the callback will not run: undo the accounting done above */
        ompi_osc_rdma_sync_rdma_dec (sync);

        if (1 == ret) {
            /* a return of 1 is handled as "completed immediately" */
            if (req) {
                ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
            }

            ret = OMPI_SUCCESS;
        }
    }

    return ret;
}
#endif
/** /**
* ompi_osc_rdma_cas_get_complete: * ompi_osc_rdma_cas_get_complete:
@ -561,12 +827,15 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. " OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. "
"status %d", (void *) sync, status); "status %d", (void *) sync, status);
if (OMPI_SUCCESS == status) { if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
return;
}
/* copy data to the user buffer (for gacc) */ /* copy data to the user buffer (for gacc) */
memcpy (request->result_addr, (void *) source, request->len); memcpy (request->result_addr, (void *) source, request->len);
if (0 == memcmp ((void *) source, request->compare_addr, request->len)) { if (0 == memcmp ((void *) source, request->compare_addr, request->len)) {
/* the target and compare buffers match so write the source to the target */ /* the target and compare buffers match. write the source to the target */
memcpy ((void *) source, request->origin_addr, request->len); memcpy ((void *) source, request->origin_addr, request->len);
ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address, ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address,
@ -581,7 +850,10 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
/* TODO -- we can do better. probably should queue up the next step and handle it in progress */ /* TODO -- we can do better. probably should queue up the next step and handle it in progress */
assert (OPAL_SUCCESS == ret); assert (OPAL_SUCCESS == ret);
} else {
return;
}
/* this is a no-op. nothing more to do except release the accumulate lock */ /* this is a no-op. nothing more to do except release the accumulate lock */
ompi_osc_rdma_frag_complete (frag); ompi_osc_rdma_frag_complete (frag);
@ -595,11 +867,9 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
ompi_osc_rdma_sync_rdma_dec (sync); ompi_osc_rdma_sync_rdma_dec (sync);
peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING; peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
}
}
} }
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer, void *result_buffer, static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr, void *result_addr,
ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
mca_btl_base_registration_handle_t *target_handle) mca_btl_base_registration_handle_t *target_handle)
{ {
@ -649,10 +919,10 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
/* set up the request */ /* set up the request */
request->frag = frag; request->frag = frag;
request->origin_addr = (void *) source_buffer; request->origin_addr = (void *) source_addr;
request->ctx = (void *) target_handle; request->ctx = (void *) target_handle;
request->result_addr = result_buffer; request->result_addr = result_addr;
request->compare_addr = compare_buffer; request->compare_addr = compare_addr;
request->result_dt = datatype; request->result_dt = datatype;
request->offset = (ptrdiff_t) offset; request->offset = (ptrdiff_t) offset;
request->target_address = target_address; request->target_address = target_address;
@ -670,6 +940,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
} }
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) { if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) {
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
(void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
}
ompi_osc_rdma_frag_complete (frag); ompi_osc_rdma_frag_complete (frag);
return ret; return ret;
} }
@ -684,8 +957,8 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
struct ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp, ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
struct ompi_win_t *win) ompi_win_t *win)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -708,8 +981,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
return ret; return ret;
} }
#if 0 if (win->w_acc_ops <= OMPI_WIN_ACCUMULATE_OPS_SAME_OP) {
if (MCA_OSC_RDMA_SAME_OP <= module->accumulate_ops) {
/* the user has indicated that they will only use the same op (or same op and no op) /* the user has indicated that they will only use the same op (or same op and no op)
* for operations on overlapping memory ranges. that indicates it is safe to go ahead * for operations on overlapping memory ranges. that indicates it is safe to go ahead
* and use network atomic operations. */ * and use network atomic operations. */
@ -718,8 +990,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
if (OMPI_SUCCESS == ret) { if (OMPI_SUCCESS == ret) {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
} else }
#endif
if (ompi_osc_rdma_peer_local_base (peer)) { if (ompi_osc_rdma_peer_local_base (peer)) {
return ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt, return ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt,
@ -733,15 +1004,16 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
static inline static inline
int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count, int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count, ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
struct ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
int target_rank, MPI_Aint target_disp, int target_count, int target_rank, MPI_Aint target_disp, int target_count,
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, ompi_datatype_t *target_datatype, ompi_op_t *op,
ompi_osc_rdma_request_t *request) ompi_osc_rdma_request_t *request)
{ {
ompi_osc_rdma_module_t *module = sync->module; ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_registration_handle_t *target_handle; mca_btl_base_registration_handle_t *target_handle;
uint64_t target_address; uint64_t target_address;
ptrdiff_t lb, extent;
int ret; int ret;
/* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */ /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
@ -753,12 +1025,35 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count, (void) ompi_datatype_get_extent (origin_datatype, &lb, &extent);
&target_address, &target_handle);
ret = osc_rdma_get_remote_segment (module, peer, target_disp, extent * target_count, &target_address, &target_handle);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
} }
if (module->acc_single_intrinsic && extent <= 8) {
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
if (NULL == result_addr) {
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, extent, peer, target_address,
target_handle, op, request);
} else {
ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
target_handle, op, request);
}
if (OMPI_SUCCESS == ret) {
return OMPI_SUCCESS;
}
}
ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
target_handle, op, request);
if (OMPI_SUCCESS == ret) {
return OMPI_SUCCESS;
}
}
if (ompi_osc_rdma_peer_local_base (peer)) { if (ompi_osc_rdma_peer_local_base (peer)) {
/* local/self optimization */ /* local/self optimization */
return ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count, return ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
@ -771,13 +1066,10 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
target_datatype, op, request); target_datatype, op, request);
} }
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count, ompi_datatype_t *result_datatype,
void *result_addr, int result_count, int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
struct ompi_datatype_t *result_datatype, ompi_op_t *op, ompi_win_t *win)
int target_rank, MPI_Aint target_disp,
int target_count, struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op, struct ompi_win_t *win)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -801,14 +1093,10 @@ int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
} }
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count, ompi_datatype_t *result_datatype,
void *result_addr, int result_count, int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
struct ompi_datatype_t *result_datatype, ompi_op_t *op, ompi_win_t *win, ompi_request_t **request)
int target_rank, MPI_Aint target_disp,
int target_count, struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op, struct ompi_win_t *win,
ompi_request_t **request)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -842,31 +1130,9 @@ int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target_rank, int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
OPAL_PTRDIFF_TYPE target_disp, struct ompi_op_t *op, struct ompi_win_t *win) OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
{ ompi_win_t *win, ompi_request_t **request)
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer;
ompi_osc_rdma_sync_t *sync;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name, target_rank,
(unsigned long) target_disp, op->o_name, win->w_name);
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
if (OPAL_UNLIKELY(NULL == sync)) {
return OMPI_ERR_RMA_SYNC;
}
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer, target_rank,
target_disp, 1, dt, op, NULL);
}
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_datatype, int target_rank,
OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
struct ompi_win_t *win, struct ompi_request_t **request)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -898,11 +1164,9 @@ int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
struct ompi_datatype_t *origin_datatype, int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_win_t *win)
struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
struct ompi_win_t *win)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -921,3 +1185,24 @@ int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
NULL, peer, target_rank, target_disp, target_count, target_datatype, NULL, peer, target_rank, target_disp, target_count, target_datatype,
op, NULL); op, NULL);
} }
/**
 * @brief fetch-and-op entry point of the osc/rdma component
 *
 * Thin wrapper around ompi_osc_rdma_rget_accumulate_internal(): the operation is
 * forwarded as a get-accumulate in which the origin, result, and target all use
 * a count of 1 and the same datatype (dt), and no request object is supplied.
 *
 * @param[in]  origin_addr  origin buffer forwarded to the internal get-accumulate
 * @param[out] result_addr  result buffer forwarded to the internal get-accumulate
 * @param[in]  dt           datatype used for the origin, result, and target element
 * @param[in]  target_rank  rank used to look up the synchronization object and peer
 * @param[in]  target_disp  displacement forwarded to the internal get-accumulate
 * @param[in]  op           operation forwarded to the internal get-accumulate
 * @param[in]  win          window; the osc/rdma module is obtained with GET_MODULE()
 *
 * @returns OMPI_ERR_RMA_SYNC if no synchronization object is found for target_rank
 * @returns otherwise the return code of ompi_osc_rdma_rget_accumulate_internal()
 */
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
                                OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_peer_t *peer;
    ompi_osc_rdma_sync_t *sync;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name,
                     target_rank, (unsigned long) target_disp, op->o_name, win->w_name);

    /* the lookup also fills in the peer for target_rank; a NULL result is reported
     * as a synchronization error (presumably no access epoch covers this target --
     * confirm against ompi_osc_rdma_module_sync_lookup) */
    sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
    if (OPAL_UNLIKELY(NULL == sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* single element of type dt on the origin, result, and target; request is NULL */
    return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer,
                                                   target_rank, target_disp, 1, dt, op, NULL);
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -14,44 +14,30 @@
#include "osc_rdma.h" #include "osc_rdma.h"
int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr,
void *result_addr, struct ompi_datatype_t *dt, ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp,
int target, OPAL_PTRDIFF_TYPE target_disp, ompi_win_t *win);
struct ompi_win_t *win);
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
struct ompi_datatype_t *origin_dt, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
int target, OPAL_PTRDIFF_TYPE target_disp, ompi_win_t *win);
int target_count, struct ompi_datatype_t *target_dt,
struct ompi_op_t *op, struct ompi_win_t *win);
int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr,
struct ompi_datatype_t *dt, int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_op_t *op, struct ompi_win_t *win);
int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
struct ompi_datatype_t *origin_datatype, OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win);
void *result_addr, int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank, MPI_Aint target_disp,
int target_count, struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op, struct ompi_win_t *win);
int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
struct ompi_datatype_t *origin_dt, void *result_addr, int result_count, ompi_datatype_t *result_datatype,
int target, OPAL_PTRDIFF_TYPE target_disp, int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
int target_count, struct ompi_datatype_t *target_dt, ompi_op_t *op, ompi_win_t *win);
struct ompi_op_t *op, struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
struct ompi_datatype_t *origin_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
void *result_addr, int result_count, ompi_win_t *win, ompi_request_t **request);
struct ompi_datatype_t *result_datatype,
int target_rank, MPI_Aint target_disp, int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
int target_count, struct ompi_datatype_t *target_datatype, void *result_addr, int result_count, ompi_datatype_t *result_datatype,
struct ompi_op_t *op, struct ompi_win_t *win, int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
struct ompi_request_t **request); ompi_op_t *op, ompi_win_t *win, ompi_request_t **request);
#endif /* OSC_RDMA_ACCUMULATE_H */ #endif /* OSC_RDMA_ACCUMULATE_H */

Просмотреть файл

@ -772,9 +772,9 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
} }
static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count, static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer, ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer,
OPAL_PTRDIFF_TYPE target_disp, int target_count, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request) ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request)
{ {
ompi_osc_rdma_module_t *module = sync->module; ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_registration_handle_t *target_handle; mca_btl_base_registration_handle_t *target_handle;
@ -807,9 +807,9 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi
ompi_osc_rdma_put_contig, false); ompi_osc_rdma_put_contig, false);
} }
static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count, ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count,
struct ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request) ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request)
{ {
ompi_osc_rdma_module_t *module = sync->module; ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_registration_handle_t *source_handle; mca_btl_base_registration_handle_t *source_handle;
@ -841,9 +841,9 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori
source_handle, source_count, source_datatype, request, source_handle, source_count, source_datatype, request,
module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true); module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true);
} }
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_datatype, ompi_win_t *win) ompi_datatype_t *target_datatype, ompi_win_t *win)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -862,10 +862,10 @@ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_da
target_count, target_datatype, NULL); target_count, target_datatype, NULL);
} }
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_datatype, struct ompi_win_t *win, ompi_datatype_t *target_datatype, ompi_win_t *win,
struct ompi_request_t **request) ompi_request_t **request)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -897,9 +897,9 @@ int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_d
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win) ompi_datatype_t *source_datatype, ompi_win_t *win)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;
@ -918,10 +918,10 @@ int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype
source_disp, source_count, source_datatype, NULL); source_disp, source_count, source_datatype, NULL);
} }
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count,
struct ompi_datatype_t *source_datatype, struct ompi_win_t *win, ompi_datatype_t *source_datatype, ompi_win_t *win,
struct ompi_request_t **request) ompi_request_t **request)
{ {
ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t *peer; ompi_osc_rdma_peer_t *peer;

Просмотреть файл

@ -96,23 +96,23 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
/* prototypes for implementations of MPI RMA window functions. these will be called from the /* prototypes for implementations of MPI RMA window functions. these will be called from the
* mpi interface (ompi/mpi/c) */ * mpi interface (ompi/mpi/c) */
int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_dt, struct ompi_win_t *win); ompi_datatype_t *target_dt, ompi_win_t *win);
int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_dt, struct ompi_win_t *win); ompi_datatype_t *target_dt, ompi_win_t *win);
int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_dt, struct ompi_win_t *win, ompi_datatype_t *target_dt, ompi_win_t *win,
struct ompi_request_t **request); ompi_request_t **request);
int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
struct ompi_datatype_t *target_dt, struct ompi_win_t *win, ompi_datatype_t *target_dt, ompi_win_t *win,
struct ompi_request_t **request); ompi_request_t **request);
/** /**
* @brief read data from a remote memory region (blocking) * @brief read data from a remote memory region (blocking)

Просмотреть файл

@ -173,6 +173,20 @@ static int ompi_osc_rdma_component_register (void)
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.no_locks); MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.no_locks);
mca_osc_rdma_component.acc_single_intrinsic = false;
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_single_intrinsic",
"Enable optimizations for MPI_Fetch_and_op, MPI_Accumulate, etc for codes "
"that will not use anything more than a single predefined datatype (default: false)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_single_intrinsic);
mca_osc_rdma_component.acc_use_amo = true;
(void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_use_amo",
"Enable the use of network atomic memory operations when using single "
"intrinsic optimizations. If not set network compare-and-swap will be "
"used instead (default: true)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_use_amo);
mca_osc_rdma_component.buffer_size = 32768; mca_osc_rdma_component.buffer_size = 32768;
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "buffer_size", (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "buffer_size",
"Size of temporary buffers (default: 32k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, "Size of temporary buffers (default: 32k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
@ -585,7 +599,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
} }
} }
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { if (MPI_WIN_FLAVOR_CREATE == module->flavor) {
ret = ompi_osc_rdma_initialize_region (module, base, size); ret = ompi_osc_rdma_initialize_region (module, base, size);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
break; break;
@ -600,6 +614,20 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
opal_shmem_unlink (&module->seg_ds); opal_shmem_unlink (&module->seg_ds);
} }
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions;
module->state->disp_unit = module->disp_unit;
module->state->region_count = 1;
region->base = state_region->base + my_base_offset;
region->len = size;
if (module->selected_btl->btl_register_mem) {
memcpy (region->btl_handle_data, state_region->btl_handle_data, module->selected_btl->btl_registration_handle_size);
}
}
/* barrier to make sure all ranks have attached */
shared_comm->c_coll.coll_barrier(shared_comm, shared_comm->c_coll.coll_barrier_module);
offset = data_base; offset = data_base;
for (int i = 0 ; i < local_size ; ++i) { for (int i = 0 ; i < local_size ; ++i) {
ompi_osc_rdma_peer_extended_t *ex_peer; ompi_osc_rdma_peer_extended_t *ex_peer;
@ -646,15 +674,13 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
if (temp[i].size) { if (temp[i].size) {
ex_peer->super.base = (uint64_t) (uintptr_t) module->segment_base + offset; ex_peer->super.base = state_region->base + offset;
offset += temp[i].size;
} else { } else {
ex_peer->super.base = 0; ex_peer->super.base = 0;
} }
}
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
offset += temp[i].size;
} else {
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions; ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
ex_peer->super.base = peer_region->base; ex_peer->super.base = peer_region->base;
@ -662,7 +688,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data; ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
} }
} }
}
ompi_osc_module_add_peer (module, peer); ompi_osc_module_add_peer (module, peer);
} }
@ -1020,6 +1045,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
module->same_disp_unit = check_config_value_bool ("same_disp_unit", info); module->same_disp_unit = check_config_value_bool ("same_disp_unit", info);
module->same_size = check_config_value_bool ("same_size", info); module->same_size = check_config_value_bool ("same_size", info);
module->no_locks = check_config_value_bool ("no_locks", info); module->no_locks = check_config_value_bool ("no_locks", info);
module->acc_single_intrinsic = check_config_value_bool ("ompi_single_accumulate", info);
module->acc_use_amo = mca_osc_rdma_component.acc_use_amo;
module->all_sync.module = module; module->all_sync.module = module;
@ -1047,14 +1074,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
} }
} }
/* options */
/* FIX ME: should actually check this value... */
#if 1
module->accumulate_ordering = 1;
#else
ompi_osc_base_config_value_equal("accumulate_ordering", info, "none");
#endif
ret = ompi_comm_dup(comm, &module->comm); ret = ompi_comm_dup(comm, &module->comm);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
ompi_osc_rdma_free (win); ompi_osc_rdma_free (win);
@ -1132,17 +1151,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
} }
} }
ret = ompi_osc_rdma_share_data (module);
if (OMPI_SUCCESS != ret) {
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
ompi_osc_rdma_free (win);
return ret;
}
/* for now the leader is always rank 0 in the communicator */
module->leader = ompi_osc_rdma_module_peer (module, 0);
/* lock data */ /* lock data */
if (module->no_locks) { if (module->no_locks) {
win->w_flags |= OMPI_WIN_NO_LOCKS; win->w_flags |= OMPI_WIN_NO_LOCKS;
@ -1177,20 +1185,19 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
/* sync memory - make sure all initialization completed */ /* sync memory - make sure all initialization completed */
opal_atomic_mb(); opal_atomic_mb();
/* barrier to prevent arrival of lock requests before we're ret = ompi_osc_rdma_share_data (module);
fully created */
ret = module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
ompi_osc_rdma_free (win); ompi_osc_rdma_free (win);
return ret; } else {
} /* for now the leader is always rank 0 in the communicator */
module->leader = ompi_osc_rdma_module_peer (module, 0);
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d", OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d",
ompi_comm_get_cid(module->comm)); ompi_comm_get_cid(module->comm));
}
return OMPI_SUCCESS; return ret;
} }

Просмотреть файл

@ -43,12 +43,6 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name);
if (ompi_comm_rank (module->comm) == target) {
/* nothing to flush. call one round of progress */
ompi_osc_rdma_progress (module);
return OMPI_SUCCESS;
}
OPAL_THREAD_LOCK(&module->lock); OPAL_THREAD_LOCK(&module->lock);
lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer); lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer);

Просмотреть файл

@ -218,6 +218,10 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size); memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size);
} }
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint;
}
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -75,6 +75,9 @@ struct ompi_osc_rdma_peer_basic_t {
/** remote peer's base pointer */ /** remote peer's base pointer */
osc_rdma_base_t base; osc_rdma_base_t base;
/** local pointer to peer's base */
osc_rdma_base_t local_base;
/** registration handle associated with the base */ /** registration handle associated with the base */
mca_btl_base_registration_handle_t *base_handle; mca_btl_base_registration_handle_t *base_handle;
}; };