osc/rdma: use standard verbosity levels
Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
b8ee05d352
Коммит
9476c7bbca
@ -317,10 +317,13 @@ static inline int _ompi_osc_rdma_register (ompi_osc_rdma_module_t *module, struc
|
||||
size_t size, uint32_t flags, mca_btl_base_registration_handle_t **handle, int line, const char *file)
|
||||
{
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "registering segment with btl. range: %p - %p (%lu bytes)",
|
||||
ptr, (char *) ptr + size, size);
|
||||
|
||||
*handle = module->selected_btl->btl_register_mem (module->selected_btl, endpoint, ptr, size, flags);
|
||||
if (OPAL_UNLIKELY(NULL == *handle)) {
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "failed to register pointer with selected BTL. base: %p, "
|
||||
"size: %lu. file: %s, line: %d", ptr, (unsigned long) size, file, line));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to register pointer with selected BTL. base: %p, "
|
||||
"size: %lu. file: %s, line: %d", ptr, (unsigned long) size, file, line);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
} else {
|
||||
@ -426,8 +429,7 @@ static inline void ompi_osc_rdma_module_lock_remove (struct ompi_osc_rdma_module
|
||||
*/
|
||||
static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"osc/rdma: looking for synchronization object for target %d", target));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "looking for synchronization object for target %d", target);
|
||||
|
||||
switch (module->all_sync.type) {
|
||||
case OMPI_OSC_RDMA_SYNC_TYPE_NONE:
|
||||
@ -438,8 +440,7 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
|
||||
return NULL;
|
||||
case OMPI_OSC_RDMA_SYNC_TYPE_FENCE:
|
||||
case OMPI_OSC_RDMA_SYNC_TYPE_LOCK:
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"osc/rdma: found fence/lock_all access epoch for target %d", target));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence/lock_all access epoch for target %d", target);
|
||||
|
||||
/* fence epoch is now active */
|
||||
module->all_sync.epoch_active = true;
|
||||
@ -448,12 +449,13 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
|
||||
return &module->all_sync;
|
||||
case OMPI_OSC_RDMA_SYNC_TYPE_PSCW:
|
||||
if (ompi_osc_rdma_sync_pscw_peer (module, target, peer)) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"osc/rdma: found PSCW access epoch target for %d", target));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found PSCW access epoch target for %d", target);
|
||||
return &module->all_sync;
|
||||
}
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no access epoch found for target %d", target);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -465,7 +467,6 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
|
||||
static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
|
||||
{
|
||||
ompi_osc_rdma_aggregation_t *aggregation, *next;
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
|
||||
if (opal_list_get_size (&sync->aggregations)) {
|
||||
OPAL_THREAD_SCOPED_LOCK(&sync->lock,
|
||||
|
@ -25,6 +25,8 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
do {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local regions");
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
@ -56,8 +58,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
|
||||
} while (0);
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_gacc_self: failed performing accumulate operation. ret = %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "local accumulate failed with ompi error code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -74,6 +75,8 @@ static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void
|
||||
uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
|
||||
ompi_osc_rdma_module_t *module)
|
||||
{
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing compare-and-swap with local regions");
|
||||
|
||||
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
|
||||
memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size);
|
||||
@ -96,6 +99,9 @@ static void ompi_osc_rdma_acc_put_complete (struct mca_btl_base_module_t *btl, s
|
||||
ompi_osc_rdma_sync_t *sync = request->sync;
|
||||
ompi_osc_rdma_peer_t *peer = request->peer;
|
||||
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote accumulate (put/get) complete on "
|
||||
"sync %p. local address %p. opal status %d", (void *) sync, local_address, status);
|
||||
|
||||
ompi_osc_rdma_frag_complete (request->frag);
|
||||
ompi_osc_rdma_request_complete (request, status);
|
||||
|
||||
@ -119,7 +125,11 @@ static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
|
||||
assert (OMPI_SUCCESS == status);
|
||||
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote accumulate get complete on sync %p. "
|
||||
"status %d. request type %d", (void *) sync, status, request->type);
|
||||
|
||||
if (OMPI_SUCCESS == status && OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unpacking get accumulate result into user buffer");
|
||||
if (NULL == request->result_addr) {
|
||||
/* result buffer is not necessarily contiguous. use the opal datatype engine to
|
||||
* copy the data over in this case */
|
||||
@ -150,6 +160,8 @@ static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
memcpy ((void *) source, request->origin_addr, request->len);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "putting locally accumulated result into target window");
|
||||
|
||||
/* initiate the put of the accumulated data */
|
||||
status = module->selected_btl->btl_put (module->selected_btl, endpoint, (void *) source,
|
||||
request->target_address, local_handle,
|
||||
@ -174,13 +186,15 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
char *ptr = NULL;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate on contiguous region of %lu bytes to remote address %" PRIx64
|
||||
", sync %p", len, target_address, (void *) sync);
|
||||
|
||||
offset = target_address & btl_alignment_mask;;
|
||||
aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask;
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"Could not allocate an rdma fragment for get accumulate"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not allocate a temporary buffer for accumulate");
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -220,9 +234,8 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
/* align the target address */
|
||||
target_address = target_address & ~btl_alignment_mask;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
|
||||
"initiating btl get local: {%p, %p}, remote: {0x%" PRIx64 ", %p}...",
|
||||
ptr, (void *) frag->handle, target_address, (void *) target_handle));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl get. local: %p (handle %p), remote: 0x%" PRIx64
|
||||
" (handle %p)", ptr, (void *) frag->handle, target_address, (void *) target_handle);
|
||||
|
||||
ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
|
||||
target_address, frag->handle, target_handle, aligned_len,
|
||||
@ -232,8 +245,8 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
/* copy the put accumulate data */
|
||||
memcpy (ptr, source, len);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
|
||||
"initiating btl put..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl put. local: %p (handle %p), remote: 0x%" PRIx64
|
||||
" (handle %p)", ptr, (void *) frag->handle, target_address, (void *) target_handle);
|
||||
|
||||
ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, ptr,
|
||||
target_address, frag->handle, target_handle, len, 0,
|
||||
@ -245,7 +258,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "btl operation failed with ret = %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation faile with opal error code %d", ret);
|
||||
|
||||
ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL);
|
||||
|
||||
@ -319,6 +332,8 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
}
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "scheduling accumulate on non-contiguous datatype(s)");
|
||||
|
||||
/* the convertor will handle lb from here */
|
||||
(void) ompi_datatype_get_extent (target_datatype, &lb, &extent);
|
||||
target_address -= lb;
|
||||
@ -419,14 +434,6 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
subreq->type = OMPI_OSC_RDMA_TYPE_ACC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
|
||||
"target index = %d, target = {%p, %lu}, source_index = %d, source = {%p, %lu}, result = %p, result position = %lu, "
|
||||
"acc_len = %d, count = %lu",
|
||||
target_iov_index, target_iovec[target_iov_index].iov_base, (unsigned long) target_iovec[target_iov_index].iov_len,
|
||||
source_iov_index, source_iovec[source_iov_index].iov_base, (unsigned long) source_iovec[source_iov_index].iov_len,
|
||||
result_buffer, (unsigned long) result_position, acc_len, (unsigned long)(acc_len / target_primitive->super.size)));
|
||||
|
||||
|
||||
ret = ompi_osc_rdma_gacc_contig (sync, source_iovec[source_iov_index].iov_base, acc_len / target_primitive->super.size,
|
||||
target_primitive, NULL, 0, NULL, peer, (uint64_t) (intptr_t) target_iovec[target_iov_index].iov_base,
|
||||
target_handle, acc_len / target_primitive->super.size, target_primitive, op, subreq);
|
||||
@ -463,6 +470,8 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
OBJ_DESTRUCT(&source_convertor);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
|
||||
|
||||
opal_convertor_cleanup (&target_convertor);
|
||||
OBJ_DESTRUCT(&target_convertor);
|
||||
|
||||
@ -478,6 +487,9 @@ static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl
|
||||
ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data;
|
||||
void *result_buffer = (void *)(intptr_t) ((int64_t *) local_address)[1];
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete. result: 0x%" PRIx64,
|
||||
*((int64_t *) local_address));
|
||||
|
||||
/* copy the result */
|
||||
memcpy (result_buffer, local_address, 8);
|
||||
|
||||
@ -499,6 +511,9 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using 64-bit btl atomics. compare: 0x%"
|
||||
PRIx64 ", origin: 0x%" PRIx64, *((int64_t *) compare_buffer), *((int64_t *) source_buffer));
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
@ -543,6 +558,9 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
ompi_osc_rdma_peer_t *peer = request->peer;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. "
|
||||
"status %d", (void *) sync, status);
|
||||
|
||||
if (OMPI_SUCCESS == status) {
|
||||
/* copy data to the user buffer (for gacc) */
|
||||
memcpy (request->result_addr, (void *) source, request->len);
|
||||
@ -556,8 +574,8 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
|
||||
request->len, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_acc_put_complete, request, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "could not start put to complete accumulate "
|
||||
"operation. opal return code: %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "could not start put to complete accumulate operation. opal return code "
|
||||
"%d", ret);
|
||||
}
|
||||
|
||||
/* TODO -- we can do better. probably should queue up the next step and handle it in progress */
|
||||
@ -592,6 +610,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
char *ptr = NULL;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using RMDA on %lu bytes to remote address %" PRIx64
|
||||
", sync %p", len, target_address, (void *) sync);
|
||||
|
||||
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request);
|
||||
|
||||
request->internal = true;
|
||||
@ -611,14 +632,15 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
offset = target_address & btl_alignment_mask;;
|
||||
aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask;
|
||||
|
||||
ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
|
||||
do {
|
||||
ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"Could not allocate an rdma fragment for get accumulate. Falling back on point-to-point"));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not allocate an rdma fragment for compare-and-swap");
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (1);
|
||||
|
||||
if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
|
||||
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
@ -635,16 +657,24 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffe
|
||||
request->target_address = target_address;
|
||||
request->len = len;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "initiating btl get..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "RDMA compare-and-swap initiating btl get");
|
||||
|
||||
ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
|
||||
target_address, frag->handle, target_handle,
|
||||
aligned_len, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_get_complete, request, NULL);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
do {
|
||||
ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
|
||||
target_address, frag->handle, target_handle,
|
||||
aligned_len, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_cas_get_complete, request, NULL);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ompi_osc_rdma_progress (module);
|
||||
} while (1);
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
@ -663,15 +693,15 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
|
||||
uint64_t target_address;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "cswap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, %s",
|
||||
(unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr,
|
||||
dt->name, target_rank, (int) target_disp, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "cswap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, %s",
|
||||
(unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr,
|
||||
dt->name, target_rank, (int) target_disp, win->w_name));
|
||||
|
||||
ret = osc_rdma_get_remote_segment (module, peer, target_disp, 8, &target_address, &target_handle);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
return ret;
|
||||
@ -752,18 +782,17 @@ int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count,
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "get_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name,
|
||||
(unsigned long) result_addr, result_count, result_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name,
|
||||
win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"get_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name,
|
||||
(unsigned long) result_addr, result_count, result_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name,
|
||||
win->w_name));
|
||||
|
||||
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype,
|
||||
result_addr, result_count, result_datatype,
|
||||
peer, target_rank, target_disp, target_count,
|
||||
@ -786,18 +815,17 @@ int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count,
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rget_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name,
|
||||
(unsigned long) result_addr, result_count, result_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name,
|
||||
win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"rget_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name,
|
||||
(unsigned long) result_addr, result_count, result_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name,
|
||||
win->w_name));
|
||||
|
||||
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request);
|
||||
|
||||
ret = ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, result_addr,
|
||||
@ -820,14 +848,14 @@ int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, stru
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name, target_rank,
|
||||
(unsigned long) target_disp, op->o_name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "fop: %p, %s, %d, %lu, %s, %s",
|
||||
result_addr, dt->name, target_rank, (unsigned long) target_disp, op->o_name, win->w_name));
|
||||
|
||||
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer, target_rank,
|
||||
target_disp, 1, dt, op, NULL);
|
||||
}
|
||||
@ -845,15 +873,15 @@ int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count,
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "racc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "racc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name));
|
||||
|
||||
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request);
|
||||
|
||||
ret = ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, NULL, 0,
|
||||
@ -879,15 +907,15 @@ int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "acc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "acc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
|
||||
(unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name));
|
||||
|
||||
return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, NULL, 0,
|
||||
NULL, peer, target_rank, target_disp, target_count, target_datatype,
|
||||
op, NULL);
|
||||
|
@ -158,17 +158,15 @@ static void ompi_osc_rdma_handle_post (ompi_osc_rdma_module_t *module, int rank,
|
||||
/* look for the posting peer in the group */
|
||||
for (int j = 0 ; j < npeers ; ++j) {
|
||||
if (rank == peers[j]->rank) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"got expected post from %d. still expecting posts from %d processes",
|
||||
rank, (int) (npeers - state->num_post_msgs - 1)));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "got expected post from %d. still expecting posts from %d processes",
|
||||
rank, (int) (npeers - state->num_post_msgs - 1));
|
||||
++state->num_post_msgs;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* post does not belong to this start epoch. save it for later */
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "got unexpected post from %d "
|
||||
". queueing for later", rank));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "got unexpected post from %d . queueing for later", rank);
|
||||
pending_post = OBJ_NEW(ompi_osc_rdma_pending_post_t);
|
||||
pending_post->rank = rank;
|
||||
OPAL_THREAD_SCOPED_LOCK(&module->lock, opal_list_append (&module->pending_posts, &pending_post->super));
|
||||
@ -185,8 +183,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
osc_rdma_counter_t *temp;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_post_atomic entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post: %p, %d, %s", group, assert, win->w_name);
|
||||
|
||||
/* check if we are already in a post epoch */
|
||||
if (module->pw_group) {
|
||||
@ -230,8 +227,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"sending post messages"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "sending post messages");
|
||||
|
||||
/* send a hello counter to everyone in group */
|
||||
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
|
||||
@ -272,8 +268,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
sizeof (osc_rdma_counter_t) * post_index;
|
||||
|
||||
do {
|
||||
OPAL_OUTPUT_VERBOSE((80, ompi_osc_base_framework.framework_output,
|
||||
"Attempting to post to index %d @ rank %d", post_index, peer->rank));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to post to index %d @ rank %d", post_index, peer->rank);
|
||||
|
||||
/* try to post. if the value isn't 0 then another rank is occupying this index */
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
@ -319,8 +314,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
|
||||
ompi_osc_rdma_release_peers (peers, ompi_group_size(module->pw_group));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"post complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -333,8 +327,8 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win
|
||||
ompi_osc_rdma_sync_t *sync = &module->all_sync;
|
||||
int group_size = ompi_group_size (group);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_start entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start: %p, %d, %s", group, assert,
|
||||
win->w_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
@ -351,9 +345,7 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win
|
||||
/* haven't processed any post messaes yet */
|
||||
state->num_post_msgs = 0;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_start entering with group size %d...",
|
||||
sync->num_peers));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start group size %d", sync->num_peers);
|
||||
|
||||
if (0 == ompi_group_size (group)) {
|
||||
/* nothing more to do. this is an empty start epoch */
|
||||
@ -387,9 +379,8 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win
|
||||
ompi_osc_rdma_peer_t *peer = sync->peer_list.peers[i];
|
||||
|
||||
if (pending_post->rank == peer->rank) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"found queued post from %d. still expecting posts from %d processes",
|
||||
peer->rank, (int) (group_size - state->num_post_msgs - 1)));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found queued post from %d. still expecting posts "
|
||||
"from %d processes", peer->rank, (int) (group_size - state->num_post_msgs - 1));
|
||||
opal_list_remove_item (&module->pending_posts, &pending_post->super);
|
||||
OBJ_RELEASE(pending_post);
|
||||
/* only one thread can process post messages so there is no need of atomics here */
|
||||
@ -401,9 +392,8 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win
|
||||
|
||||
/* wait for all post messages to arrive */
|
||||
while (state->num_post_msgs != group_size) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"Waiting for post messages. Have %d of %d",
|
||||
(int) state->num_post_msgs, group_size));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "waiting for post messages. have %d of %d",
|
||||
(int) state->num_post_msgs, group_size);
|
||||
for (int i = 0 ; i < OMPI_OSC_RDMA_POST_PEER_MAX ; ++i) {
|
||||
/* no post at this index (yet) */
|
||||
if (0 == state->post_peers[i]) {
|
||||
@ -420,7 +410,7 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win
|
||||
state->num_post_msgs = group_size;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_start complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start complete");
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_SUCCESS;
|
||||
@ -434,7 +424,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
ompi_group_t *group;
|
||||
int group_size;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_complete entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete: %s", win->w_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
if (OMPI_OSC_RDMA_SYNC_TYPE_PSCW != sync->type) {
|
||||
@ -497,8 +487,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
/* release our reference to peers in this group */
|
||||
ompi_osc_rdma_release_peers (peers, group_size);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_complete complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -510,13 +499,11 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
|
||||
ompi_group_t *group;
|
||||
int group_size;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_wait entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait: %s", win->w_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
if (NULL == module->pw_group) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_wait_atomic no post group"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no matching post");
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -524,9 +511,8 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
|
||||
group_size = ompi_group_size (module->pw_group);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_wait_atomic group size %d, complete messages %d",
|
||||
group_size, (int) state->num_complete_msgs));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "waiting on complete message. have %d of %d",
|
||||
(int) state->num_complete_msgs, group_size);
|
||||
|
||||
while (group_size != state->num_complete_msgs) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
@ -542,8 +528,7 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
|
||||
ompi_group_decrement_proc_count(group);
|
||||
OBJ_RELEASE(group);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_wait complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -556,13 +541,11 @@ int ompi_osc_rdma_test_atomic (ompi_win_t *win, int *flag)
|
||||
ompi_group_t *group;
|
||||
int group_size;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_test_atomic entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "test: %s", win->w_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
if (NULL == module->pw_group) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_test_atomic no post group"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no matching post");
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -572,8 +555,8 @@ int ompi_osc_rdma_test_atomic (ompi_win_t *win, int *flag)
|
||||
*flag = (group_size == state->num_complete_msgs);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_test_atomic flag %d", *flag));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "checking on complete message. have %d of %d",
|
||||
(int) state->num_complete_msgs, group_size);
|
||||
|
||||
if (!*flag) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
@ -598,13 +581,11 @@ int ompi_osc_rdma_fence_atomic (int assert, ompi_win_t *win)
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"osc rdma: fence start"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence: %d, %s", assert, win->w_name);
|
||||
|
||||
/* can't enter an active target epoch while a lock is active */
|
||||
if (ompi_osc_rdma_in_passive_epoch (module)) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"osc rdma: could not enter fence. already in an access epoch"));
|
||||
if (ompi_osc_rdma_in_passive_epoch (module) || module->pw_group) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "can not start fence epoch due to conflicting epoch");
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
@ -624,8 +605,7 @@ int ompi_osc_rdma_fence_atomic (int assert, ompi_win_t *win)
|
||||
|
||||
/* short-circuit the noprecede case */
|
||||
if (0 != (assert & MPI_MODE_NOPRECEDE)) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"osc rdma: fence end (short circuit)"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence complete (short circuit)");
|
||||
/* no communication can occur until a peer has entered the same fence epoch. for now
|
||||
* a barrier is used to ensure this is the case. */
|
||||
ret = module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module);
|
||||
@ -644,8 +624,7 @@ int ompi_osc_rdma_fence_atomic (int assert, ompi_win_t *win)
|
||||
module->all_sync.type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"osc rdma: fence end: %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence complete");
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
|
@ -34,14 +34,19 @@ int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_b
|
||||
char *ptr = data;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "reading state data from endpoint %p. source: 0x%" PRIx64 ", len: %lu",
|
||||
(void *) endpoint, source_address, (unsigned long) len);
|
||||
|
||||
if (module->selected_btl->btl_register_mem && len >= module->selected_btl->btl_get_local_registration_threshold) {
|
||||
ret = ompi_osc_rdma_frag_alloc (module, len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "error allocating a fragment!"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "error allocating temporary buffer");
|
||||
return ret;
|
||||
}
|
||||
|
||||
local_handle = frag->handle;
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated temporary buffer %p in fragment %p", ptr,
|
||||
(void *) frag);
|
||||
}
|
||||
|
||||
assert (!(source_address & ALIGNMENT_MASK(module->selected_btl->btl_get_alignment)));
|
||||
@ -58,7 +63,7 @@ int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_b
|
||||
} while (1);
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "btl get failed with ret = %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "btl get failed with opal error code %d", ret);
|
||||
|
||||
if (frag) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
@ -72,6 +77,8 @@ int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_b
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished reading state data from endpoint %p", (void *) endpoint);
|
||||
|
||||
opal_memchecker_base_mem_defined (ptr, len);
|
||||
|
||||
if (frag) {
|
||||
@ -144,6 +151,8 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
|
||||
subreq = NULL;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "scheduling rdma on non-contiguous datatype(s)");
|
||||
|
||||
/* prepare convertors for the source and target. these convertors will be used to determine the
|
||||
* contiguous segments within the source and target. */
|
||||
OBJ_CONSTRUCT(&remote_convertor, opal_convertor_t);
|
||||
@ -188,11 +197,6 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
/* we already checked that the target was large enough. this should be impossible */
|
||||
assert (0 != local_iov_count);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((40, ompi_osc_base_framework.framework_output, "local index = %d, local = {%p, %lu}, "
|
||||
"source_index = %d, source = {%p, %lu}", local_iov_index, local_iovec[local_iov_index].iov_base,
|
||||
(unsigned long) local_iovec[local_iov_index].iov_len, remote_iov_index, remote_iovec[remote_iov_index].iov_base,
|
||||
(unsigned long) remote_iovec[remote_iov_index].iov_len));
|
||||
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = min(min(local_iovec[local_iov_index].iov_len, remote_iovec[remote_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
@ -210,6 +214,10 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
subreq = request;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
local_iovec[local_iov_index].iov_base, remote_iovec[remote_iov_index].iov_base,
|
||||
(unsigned long) remote_iovec[remote_iov_index].iov_len);
|
||||
|
||||
ret = rdma_fn (sync, peer, (uint64_t) (intptr_t) remote_iovec[remote_iov_index].iov_base, remote_handle,
|
||||
local_iovec[local_iov_index].iov_base, rdma_len, subreq);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
@ -252,6 +260,8 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
|
||||
|
||||
/* clean up convertors */
|
||||
opal_convertor_cleanup (&local_convertor);
|
||||
OBJ_DESTRUCT(&local_convertor);
|
||||
@ -292,6 +302,10 @@ static inline int ompi_osc_rdma_master (ompi_osc_rdma_sync_t *sync, void *local_
|
||||
(void) ompi_datatype_get_extent (remote_datatype, &lb, &extent);
|
||||
remote_address += lb;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing rdma on contiguous region. local: %p, "
|
||||
"remote: 0x%lx, length: %lu", local_address, (unsigned long) remote_address,
|
||||
rdma_len);
|
||||
|
||||
do {
|
||||
ret = rdma_fn (sync, peer, remote_address, remote_handle, local_address, rdma_len, request);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == ret)) {
|
||||
@ -313,7 +327,7 @@ static int ompi_osc_rdma_copy_local (const void *source, int source_count, ompi_
|
||||
{
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "performing local copy from %p -> %p", source, target));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing local copy from %p -> %p", source, target);
|
||||
|
||||
opal_atomic_mb ();
|
||||
ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype);
|
||||
@ -344,6 +358,9 @@ static void ompi_osc_rdma_put_complete (struct mca_btl_base_module_t *btl, struc
|
||||
ompi_osc_rdma_request_complete (request, status);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "btl put complete on sync %p. local "
|
||||
"address %p. opal status %d", (void *) sync, local_address, status);
|
||||
|
||||
if (frag) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
} else {
|
||||
@ -360,10 +377,12 @@ static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t *
|
||||
ompi_osc_rdma_aggregation_t *aggregation = (ompi_osc_rdma_aggregation_t *) context;
|
||||
ompi_osc_rdma_sync_t *sync = aggregation->sync;
|
||||
ompi_osc_rdma_frag_t *frag = aggregation->frag;
|
||||
ompi_osc_rdma_request_t *request = NULL, *next;
|
||||
|
||||
assert (OPAL_SUCCESS == status);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "aggregate put complete %p on sync %p. local address %p. status %d",
|
||||
(void *) aggregation, (void *) sync, local_address, status);
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
ompi_osc_rdma_aggregation_return (aggregation);
|
||||
|
||||
@ -380,8 +399,8 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee
|
||||
ompi_osc_rdma_module_t *module = sync->module;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "initiating the btl put of %llu bytes to remote "
|
||||
"address %" PRIx64 ", sync object %p...", (unsigned long long) size, target_address, (void *) sync));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl put of %lu bytes to remote address %" PRIx64 ", sync "
|
||||
"object %p...", (unsigned long) size, target_address, (void *) sync);
|
||||
|
||||
/* flag outstanding rma requests */
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
@ -406,7 +425,7 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee
|
||||
}
|
||||
} while (1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "btl put failed with code %d", ret));
|
||||
OSC_RDMA_VERBOSE(10, "btl put failed with opal error code %d", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -415,6 +434,11 @@ static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggrega
|
||||
void *source_buffer, size_t size)
|
||||
{
|
||||
size_t offset = aggregation->buffer_used;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "appending %lu bytes of data from %p to aggregate fragment %p with start "
|
||||
"address 0x%lx", (unsigned long) size, source_buffer, (void *) aggregation,
|
||||
(unsigned long) aggregation->target_address);
|
||||
|
||||
memcpy (aggregation->buffer + offset, source_buffer, size);
|
||||
|
||||
aggregation->buffer_used += size;
|
||||
@ -445,6 +469,9 @@ static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_r
|
||||
return ret;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated new aggregate fragment %p for target %d", (void *) aggregation,
|
||||
peer->rank);
|
||||
|
||||
peer->aggregate = aggregation;
|
||||
|
||||
aggregation->target_address = target_address;
|
||||
@ -493,6 +520,9 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
ret = ompi_osc_rdma_aggregate_alloc (sync, peer, target_address, target_handle, source_buffer, size, request,
|
||||
OMPI_OSC_RDMA_TYPE_PUT);
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
|
||||
if (request) {
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -540,15 +570,13 @@ static void ompi_osc_rdma_get_complete (struct mca_btl_base_module_t *btl, struc
|
||||
ompi_osc_rdma_sync_t *sync = request->sync;
|
||||
void *origin_addr = request->origin_addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((status ? 10 : 60, ompi_osc_base_framework.framework_output, "btl get operation complete with status %d",
|
||||
status));
|
||||
OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "btl get complete on sync %p. local "
|
||||
"address %p. origin %p. opal status %d", (void *) sync, local_address, origin_addr, status);
|
||||
|
||||
assert (OPAL_SUCCESS == status);
|
||||
|
||||
if (NULL != frag) {
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "copying %lu bytes from temporary buffer %p to destination %p",
|
||||
request->len, (void *) source, origin_addr));
|
||||
memcpy (origin_addr, (void *) source, request->len);
|
||||
}
|
||||
|
||||
@ -571,6 +599,8 @@ int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flusing aggregate fragment %p", (void *) aggregation);
|
||||
|
||||
assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type);
|
||||
|
||||
ret = ompi_osc_rdma_put_real (aggregation->sync, peer, aggregation->target_address, aggregation->target_handle,
|
||||
@ -614,8 +644,8 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
request->origin_addr = target_buffer;
|
||||
request->sync = sync;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "initiating get from remote ptr %" PRIx64 " to local ptr %p",
|
||||
source_address, target_buffer));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating get of %lu bytes from remote ptr %" PRIx64 " to local ptr %p",
|
||||
size, source_address, target_buffer);
|
||||
|
||||
if ((module->selected_btl->btl_register_mem && size > module->selected_btl->btl_get_local_registration_threshold) ||
|
||||
(((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) {
|
||||
@ -631,15 +661,11 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
} else {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "using internal buffer %p in fragment %p for get", ptr, (void *) frag);
|
||||
local_handle = frag->handle;
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "initiating btl get... source: %" PRIx64
|
||||
" (handle 0x%llx, 0x%llx), %" PRIu64 ", destination: %p, %" PRIu64, source_address,
|
||||
((unsigned long long *) source_handle)[0], ((unsigned long long *) source_handle)[1],
|
||||
aligned_len, ptr, aligned_len));
|
||||
|
||||
ompi_osc_rdma_sync_rdma_inc (sync);
|
||||
|
||||
do {
|
||||
@ -662,7 +688,7 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
}
|
||||
} while (1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "btl get failed with ret = %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "btl get failed with opal error code %d", ret);
|
||||
|
||||
ompi_osc_rdma_cleanup_rdma (sync, frag, local_handle, request);
|
||||
|
||||
@ -747,15 +773,15 @@ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, struct ompi_da
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "put: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr,
|
||||
origin_count, origin_datatype->name, target_rank, (int) target_disp, target_count,
|
||||
target_datatype->name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "put: 0x%lx, %d, %s, %d, %d, %d, %s, %s",
|
||||
(unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
|
||||
(int) target_disp, target_count, target_datatype->name, win->w_name));
|
||||
|
||||
return ompi_osc_rdma_put_w_req (sync, origin_addr, origin_count, origin_datatype, peer, target_disp,
|
||||
target_count, target_datatype, NULL);
|
||||
}
|
||||
@ -771,16 +797,14 @@ int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, struct ompi_d
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rput: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr, origin_count,
|
||||
origin_datatype->name, target_rank, (int) target_disp, target_count, target_datatype->name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "rput: 0x%lx, %d, %s, %d, %d, "
|
||||
"%d, %s, %s", (unsigned long) origin_addr, origin_count,
|
||||
origin_datatype->name, target_rank, (int) target_disp, target_count,
|
||||
target_datatype->name, win->w_name));
|
||||
|
||||
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request);
|
||||
|
||||
rdma_request->type = OMPI_OSC_RDMA_TYPE_PUT;
|
||||
@ -805,16 +829,15 @@ int ompi_osc_rdma_get (void *origin_addr, int origin_count, struct ompi_datatype
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "get: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr,
|
||||
origin_count, origin_datatype->name, source_rank, (int) source_disp, source_count,
|
||||
source_datatype->name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, source_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "get: 0x%lx, %d, %s, %d, %d, "
|
||||
"%d, %s, %s", (unsigned long) origin_addr, origin_count,
|
||||
origin_datatype->name, source_rank, (int) source_disp, source_count,
|
||||
source_datatype->name, win->w_name));
|
||||
|
||||
return ompi_osc_rdma_get_w_req (sync, origin_addr, origin_count, origin_datatype, peer,
|
||||
source_disp, source_count, source_datatype, NULL);
|
||||
}
|
||||
@ -830,16 +853,15 @@ int ompi_osc_rdma_rget (void *origin_addr, int origin_count, struct ompi_datatyp
|
||||
ompi_osc_rdma_sync_t *sync;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rget: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr,
|
||||
origin_count, origin_datatype->name, source_rank, (int) source_disp, source_count,
|
||||
source_datatype->name, win->w_name);
|
||||
|
||||
sync = ompi_osc_rdma_module_sync_lookup (module, source_rank, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == sync)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "rget: 0x%lx, %d, %s, %d, %d, "
|
||||
"%d, %s, %s", (unsigned long) origin_addr, origin_count,
|
||||
origin_datatype->name, source_rank, (int) source_disp, source_count,
|
||||
source_datatype->name, win->w_name));
|
||||
|
||||
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request);
|
||||
|
||||
rdma_request->type = OMPI_OSC_RDMA_TYPE_GET;
|
||||
|
@ -59,14 +59,14 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
|
||||
ompi_osc_rdma_region_t *region;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "getting remote address for peer %d target_disp %lu",
|
||||
peer->rank, (unsigned long) target_disp));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "getting remote address for peer %d target_disp %lu. peer flags: 0x%x",
|
||||
peer->rank, (unsigned long) target_disp, peer->flags);
|
||||
|
||||
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) {
|
||||
ret = ompi_osc_rdma_find_dynamic_region (module, peer, (uint64_t) target_disp, length, ®ion);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"could not retrieve region for %" PRIx64 " from window rank %d", (uint64_t) target_disp, peer->rank));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not retrieve region for %" PRIx64 " from window rank %d",
|
||||
(uint64_t) target_disp, peer->rank);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -79,18 +79,17 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
|
||||
|
||||
*remote_address = ex_peer->super.base + disp_unit * target_disp;
|
||||
if (OPAL_UNLIKELY(*remote_address + length > (ex_peer->super.base + size))) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "remote address range 0x%" PRIx64 " - 0x%" PRIx64
|
||||
" is out of range. Valid address range is 0x%" PRIx64 " - 0x%" PRIx64 " (%" PRIu64 " bytes)",
|
||||
*remote_address, *remote_address + length, ex_peer->super.base, ex_peer->super.base + size,
|
||||
(uint64_t) size));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "remote address range 0x%" PRIx64 " - 0x%" PRIx64
|
||||
" is out of range. Valid address range is 0x%" PRIx64 " - 0x%" PRIx64 " (%" PRIu64 " bytes)",
|
||||
*remote_address, *remote_address + length, ex_peer->super.base, ex_peer->super.base + size,
|
||||
(uint64_t) size);
|
||||
return OMPI_ERR_RMA_RANGE;
|
||||
}
|
||||
|
||||
*remote_handle = ex_peer->super.base_handle;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output,
|
||||
"remote address: 0x%" PRIx64 ", handle: %p", *remote_address, (void *) *remote_handle));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "remote address: 0x%" PRIx64 ", handle: %p", *remote_address, (void *) *remote_handle);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -348,6 +348,8 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
|
||||
ompi_osc_rdma_peer_t *my_peer;
|
||||
int ret, my_rank;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating private internal state");
|
||||
|
||||
my_rank = ompi_comm_rank (module->comm);
|
||||
|
||||
local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT(module);
|
||||
@ -462,6 +464,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
return allocate_state_single (module, base, size);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating shared internal state");
|
||||
|
||||
local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT (module);
|
||||
leader_peer_data_size = module->region_size * module->node_count;
|
||||
|
||||
@ -511,8 +515,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size);
|
||||
free (data_file);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
||||
"failed to create shared memory segment"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment");
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -525,8 +528,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
|
||||
module->segment_base = opal_shmem_segment_attach (&module->seg_ds);
|
||||
if (NULL == module->segment_base) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
||||
"failed to attach to the shared memory segment"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment");
|
||||
ret = OPAL_ERROR;
|
||||
break;
|
||||
}
|
||||
@ -545,9 +547,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
/* initialize my state */
|
||||
memset (module->state, 0, module->state_size);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "registering segment range: %p - %p (%lu bytes)",
|
||||
module->segment_base, (char *) module->segment_base + total_size, total_size));
|
||||
|
||||
/* just go ahead and register the whole segment */
|
||||
ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->segment_base, total_size, MCA_BTL_REG_FLAG_ACCESS_ANY,
|
||||
&module->state_handle);
|
||||
@ -677,8 +676,8 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
|
||||
}
|
||||
|
||||
if (NULL != selected_btl) {
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
||||
selected_btl->btl_component->btl_version.mca_component_name));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s",
|
||||
selected_btl->btl_component->btl_version.mca_component_name);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -769,12 +768,13 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
|
||||
}
|
||||
|
||||
if (NULL == selected_btl) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no suitable btls found");
|
||||
/* no btl = no rdma/atomics */
|
||||
return OMPI_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
||||
selected_btl->btl_component->btl_version.mca_component_name));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s",
|
||||
selected_btl->btl_component->btl_version.mca_component_name);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -821,7 +821,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
|
||||
module->region_size, MPI_BYTE, module->local_leaders,
|
||||
module->local_leaders->c_coll.coll_gather_module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Gather failed %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "leader allgather failed with ompi error code %d", ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -867,9 +867,8 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
|
||||
|
||||
/* create a shared communicator to handle communication about the local segment */
|
||||
ret = ompi_comm_split_type (module->comm, MPI_COMM_TYPE_SHARED, 0, NULL, &module->shared_comm);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
||||
"failed to create a shared memory communicator. ret = %d", ret));
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create a shared memory communicator. error code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -879,9 +878,8 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
|
||||
|
||||
ret = ompi_comm_split (module->comm, (0 == local_rank) ? 0 : MPI_UNDEFINED, comm_rank, &module->local_leaders,
|
||||
false);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
||||
"failed to create local leaders communicator. ret = %d", ret));
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create local leaders communicator. error code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -894,7 +892,7 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
|
||||
ret = module->shared_comm->c_coll.coll_bcast (values, 2, MPI_INT, 0, module->shared_comm,
|
||||
module->shared_comm->c_coll.coll_bcast_module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "failed to broadcast local data"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to broadcast local data. error code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -1024,8 +1022,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
return ret;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "creating RDMA window of flavor %d with id %d",
|
||||
flavor, ompi_comm_get_cid(module->comm)));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %d",
|
||||
flavor, ompi_comm_get_cid(module->comm));
|
||||
|
||||
/* peer data */
|
||||
if (world_size > init_limit) {
|
||||
@ -1046,7 +1044,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
/* find rdma capable endpoints */
|
||||
ret = ompi_osc_rdma_query_btls (module->comm, &module->selected_btl);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Failed finding RDMA endpoints"));
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
@ -1083,7 +1080,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
/* fill in our part */
|
||||
ret = allocate_state_shared (module, base, size);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Failed to allocate state"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
@ -1100,7 +1097,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
|
||||
ret = ompi_osc_rdma_share_data (module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Failed to share data"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
|
||||
ompi_osc_rdma_free (win);
|
||||
return ret;
|
||||
}
|
||||
@ -1109,10 +1106,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
/* for now the leader is always rank 0 in the communicator */
|
||||
module->leader = ompi_osc_rdma_module_peer (module, 0);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"rdma component creating window with id %d",
|
||||
ompi_comm_get_cid(module->comm)));
|
||||
|
||||
/* lock data */
|
||||
if (module->no_locks) {
|
||||
win->w_flags |= OMPI_WIN_NO_LOCKS;
|
||||
@ -1156,8 +1149,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
return ret;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
|
||||
"done creating rdma window %d", ompi_comm_get_cid(module->comm)));
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d",
|
||||
ompi_comm_get_cid(module->comm));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -47,9 +47,9 @@ static inline ompi_osc_rdma_region_t *ompi_osc_rdma_find_region_containing (ompi
|
||||
|
||||
region_bound = (intptr_t) (region->base + region->len);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "Checking memory region %p-%p against %p-%p (index %d) (min_index = %d, max_index = %d)",
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "checking memory region %p-%p against %p-%p (index %d) (min_index = %d, max_index = %d)",
|
||||
(void *) base, (void *) bound, (void *) region->base, (void *)(region->base + region->len), mid_index,
|
||||
min_index, max_index));
|
||||
min_index, max_index);
|
||||
|
||||
if (region->base > base) {
|
||||
return ompi_osc_rdma_find_region_containing (regions, min_index, mid_index-1, base, bound, region_size, region_index);
|
||||
@ -104,6 +104,8 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
region_count = module->state->region_count & 0xffffffffL;
|
||||
@ -153,8 +155,8 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
|
||||
region->base = OPAL_ALIGN((intptr_t) base - page_size + 1, page_size, intptr_t);
|
||||
region->len = OPAL_ALIGN(len, page_size, size_t);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "Attaching memory region %p-%p at index %d",
|
||||
base, (void *)((intptr_t) base + len), region_index));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d",
|
||||
base, (void *)((intptr_t) base + len), region_index);
|
||||
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
mca_btl_base_registration_handle_t *handle;
|
||||
@ -174,13 +176,14 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
|
||||
|
||||
module->dynamic_handles[region_index].refcnt = 1;
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
for (int i = 0 ; i < region_count + 1 ; ++i) {
|
||||
region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "Cache[%d] = {%p, %lu}",
|
||||
i, (void *) region->base, (unsigned long) region->len));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i,
|
||||
(void *) region->base, (unsigned long) region->len);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
opal_atomic_mb ();
|
||||
/* the region state has changed */
|
||||
@ -189,6 +192,8 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
|
||||
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -208,6 +213,8 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach: %s, %p", win->w_name, base);
|
||||
|
||||
/* the upper 4 bytes of the region count are an instance counter */
|
||||
region_count = module->state->region_count & 0xffffffffL;
|
||||
region_id = module->state->region_count >> 32;
|
||||
@ -216,20 +223,22 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
|
||||
region_count - 1, (intptr_t) base, (intptr_t) base + 1,
|
||||
module->region_size, ®ion_index);
|
||||
if (NULL == region) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory region starting at %p", base);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (--module->dynamic_handles[region_index].refcnt > 0) {
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* lock the region so it can't change while a peer is reading it */
|
||||
ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "Detaching memory region %p-%p at index %d",
|
||||
base, (void *)((intptr_t) base + region->len), region_index));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
|
||||
base, (void *)((intptr_t) base + region->len), region_index);
|
||||
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
ompi_osc_rdma_deregister (module, module->dynamic_handles[region_index].btl_handle);
|
||||
@ -253,6 +262,8 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -274,6 +285,8 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module,
|
||||
uint64_t source_address;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "refreshing dynamic memory regions for target %d", peer->super.rank);
|
||||
|
||||
/* this loop is meant to prevent us from reading data while the remote side is in attach */
|
||||
do {
|
||||
osc_rdma_counter_t remote_value;
|
||||
@ -287,15 +300,11 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module,
|
||||
|
||||
region_id = remote_value >> 32;
|
||||
region_count = remote_value & 0xffffffffl;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "remote memory region: 0x%lx, 0x%lx",
|
||||
(unsigned long) region_id, (unsigned long) region_count));
|
||||
/* check if the region is changing */
|
||||
} while (0xffffffffl == region_count);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "target has region_id 0x%lx, region_count 0x%lx "
|
||||
"(cached: 0x%x, 0x%x)", (unsigned long) region_id, (unsigned long) region_count, peer->region_id,
|
||||
peer->region_count));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "target region: id 0x%lx, count 0x%lx (cached: 0x%x, 0x%x)",
|
||||
(unsigned long) region_id, (unsigned long) region_count, peer->region_id, peer->region_count);
|
||||
|
||||
if (0 == region_count) {
|
||||
return OMPI_ERR_RMA_RANGE;
|
||||
@ -308,7 +317,7 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module,
|
||||
unsigned region_len = module->region_size * region_count;
|
||||
void *temp;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "dynamic memory cache is out of data. reloading from peer"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "dynamic memory cache is out of data. reloading from peer");
|
||||
|
||||
/* allocate only enough space for the remote regions */
|
||||
temp = realloc (peer->regions, region_len);
|
||||
@ -340,6 +349,8 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module,
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished refreshing dynamic memory regions for target %d", peer->super.rank);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -351,8 +362,8 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_
|
||||
ompi_osc_rdma_region_t *regions;
|
||||
int ret, region_count;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "locating dynamic memory region matching: %"
|
||||
PRIx64 "-%" PRIx64 " (len %lu)", base, base + len, (unsigned long) len));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
|
||||
" (len %lu)", base, base + len, (unsigned long) len);
|
||||
|
||||
ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
|
@ -53,8 +53,8 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod
|
||||
volatile bool atomic_complete = false;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "decrementing shared lock %" PRIx64 " by %lx\n", lock,
|
||||
(unsigned long) value));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing shared lock %" PRIx64 " on peer %d. value 0x%lx", lock,
|
||||
peer->rank, (unsigned long) value);
|
||||
|
||||
/* spin until the lock has been acquired */
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
@ -103,11 +103,14 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod
|
||||
ompi_osc_rdma_lock_t value, ptrdiff_t offset,
|
||||
ompi_osc_rdma_lock_t check)
|
||||
{
|
||||
intptr_t lock = (intptr_t) peer->state + offset;
|
||||
uint64_t lock = (uint64_t) peer->state + offset;
|
||||
volatile bool atomic_complete;
|
||||
ompi_osc_rdma_lock_t *temp;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring shared lock %" PRIx64 " on peer %d. value 0x%lx", lock,
|
||||
peer->rank, (unsigned long) value);
|
||||
|
||||
/* spin until the lock has been acquired */
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
ompi_osc_rdma_frag_t *frag;
|
||||
@ -123,7 +126,7 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod
|
||||
peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS > ret)) {
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "failed to increment shared lock. ret: %d", ret));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to increment shared lock. opal error code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -134,13 +137,14 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "incremented shared lock 0x%lx by 0x%lx. Old value 0x%lx",
|
||||
(unsigned long) lock, (unsigned long) value, (unsigned long) *temp));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "shared lock incremented. old value 0x%lx", (unsigned long) *temp);
|
||||
|
||||
if (!(*temp & check)) {
|
||||
break;
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "another peer has exclusive access to lock");
|
||||
|
||||
/* NTH: i think this is correct. backoff! */
|
||||
ompi_osc_rdma_lock_release_shared (module, peer, -value, offset);
|
||||
ompi_osc_rdma_progress (module);
|
||||
@ -151,8 +155,8 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod
|
||||
ompi_osc_rdma_lock_t lock_state;
|
||||
do {
|
||||
lock_state = ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value);
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "incremented local shared lock by 0x%lx. Old value 0x%lx",
|
||||
(unsigned long) value, (unsigned long) lock_state));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "local shared lock incremented. old value 0x%lx",
|
||||
(unsigned long) lock_state);
|
||||
if (!(lock_state & check)) {
|
||||
break;
|
||||
}
|
||||
@ -162,6 +166,8 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod
|
||||
} while (1);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "shared lock acquired");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -187,6 +193,9 @@ static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module
|
||||
volatile bool atomic_complete;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "trying to acquire exclusive lock %" PRIx64 " on peer %d", lock,
|
||||
peer->rank);
|
||||
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
int result;
|
||||
@ -198,6 +207,7 @@ static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module
|
||||
|
||||
/* set the temporary value so we can detect success. note that a lock should never be -1 */
|
||||
atomic_complete = false;
|
||||
*temp = 1;
|
||||
ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, lock, frag->handle,
|
||||
peer->state_handle, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0, 0,
|
||||
ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
|
||||
@ -212,8 +222,13 @@ static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "got %lx when attempting compare and swap %" PRIx64 " on %p",
|
||||
(unsigned long) *temp, lock, (void *) peer));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
if (*temp == 0) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "exclusive lock acquired");
|
||||
} else {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "could not acquire exclusive lock");
|
||||
}
|
||||
#endif
|
||||
result = (*temp != 0);
|
||||
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
@ -268,6 +283,8 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
|
||||
volatile bool atomic_complete = false;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank);
|
||||
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) {
|
||||
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, lock, peer->state_handle, MCA_BTL_ATOMIC_ADD,
|
||||
@ -280,6 +297,7 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "error releasing exclusive lock");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -288,13 +306,11 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "unlocked target lock on peer %p %" PRIx64 " using value 0x%lx"
|
||||
PRIx64, (void *) peer, lock, (unsigned long) -OMPI_OSC_RDMA_LOCK_EXCLUSIVE));
|
||||
} else {
|
||||
ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "exclusive lock released");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -41,8 +41,7 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
|
||||
|
||||
assert (0 <= target);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_flush starting..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name);
|
||||
|
||||
if (ompi_comm_rank (module->comm) == target) {
|
||||
/* nothing to flush. call one round of progress */
|
||||
@ -54,9 +53,8 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
|
||||
|
||||
lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == lock || OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type)) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_flush: target %d is not locked in window %s",
|
||||
target, win->w_name));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "flush: target %d is not locked in window %s",
|
||||
target, win->w_name);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -65,6 +63,8 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
|
||||
/* finish all outstanding fragments */
|
||||
ompi_osc_rdma_sync_rdma_complete (lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush on target %d complete", target);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -82,8 +82,7 @@ int ompi_osc_rdma_flush_all (struct ompi_win_t *win)
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_flush_all entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all: %s", win->w_name);
|
||||
|
||||
/* globally complete all outstanding rdma requests */
|
||||
if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) {
|
||||
@ -93,13 +92,13 @@ int ompi_osc_rdma_flush_all (struct ompi_win_t *win)
|
||||
/* flush all locks */
|
||||
ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, &key, (void **) &lock, &node);
|
||||
while (OPAL_SUCCESS == ret) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "flushing lock %p", (void *) lock);
|
||||
ompi_osc_rdma_sync_rdma_complete (lock);
|
||||
ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, &key, (void **) &lock,
|
||||
node, &node);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_flush_all complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all complete");
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -124,7 +123,7 @@ static inline int ompi_osc_rdma_lock_atomic_internal (ompi_osc_rdma_module_t *mo
|
||||
|
||||
if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) {
|
||||
do {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "Incrementing global exclusive lock"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global exclusive lock");
|
||||
/* lock the master lock. this requires no rank has a global shared lock */
|
||||
ret = ompi_osc_rdma_lock_acquire_shared (module, module->leader, 1, offsetof (ompi_osc_rdma_state_t, global_lock), 0xffffffff00000000L);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
@ -132,7 +131,7 @@ static inline int ompi_osc_rdma_lock_atomic_internal (ompi_osc_rdma_module_t *mo
|
||||
continue;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "Acquiring exclusive lock from peer"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring exclusive lock on peer");
|
||||
ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock));
|
||||
if (ret) {
|
||||
/* release the global lock */
|
||||
@ -147,7 +146,7 @@ static inline int ompi_osc_rdma_lock_atomic_internal (ompi_osc_rdma_module_t *mo
|
||||
} else {
|
||||
do {
|
||||
/* go right to the target to acquire a shared lock */
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "Incrementing local shared lock"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global shared lock");
|
||||
ret = ompi_osc_rdma_lock_acquire_shared (module, peer, 1, offsetof (ompi_osc_rdma_state_t, local_lock),
|
||||
OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
@ -165,10 +164,13 @@ static inline int ompi_osc_rdma_unlock_atomic_internal (ompi_osc_rdma_module_t *
|
||||
ompi_osc_rdma_sync_t *lock)
|
||||
{
|
||||
if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock on peer");
|
||||
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global exclusive lock");
|
||||
ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock));
|
||||
peer->flags &= ~OMPI_OSC_RDMA_PEER_EXCLUSIVE;
|
||||
} else {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global shared lock");
|
||||
ompi_osc_rdma_lock_release_shared (module, peer, -1, offsetof (ompi_osc_rdma_state_t, local_lock));
|
||||
}
|
||||
|
||||
@ -182,13 +184,13 @@ int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t
|
||||
ompi_osc_rdma_sync_t *lock;
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock: %d, %d, %d, %s", lock_type, target, assert, win->w_name);
|
||||
|
||||
if (module->no_locks) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attemoted to lock with no_locks set"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set");
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "osc rdma: lock %d %d", target, lock_type));
|
||||
|
||||
if (module->all_sync.epoch_active && (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) {
|
||||
/* impossible to get an exclusive lock while holding a global shared lock or in a active
|
||||
* target access epoch */
|
||||
@ -225,6 +227,8 @@ int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t
|
||||
OBJ_RELEASE(lock);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock %d complete", target);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -238,11 +242,12 @@ int ompi_osc_rdma_unlock_atomic (int target, ompi_win_t *win)
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock: %d, %s", target, win->w_name);
|
||||
|
||||
lock = ompi_osc_rdma_module_lock_find (module, target, &peer);
|
||||
if (OPAL_UNLIKELY(NULL == lock)) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_unlock: target %d is not locked in window %s",
|
||||
target, win->w_name));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "target %d is not locked in window %s",
|
||||
target, win->w_name);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -259,8 +264,7 @@ int ompi_osc_rdma_unlock_atomic (int target, ompi_win_t *win)
|
||||
/* release our reference to this peer */
|
||||
OBJ_RELEASE(peer);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_unlock: unlock of %d complete", target));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock %d complete", target);
|
||||
|
||||
--module->passive_target_access_epoch;
|
||||
|
||||
@ -280,18 +284,20 @@ int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win)
|
||||
ompi_osc_rdma_sync_t *lock;
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all: %d, %s", assert, win->w_name);
|
||||
|
||||
if (module->no_locks) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attemoted to lock with no_locks set"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set");
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
if (module->all_sync.epoch_active) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attempted "
|
||||
"to lock all when active target epoch is %s and lock all epoch is %s",
|
||||
(OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ?
|
||||
"active" : "inactive",
|
||||
(OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) ? "active" : "inactive"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted lock_all when active target epoch is %s "
|
||||
"and lock all epoch is %s",
|
||||
(OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ?
|
||||
"active" : "inactive",
|
||||
(OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) ? "active" : "inactive");
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -330,6 +336,8 @@ int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win)
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all complete");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -338,16 +346,13 @@ int ompi_osc_rdma_unlock_all_atomic (struct ompi_win_t *win)
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_sync_t *lock;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_unlock_all entering..."));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all: %s", win->w_name);
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
lock = &module->all_sync;
|
||||
if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type) {
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_rdma_unlock_all: not locked in window %s",
|
||||
win->w_name));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "not locked in window %s", win->w_name);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -370,7 +375,7 @@ int ompi_osc_rdma_unlock_all_atomic (struct ompi_win_t *win)
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_unlock_all complete"));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all complete");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
|
||||
int ret, disp_unit;
|
||||
char *peer_data;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "configuring peer for rank %d", peer->rank));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "configuring peer for rank %d", peer->rank);
|
||||
|
||||
if (module->selected_btl->btl_register_mem) {
|
||||
registration_handle_size = module->selected_btl->btl_registration_handle_size;
|
||||
@ -117,8 +117,8 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "reading rank data from array rank: %d pointer: 0x%"
|
||||
PRIx64 ", size: %lu", node_rank, array_pointer, sizeof (rank_data)));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data from rank: %d pointer: 0x%" PRIx64
|
||||
", size: %lu", node_rank, array_pointer, sizeof (rank_data));
|
||||
|
||||
ret = ompi_osc_get_data_blocking (module, array_endpoint, array_pointer, (mca_btl_base_registration_handle_t *) array_peer_data->btl_handle_data,
|
||||
&rank_data, sizeof (rank_data));
|
||||
@ -176,9 +176,9 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
|
||||
|
||||
ompi_osc_rdma_region_t *base_region = (ompi_osc_rdma_region_t *) peer_data;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "peer %d: remote base region: 0x%" PRIx64
|
||||
", size: %" PRId64 ", flags: 0x%x, disp_unit: %d", peer->rank, base_region->base, base_region->len,
|
||||
peer->flags, disp_unit));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "peer %d: remote base region: 0x%" PRIx64 ", size: %" PRId64
|
||||
", flags: 0x%x, disp_unit: %d", peer->rank, base_region->base, base_region->len,
|
||||
peer->flags, disp_unit);
|
||||
|
||||
if (ompi_osc_rdma_peer_local_base (peer)) {
|
||||
/* for now we store the local address in the standard place. do no overwrite it */
|
||||
@ -226,7 +226,7 @@ static struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup_internal (struct o
|
||||
ompi_osc_rdma_peer_t *peer;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "looking up peer data for rank %d", peer_id));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "looking up peer data for rank %d", peer_id);
|
||||
|
||||
peer = ompi_osc_module_get_peer (module, peer_id);
|
||||
if (NULL != peer) {
|
||||
|
@ -138,8 +138,8 @@ static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t *rdma_sync)
|
||||
{
|
||||
ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "INC: there are %ld outstanding RDMA operations",
|
||||
(unsigned long) rdma_sync->outstanding_rdma));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "inc: there are %ld outstanding rdma operations",
|
||||
(unsigned long) rdma_sync->outstanding_rdma);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -151,8 +151,8 @@ static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync)
|
||||
{
|
||||
ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma, -1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "DEC: there are %ld outstanding RDMA operations",
|
||||
(unsigned long) rdma_sync->outstanding_rdma));
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "dec: there are %ld outstanding rdma operations",
|
||||
(unsigned long) rdma_sync->outstanding_rdma);
|
||||
}
|
||||
|
||||
#endif /* OSC_RDMA_SYNC_H */
|
||||
|
@ -207,4 +207,6 @@ typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_osc_rdma_aggregation_t);
|
||||
|
||||
#define OSC_RDMA_VERBOSE(x, ...) OPAL_OUTPUT_VERBOSE((x, ompi_osc_base_framework.framework_output, __VA_ARGS__))
|
||||
|
||||
#endif /* OMPI_OSC_RDMA_TYPES_H */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user