
Merge pull request #1046 from hjelmn/osc_rdma_fixes

osc/rdma: bug fixes
Nathan Hjelm 2015-10-20 16:44:38 -06:00
parent f60c506c68 b8ee05d352
commit 763744a32c
8 changed files with 32 additions and 36 deletions

View file

@@ -342,7 +342,7 @@ static inline void _ompi_osc_rdma_deregister (ompi_osc_rdma_module_t *module, mc
#define ompi_osc_rdma_deregister(...) _ompi_osc_rdma_deregister(__VA_ARGS__, __LINE__, __FILE__)
static inline void ompi_osc_rdma_progress (ompi_osc_rdma_module_t *module) {
module->selected_btl->btl_component->btl_progress ();
opal_progress ();
}
/**
@@ -475,7 +475,7 @@ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
}
do {
module->selected_btl->btl_component->btl_progress ();
opal_progress ();
} while (sync->outstanding_rdma);
}
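
A note on the two hunks above: both appear to swap a direct call to the selected BTL's progress function for the generic opal_progress(), which drives every registered progress callback rather than just the one BTL. A minimal standalone sketch of the resulting wait pattern, using toy names rather than the OMPI API:

    #include <stdatomic.h>

    /* toy stand-in for a sync object's outstanding-RDMA counter */
    typedef struct { atomic_int outstanding_rdma; } toy_sync_t;

    /* stand-in for opal_progress(): the real call polls the network and fires
     * completion callbacks, which decrement the counter */
    static void toy_progress (toy_sync_t *sync)
    {
        if (atomic_load (&sync->outstanding_rdma) > 0) {
            atomic_fetch_sub (&sync->outstanding_rdma, 1);
        }
    }

    /* same shape as ompi_osc_rdma_sync_rdma_complete() above: drive progress
     * until every outstanding RDMA operation has completed */
    static void toy_sync_rdma_complete (toy_sync_t *sync)
    {
        do {
            toy_progress (sync);
        } while (atomic_load (&sync->outstanding_rdma));
    }

    int main (void)
    {
        toy_sync_t sync = { 3 };
        toy_sync_rdma_complete (&sync);
        return 0;
    }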

View file

@@ -146,6 +146,8 @@ static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, s
/* accumulate the data */
if (&ompi_mpi_op_replace.op != request->op) {
ompi_op_reduce (request->op, request->origin_addr, (void *) source, request->origin_count, request->origin_dt);
} else {
memcpy ((void *) source, request->origin_addr, request->len);
}
/* initiate the put of the accumulated data */
@@ -214,7 +216,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
ompi_osc_rdma_sync_rdma_inc (sync);
if (&ompi_mpi_op_replace.op != op || result) {
if (&ompi_mpi_op_replace.op != op || OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) {
/* align the target address */
target_address = target_address & ~btl_alignment_mask;
@@ -283,9 +285,10 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (NULL == request) {
OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request);
request->internal = true;
request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC;
}
request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC;
if (source_datatype) {
(void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
source_buffer = (void *)((intptr_t) source_buffer + lb);
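
For context on the first hunk above: ompi_op_reduce() combines the origin data with the fetched target data using the MPI reduction operation, but MPI_REPLACE is not a combining operation, so the added else branch simply overwrites the staged data with memcpy. At the MPI level the two paths correspond roughly to the following illustrative example (not part of the patch):

    #include <mpi.h>
    #include <stdio.h>

    int main (int argc, char *argv[])
    {
        int rank, win_buf = 10, origin = 5, result = -1;
        MPI_Win win;

        MPI_Init (&argc, &argv);
        MPI_Comm_rank (MPI_COMM_WORLD, &rank);
        MPI_Win_create (&win_buf, sizeof (int), sizeof (int), MPI_INFO_NULL,
                        MPI_COMM_WORLD, &win);

        MPI_Win_fence (0, win);
        if (0 == rank) {
            /* reduction path: combines origin with target (target becomes 15),
             * result receives the previous target value (10) */
            MPI_Get_accumulate (&origin, 1, MPI_INT, &result, 1, MPI_INT,
                                0, 0, 1, MPI_INT, MPI_SUM, win);
        }
        MPI_Win_fence (0, win);
        if (0 == rank) {
            /* replace path: plain overwrite (target becomes 5),
             * result receives the previous target value (15) */
            MPI_Get_accumulate (&origin, 1, MPI_INT, &result, 1, MPI_INT,
                                0, 0, 1, MPI_INT, MPI_REPLACE, win);
        }
        MPI_Win_fence (0, win);

        printf ("rank %d: win_buf=%d result=%d\n", rank, win_buf, result);

        MPI_Win_free (&win);
        MPI_Finalize ();
        return 0;
    }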

View file

@@ -365,12 +365,6 @@ static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t *
assert (OPAL_SUCCESS == status);
ompi_osc_rdma_frag_complete (frag);
OPAL_LIST_FOREACH_SAFE(request, next, &aggregation->requests, ompi_osc_rdma_request_t) {
opal_list_remove_item (&aggregation->requests, (opal_list_item_t *) request);
ompi_osc_rdma_request_complete (request, status);
}
ompi_osc_rdma_aggregation_return (aggregation);
/* make sure the aggregation is returned before marking the operation as complete */
@@ -426,7 +420,8 @@ static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggrega
aggregation->buffer_used += size;
if (request) {
opal_list_append (&aggregation->requests, (opal_list_item_t *) request);
/* the local buffer is now available */
ompi_osc_rdma_request_complete (request, 0);
}
}
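
These two hunks appear to move request completion for aggregated puts: instead of completing requests when the combined put finishes on the network (the removed loop in the completion callback), ompi_osc_rdma_aggregate_append() completes a request as soon as its data has been copied into the aggregation buffer, since the caller's buffer is free from that point on. (The per-aggregation request list this makes unnecessary is dropped from the struct in the last file of this commit.) A toy sketch of the copy-then-complete pattern, with hypothetical names:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct toy_aggregation { char buffer[4096]; size_t buffer_used; };

    /* stand-in for ompi_osc_rdma_request_complete() */
    static void toy_request_complete (int *request, int status)
    {
        *request = status;   /* mark the request object complete */
    }

    static void toy_aggregate_append (struct toy_aggregation *agg, int *request,
                                      const void *source, size_t size)
    {
        memcpy (agg->buffer + agg->buffer_used, source, size);
        agg->buffer_used += size;
        if (request) {
            /* the local buffer is now available */
            toy_request_complete (request, 0);
        }
    }

    int main (void)
    {
        struct toy_aggregation agg = { .buffer_used = 0 };
        int request = -1;
        const char payload[] = "data";

        toy_aggregate_append (&agg, &request, payload, sizeof (payload));
        printf ("buffered %zu bytes, request status %d\n", agg.buffer_used, request);
        return 0;
    }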

View file

@@ -77,7 +77,7 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o
int disp_unit = (module->same_disp_unit) ? module->disp_unit : ex_peer->disp_unit;
size_t size = (module->same_size) ? module->size : (size_t) ex_peer->size;
*remote_address = ex_peer->super.base +disp_unit * target_disp;
*remote_address = ex_peer->super.base + disp_unit * target_disp;
if (OPAL_UNLIKELY(*remote_address + length > (ex_peer->super.base + size))) {
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "remote address range 0x%" PRIx64 " - 0x%" PRIx64
" is out of range. Valid address range is 0x%" PRIx64 " - 0x%" PRIx64 " (%" PRIu64 " bytes)",

View file

@@ -606,6 +606,10 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
ex_peer->size = temp[i].size;
}
if (my_rank == peer_rank) {
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
}
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
if (temp[i].size) {
ex_peer->super.base = (uint64_t) (uintptr_t) module->segment_base + offset;
@@ -1172,6 +1176,7 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i
OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
module->no_locks = true;
win->w_flags |= OMPI_WIN_NO_LOCKS;
} else if (!temp && module->no_locks) {
int world_size = ompi_comm_size (module->comm);
int init_limit = world_size > 256 ? 256 : world_size;
@@ -1183,6 +1188,7 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i
}
module->no_locks = false;
win->w_flags &= ~OMPI_WIN_NO_LOCKS;
}
/* enforce collectiveness... */
@@ -1204,16 +1210,4 @@ static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **
return OMPI_SUCCESS;
}
static void ompi_osc_rdma_aggregation_construct (ompi_osc_rdma_aggregation_t *aggregation)
{
OBJ_CONSTRUCT(&aggregation->requests, opal_list_t);
aggregation->buffer_used = 0;
}
static void ompi_osc_rdma_aggregation_destruct (ompi_osc_rdma_aggregation_t *aggregation)
{
OBJ_DESTRUCT(&aggregation->requests);
}
OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, ompi_osc_rdma_aggregation_construct,
ompi_osc_rdma_aggregation_destruct);
OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL);

View file

@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@@ -212,8 +212,8 @@ static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module
}
}
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "got %lx when attempting compare and swap %" PRIx64 " complete %d",
(unsigned long) *temp, lock, atomic_complete));
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "got %lx when attempting compare and swap %" PRIx64 " on %p",
(unsigned long) *temp, lock, (void *) peer));
result = (*temp != 0);
ompi_osc_rdma_frag_complete (frag);
@@ -289,8 +289,8 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
}
}
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "unlocked target lock %" PRIx64 " with value 0x%lx. old value 0x%"
PRIx64, lock, (unsigned long) -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, ((uint64_t *) temp)[0]));
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "unlocked target lock on peer %p %" PRIx64 " using value 0x%lx"
PRIx64, (void *) peer, lock, (unsigned long) -OMPI_OSC_RDMA_LOCK_EXCLUSIVE));
} else {
ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock);
}
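
These two hunks only reword the verbose log messages, but the code they report on appears to acquire the exclusive lock with a compare-and-swap against the peer's lock word and to release it by applying -OMPI_OSC_RDMA_LOCK_EXCLUSIVE back to it. A rough local analogue using C11 atomics (illustrative only, toy constants, not the BTL atomics actually used):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* toy exclusive-lock value; the real lock word also carries reader state */
    #define TOY_LOCK_EXCLUSIVE ((int64_t) 1 << 32)

    static bool toy_lock_try_acquire_exclusive (_Atomic int64_t *lock)
    {
        int64_t expected = 0;
        /* succeeds only when no other reader or writer holds the lock */
        return atomic_compare_exchange_strong (lock, &expected, TOY_LOCK_EXCLUSIVE);
    }

    static void toy_lock_release_exclusive (_Atomic int64_t *lock)
    {
        /* add the negated exclusive value back, as in the log message above */
        atomic_fetch_add (lock, -TOY_LOCK_EXCLUSIVE);
    }

    int main (void)
    {
        _Atomic int64_t lock = 0;

        printf ("acquired: %d\n", toy_lock_try_acquire_exclusive (&lock));  /* 1 */
        printf ("acquired: %d\n", toy_lock_try_acquire_exclusive (&lock));  /* 0 */
        toy_lock_release_exclusive (&lock);
        printf ("acquired: %d\n", toy_lock_try_acquire_exclusive (&lock));  /* 1 */
        return 0;
    }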

View file

@@ -182,6 +182,11 @@ int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t
ompi_osc_rdma_sync_t *lock;
int ret = OMPI_SUCCESS;
if (module->no_locks) {
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attempted to lock with no_locks set"));
return OMPI_ERR_RMA_SYNC;
}
OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "osc rdma: lock %d %d", target, lock_type));
if (module->all_sync.epoch_active && (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) {
@@ -275,10 +280,12 @@ int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win)
ompi_osc_rdma_sync_t *lock;
int ret = OMPI_SUCCESS;
OPAL_THREAD_LOCK(&module->lock);
if (module->no_locks) {
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attempted to lock with no_locks set"));
return OMPI_ERR_RMA_SYNC;
}
/* Check if no_locks is set. TODO: we also need to track whether we are in an
* active target epoch. Fence can make this tricky to track. */
OPAL_THREAD_LOCK(&module->lock);
if (module->all_sync.epoch_active) {
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attempted "
"to lock all when active target epoch is %s and lock all epoch is %s",

View file

@@ -202,9 +202,6 @@ struct ompi_osc_rdma_aggregation_t {
/** type */
int type;
/** list of associated requests */
opal_list_t requests;
};
typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t;