Fix a number of issues in the new one sided code.

 - Fix several typos in osc/rdma.

 - Fix a locking issue in osc/sm that was caused by an incorrect
   assumption about the semantics of opal_atomic_add_32.

 - Always unlock the accumulation lock in osc/sm.

 - The base of a process's shared memory window should be NULL if
   the size is zero. Fixed.

cmr=v1.7.5:ticket=trac:4304

This commit was SVN r30853.

The following Trac tickets were found above:
  Ticket 4304 --> https://svn.open-mpi.org/trac/ompi/ticket/4304
This commit is contained in:
Nathan Hjelm 2014-02-26 15:33:18 +00:00
parent 4e282a3295
commit 30b61a3333
6 changed files with 46 additions and 25 deletions

View file

@@ -152,6 +152,9 @@ typedef struct udcm_module {
/* This module's modex message */
modex_msg_t modex;
/** The channel is being monitored */
bool channel_monitored;
} udcm_module_t;
/*
@@ -401,7 +404,7 @@ static void udcm_component_register(void)
0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&udcm_timeout);
udcm_max_retry = 10;
udcm_max_retry = 25;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_udcm_max_retry", "Maximum number of times "
"to retry sending a udcm connection message",
@@ -607,6 +610,7 @@ static int udcm_module_init (udcm_module_t *m, mca_btl_openib_module_t *btl)
/* Monitor the fd associated with the completion channel */
ompi_btl_openib_fd_monitor(m->cm_channel->fd, OPAL_EV_READ,
udcm_cq_event_dispatch, m);
m->channel_monitored = true;
OBJ_CONSTRUCT(&m->cm_lock, opal_mutex_t);
OBJ_CONSTRUCT(&m->cm_send_lock, opal_mutex_t);
@@ -705,11 +709,13 @@ static int udcm_module_finalize(mca_btl_openib_module_t *btl,
m->cm_exiting = true;
/* stop monitoring the channel's fd before destroying the listen qp */
ompi_btl_openib_fd_unmonitor(m->cm_channel->fd, udcm_unmonitor, (void *)&barrier);
if (m->channel_monitored) {
/* stop monitoring the channel's fd before destroying the listen qp */
ompi_btl_openib_fd_unmonitor(m->cm_channel->fd, udcm_unmonitor, (void *)&barrier);
while (0 == barrier) {
sched_yield();
while (0 == barrier) {
sched_yield();
}
}
opal_mutex_lock (&m->cm_lock);
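
The hunk above gates the fd teardown on the new channel_monitored flag. Below is a minimal, self-contained sketch of that guard pattern; monitor_fd and unmonitor_fd are hypothetical stand-ins for ompi_btl_openib_fd_monitor/unmonitor, and the barrier variable mimics how the udcm_unmonitor callback signals completion.

    #include <sched.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the openib fd monitor helpers. */
    static volatile int barrier = 0;
    static void monitor_fd(int fd)   { (void) fd; }
    static void unmonitor_fd(int fd) { (void) fd; barrier = 1; }

    typedef struct module {
        int  fd;
        bool channel_monitored;   /* true only after the fd is registered */
    } module_t;

    static void module_init(module_t *m)
    {
        monitor_fd (m->fd);
        m->channel_monitored = true;    /* remember that teardown is required */
    }

    static void module_finalize(module_t *m)
    {
        if (m->channel_monitored) {     /* only undo what init actually did */
            unmonitor_fd (m->fd);
            while (0 == barrier) {      /* wait for the unmonitor to complete */
                sched_yield ();
            }
        }
    }

    int main(void)
    {
        module_t m = { .fd = -1, .channel_monitored = false };
        module_init (&m);
        module_finalize (&m);
        puts ("clean shutdown");
        return 0;
    }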

View file

@@ -103,7 +103,7 @@ typedef struct ompi_osc_rdma_header_complete_t ompi_osc_rdma_header_complete_t;
struct ompi_osc_rdma_header_get_acc_t {
ompi_osc_rdma_header_base_t base;
int16_t tag;
uint16_t tag;
uint32_t count;
uint64_t len;
uint64_t displacement;
@@ -114,7 +114,7 @@ typedef struct ompi_osc_rdma_header_get_acc_t ompi_osc_rdma_header_get_acc_t;
struct ompi_osc_rdma_header_cswap_t {
ompi_osc_rdma_header_base_t base;
int16_t tag;
uint16_t tag;
uint32_t len;
uint64_t displacement;

View file

@@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_lock_self (ompi_osc_rdma_module_t *module, ompi_
module->shared_count++;
}
lock->lock_acks_received = 1;
lock->lock_acks_received++;
} else {
/* queue the lock */
queue_lock (module, my_rank, lock->type, lock->serial_number);
@@ -415,7 +415,7 @@ int ompi_osc_rdma_unlock_all (struct ompi_win_t *win)
/* reset all fragment counters */
memset (module->epoch_outgoing_frag_count, 0, ompi_comm_size(module->comm) * sizeof (module->epoch_outgoing_frag_count[0]));
memset (module->passive_eager_send_active, 0, ompi_comm_size(module->comm) * module->passive_eager_send_active[0]);
memset (module->passive_eager_send_active, 0, ompi_comm_size(module->comm) * sizeof (module->passive_eager_send_active[0]));
opal_list_remove_item (&module->outstanding_locks, &lock->super);
OBJ_RELEASE(lock);
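
The memset fix above is the classic byte-count bug: the old code multiplied the element count by the first element's value instead of its size. A tiny sketch of the same fix, with a hypothetical per-peer counter array:

    #include <stdint.h>
    #include <string.h>

    /* Reset one 32-bit counter per peer. The commented-out line shows the bug:
     * it sizes the memset by the value stored in element 0, not by sizeof. */
    static void reset_counters(uint32_t *counters, int nprocs)
    {
        /* memset (counters, 0, nprocs * counters[0]);             <- wrong */
        memset (counters, 0, nprocs * sizeof (counters[0]));    /* <- right */
    }

    int main(void)
    {
        uint32_t counters[4] = { 7, 3, 9, 1 };
        reset_counters (counters, 4);
        return 0;
    }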
@@ -477,6 +477,12 @@ static int ompi_osc_rdma_flush_lock (ompi_osc_rdma_module_t *module, ompi_osc_rd
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
/* start all sendreqs to target */
ret = ompi_osc_rdma_frag_flush_target (module, i);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
}
} else {
flush_req.frag_count = module->epoch_outgoing_frag_count[target];
@@ -486,12 +492,12 @@ static int ompi_osc_rdma_flush_lock (ompi_osc_rdma_module_t *module, ompi_osc_rd
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
}
/* start all sendreqs to target */
ret = ompi_osc_rdma_frag_flush_all (module);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
/* start all sendreqs to target */
ret = ompi_osc_rdma_frag_flush_target (module, target);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
}
/* wait for all the requests and the flush ack (meaning remote completion) */
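
A hedged sketch of the flush_lock control flow after the two hunks above, with a hypothetical helper standing in for ompi_osc_rdma_frag_flush_target: in the lock-all case every peer's queued fragments are started, while a single-target flush now starts only that peer's sendreqs instead of flushing everything.

    #include <stdio.h>

    #define SKETCH_SUCCESS 0

    /* Hypothetical stand-in for ompi_osc_rdma_frag_flush_target. */
    static int frag_flush_target(int target)
    {
        printf ("starting queued fragments for peer %d\n", target);
        return SKETCH_SUCCESS;
    }

    static int flush_lock(int target, int lock_all, int comm_size)
    {
        int ret;

        if (lock_all) {
            /* lock-all epoch: every peer may have queued fragments */
            for (int i = 0 ; i < comm_size ; ++i) {
                ret = frag_flush_target (i);
                if (SKETCH_SUCCESS != ret) {
                    return ret;
                }
            }
        } else {
            /* single-target flush: start only that peer's sendreqs */
            ret = frag_flush_target (target);
            if (SKETCH_SUCCESS != ret) {
                return ret;
            }
        }

        return SKETCH_SUCCESS;
    }

    int main(void)
    {
        return flush_lock (2, 0, 4);
    }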
@@ -521,9 +527,7 @@ int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
"ompi_osc_rdma_flush starting..."));
if (ompi_comm_rank (module->comm) == target) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"calling opal_progress. incoming complete = %d",
module->frag_request->req_complete));
/* nothing to flush */
opal_progress ();
return OMPI_SUCCESS;
}
@@ -665,7 +669,7 @@ static inline int activate_lock (ompi_osc_rdma_module_t *module, int requestor,
}
}
lock->lock_acks_received = 1;
lock->lock_acks_received++;
opal_condition_broadcast (&module->cond);
return OMPI_SUCCESS;
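
Both lock_acks_received changes above turn an assignment into an increment. A short sketch of why, under the assumption that an epoch such as lock-all expects one ack per peer (including the local self lock): overwriting the counter with 1 would discard acks that already arrived.

    #include <assert.h>

    typedef struct outstanding_lock {
        int lock_acks_received;   /* acks seen so far in this epoch */
        int lock_acks_expected;   /* e.g. communicator size for lock-all */
    } outstanding_lock_t;

    /* Called once per ack, whether it comes from a peer or from the local
     * (self) lock path; the counter must accumulate, not be reset to 1. */
    static void record_lock_ack(outstanding_lock_t *lock)
    {
        lock->lock_acks_received++;
    }

    int main(void)
    {
        outstanding_lock_t lock = { .lock_acks_received = 0, .lock_acks_expected = 3 };

        record_lock_ack (&lock);   /* self lock */
        record_lock_ack (&lock);   /* peer 1 */
        record_lock_ack (&lock);   /* peer 2 */

        assert (lock.lock_acks_received == lock.lock_acks_expected);
        return 0;
    }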

View file

@@ -198,8 +198,8 @@ ompi_osc_sm_rget_accumulate(void *origin_addr,
if (OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done;
if (op == &ompi_mpi_op_replace.op) {
return ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt);
ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt);
} else {
ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt,
@@ -354,8 +354,8 @@ ompi_osc_sm_get_accumulate(void *origin_addr,
if (OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done;
if (op == &ompi_mpi_op_replace.op) {
return ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt);
ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt);
} else {
ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
remote_address, target_count, target_dt,
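
Both get-accumulate hunks above replace an early return with an assignment to ret, so control always reaches the cleanup at the end of the function and the accumulation lock is released. A self-contained sketch of that pattern, using a pthread mutex and stub operations in place of the osc/sm accumulation lock, ompi_datatype_sndrcv, and ompi_osc_base_sndrcv_op:

    #include <pthread.h>

    static pthread_mutex_t acc_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Stub operations standing in for the replace and accumulate paths. */
    static int do_replace(void)    { return 0; }
    static int do_accumulate(void) { return 0; }

    static int get_accumulate(int is_replace)
    {
        int ret;

        pthread_mutex_lock (&acc_lock);     /* taken before touching target memory */

        if (is_replace) {
            ret = do_replace ();            /* a bare "return do_replace ();" here
                                             * would skip the unlock below */
        } else {
            ret = do_accumulate ();
        }

        pthread_mutex_unlock (&acc_lock);   /* always reached now */
        return ret;
    }

    int main(void)
    {
        return get_accumulate (1);
    }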

View file

@@ -291,8 +291,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
for (i = 0, total = state_size ; i < ompi_comm_size(comm) ; ++i) {
module->sizes[i] = rbuf[i];
module->bases[i] = ((char *) module->segment_base) + total;
total += rbuf[i];
if (module->sizes[i]) {
module->bases[i] = ((char *) module->segment_base) + total;
total += rbuf[i];
} else {
module->bases[i] = NULL;
}
}
free(rbuf);
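
A sketch of the base-assignment loop above: a rank that contributed a zero-size window gets a NULL base instead of an arbitrary pointer into the shared segment. The variable names mirror the real loop, but the function itself is hypothetical.

    #include <stddef.h>
    #include <stdio.h>

    static void assign_bases(char *segment_base, size_t state_size,
                             const size_t *sizes, void **bases, int nprocs)
    {
        size_t total = state_size;   /* per-node state lives at the front */

        for (int i = 0 ; i < nprocs ; ++i) {
            if (sizes[i]) {
                bases[i] = segment_base + total;
                total   += sizes[i];
            } else {
                bases[i] = NULL;     /* zero-size window: no backing memory */
            }
        }
    }

    int main(void)
    {
        char   segment[64];
        size_t sizes[3] = { 16, 0, 8 };
        void  *bases[3];

        assign_bases (segment, 8, sizes, bases, 3);
        printf ("rank 1 base: %p\n", bases[1]);   /* prints a NULL pointer */
        return 0;
    }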

View file

@@ -1,5 +1,8 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -22,8 +25,10 @@ lk_fetch_add32(ompi_osc_sm_module_t *module,
size_t offset,
uint32_t delta)
{
/* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the
* old value */
return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
delta);
delta) - delta;
}
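
The comment added above is the heart of the osc/sm locking fix: opal_atomic_add_32 returns the value after the addition, so the old value has to be recovered by subtracting the delta back out. A sketch of the same idea using the GCC/Clang __atomic_add_fetch builtin, which has the same add-then-fetch semantics:

    #include <stdint.h>
    #include <stdio.h>

    /* Build a fetch-and-add (returns the OLD value) out of an add-then-fetch
     * primitive, the way lk_fetch_add32 now does with opal_atomic_add_32. */
    static uint32_t fetch_add32(int32_t *addr, uint32_t delta)
    {
        int32_t after = __atomic_add_fetch (addr, (int32_t) delta, __ATOMIC_SEQ_CST);
        return (uint32_t) after - delta;   /* undo the add to get the old value */
    }

    int main(void)
    {
        int32_t  ticket = 41;
        uint32_t mine   = fetch_add32 (&ticket, 1);

        printf ("got %u, counter is now %d\n", mine, ticket);   /* got 41, now 42 */
        return 0;
    }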
@@ -149,8 +154,10 @@ ompi_osc_sm_unlock(int target,
ret = OMPI_SUCCESS;
} else if (module->outstanding_locks[target] == lock_exclusive) {
ret = end_exclusive(module, target);
module->outstanding_locks[target] = lock_none;
} else if (module->outstanding_locks[target] == lock_shared) {
ret = end_shared(module, target);
module->outstanding_locks[target] = lock_none;
} else {
ret = MPI_ERR_RMA_SYNC;
}
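
The unlock hunk above also clears the locally recorded lock type once the target has been unlocked; otherwise a later lock/unlock cycle on the same target would see stale state. A sketch of that bookkeeping, with the end_* helpers reduced to stubs and a plain int in place of the MPI error code:

    enum lock_type { lock_none, lock_shared, lock_exclusive };

    /* Stubs standing in for end_exclusive/end_shared in the real code. */
    static int end_exclusive(int target) { (void) target; return 0; }
    static int end_shared(int target)    { (void) target; return 0; }

    static int unlock_target(enum lock_type *outstanding_locks, int target)
    {
        int ret;

        if (lock_exclusive == outstanding_locks[target]) {
            ret = end_exclusive (target);
            outstanding_locks[target] = lock_none;   /* forget the released lock */
        } else if (lock_shared == outstanding_locks[target]) {
            ret = end_shared (target);
            outstanding_locks[target] = lock_none;
        } else {
            ret = -1;   /* no lock outstanding: MPI_ERR_RMA_SYNC in the real code */
        }

        return ret;
    }

    int main(void)
    {
        enum lock_type locks[2] = { lock_none, lock_exclusive };
        return unlock_target (locks, 1);
    }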