1
1

Merge pull request #4552 from hjelmn/asm_cleanup2

Add atomic fetch-and-op and compare-exchange functions
Этот коммит содержится в:
bosilca 2017-11-30 22:29:38 -05:00 коммит произвёл GitHub
родитель 0fcc996c41 7893248c5a
Коммит 5cb72aa568
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
110 изменённых файлов: 1534 добавлений и 1466 удалений

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -378,7 +378,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data,
* a read only memory).
*/
if( NULL != pArgs ) {
OPAL_THREAD_ADD32(&pArgs->ref_count, 1);
OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, 1);
dest_data->args = pArgs;
}
return OMPI_SUCCESS;
@ -396,7 +396,7 @@ int32_t ompi_datatype_release_args( ompi_datatype_t* pData )
ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args;
assert( 0 < pArgs->ref_count );
OPAL_THREAD_ADD32(&pArgs->ref_count, -1);
OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, -1);
if( 0 == pArgs->ref_count ) {
/* There are some duplicated datatypes around that have a pointer to these
* args. We will release them only when the last datatype disappears.
@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
void* recursive_buffer;
if (NULL == packed_description) {
if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) {
void *_tmp_ptr = NULL;
if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) {
if( ompi_datatype_is_predefined(datatype) ) {
packed_description = malloc(2 * sizeof(int));
} else if( NULL == args ) {

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
ompi_proc_t *real_proc =
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));
if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) {
if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) {
OBJ_RETAIN(real_proc);
}

Просмотреть файл

@ -314,7 +314,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module)
/* if we ever were used for a collective op, do the progress cleanup. */
if (true == module->comm_registered) {
int32_t tmp =
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1);
OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, -1);
if (0 == tmp) {
opal_progress_unregister(ompi_coll_libnbc_progress);
}

Просмотреть файл

@ -618,7 +618,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t
/* register progress */
if (need_register) {
int32_t tmp =
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1);
OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1);
if (tmp == 1) {
opal_progress_register(ompi_coll_libnbc_progress);
}

Просмотреть файл

@ -120,7 +120,7 @@ static int
mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 1 == opal_atomic_add_32(&monitoring_module->is_initialized, 1) ) {
if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm);
monitoring_module->data = mca_common_monitoring_coll_new(comm);
OPAL_MONITORING_PRINT_INFO("coll_module_enabled");
@ -132,7 +132,7 @@ static int
mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 0 == opal_atomic_sub_32(&monitoring_module->is_initialized, 1) ) {
if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm);
mca_common_monitoring_coll_release(monitoring_module->data);
monitoring_module->data = NULL;

Просмотреть файл

@ -68,7 +68,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&module->coll_count, 1);
internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
/*
** DATATYPE and SIZES

Просмотреть файл

@ -44,7 +44,7 @@ barrier_hypercube_top(struct ompi_communicator_t *comm,
request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER;
count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = PtlCTAlloc(mca_coll_portals4_component.ni_h,
&request->u.barrier.rtr_ct_h);

Просмотреть файл

@ -176,7 +176,7 @@ bcast_kary_tree_top(void *buff, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
/*
@ -513,7 +513,7 @@ bcast_pipeline_top(void *buff, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
/*
** DATATYPE and SIZES

Просмотреть файл

@ -582,7 +582,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
/* Setup Common Parameters */
/**********************************/
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank );
bmtree = portals4_module->cached_in_order_bmtree;
@ -879,7 +879,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank);
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = setup_gather_buffers_linear(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

Просмотреть файл

@ -69,7 +69,7 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&module->coll_count, 1);
internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
/*
** DATATYPE and SIZES

Просмотреть файл

@ -399,7 +399,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);
request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
request->u.scatter.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = setup_scatter_buffers_linear(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

Просмотреть файл

@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one;
* Macro to release an in-use flag from this process
*/
#define FLAG_RELEASE(flag) \
(void)opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
/**
* Macro to copy a single segment in from a user buffer to a shared

Просмотреть файл

@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
if (0 != rank) {
/* Get parent *in* buffer */
parent = &data->mcb_barrier_control_parent[buffer_set];
(void)opal_atomic_add(parent, 1);
opal_atomic_add (parent, 1);
SPIN_CONDITION(0 != *me_out, exit_label2);
*me_out = 0;

Просмотреть файл

@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
OBJ_RETAIN(sm_module->previous_reduce_module);
/* Indicate that we have successfully attached and setup */
(void)opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
/* Wait for everyone in this communicator to attach and setup */
opal_output_verbose(10, ompi_coll_base_framework.framework_output,

Просмотреть файл

@ -209,7 +209,7 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar,
int mca_common_monitoring_init( void )
{
if( !mca_common_monitoring_enabled ) return OMPI_ERROR;
if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */
if( 1 < opal_atomic_add_fetch_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */
char hostname[OPAL_MAXHOSTNAMELEN] = "NA";
/* Initialize constant */
@ -229,7 +229,7 @@ int mca_common_monitoring_init( void )
void mca_common_monitoring_finalize( void )
{
if( ! mca_common_monitoring_enabled || /* Don't release if not last */
0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return;
0 < opal_atomic_sub_fetch_32(&mca_common_monitoring_hold, 1) ) return;
OPAL_MONITORING_PRINT_INFO("common_component_finish");
/* Dump monitoring information */
@ -503,21 +503,21 @@ void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag)
/* Keep track of the data_size distribution */
if( 0 == data_size ) {
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1);
opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram], 1);
} else {
int log2_size = log10(data_size)/log10_2;
if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */
log2_size = max_size_histogram - 2;
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1);
opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1);
}
/* Distinguish positive and negative tags if requested */
if( (tag < 0) && (mca_common_monitoring_filter()) ) {
opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size);
opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1);
opal_atomic_add_fetch_size_t(&filtered_pml_data[world_rank], data_size);
opal_atomic_add_fetch_size_t(&filtered_pml_count[world_rank], 1);
} else { /* if filtered monitoring is not activated data is aggregated indifferently */
opal_atomic_add_size_t(&pml_data[world_rank], data_size);
opal_atomic_add_size_t(&pml_count[world_rank], 1);
opal_atomic_add_fetch_size_t(&pml_data[world_rank], data_size);
opal_atomic_add_fetch_size_t(&pml_count[world_rank], 1);
}
}
@ -564,11 +564,11 @@ void mca_common_monitoring_record_osc(int world_rank, size_t data_size,
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
if( SEND == dir ) {
opal_atomic_add_size_t(&osc_data_s[world_rank], data_size);
opal_atomic_add_size_t(&osc_count_s[world_rank], 1);
opal_atomic_add_fetch_size_t(&osc_data_s[world_rank], data_size);
opal_atomic_add_fetch_size_t(&osc_count_s[world_rank], 1);
} else {
opal_atomic_add_size_t(&osc_data_r[world_rank], data_size);
opal_atomic_add_size_t(&osc_count_r[world_rank], 1);
opal_atomic_add_fetch_size_t(&osc_data_r[world_rank], data_size);
opal_atomic_add_fetch_size_t(&osc_count_r[world_rank], 1);
}
}
@ -650,8 +650,8 @@ void mca_common_monitoring_record_coll(int world_rank, size_t data_size)
{
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
opal_atomic_add_size_t(&coll_data[world_rank], data_size);
opal_atomic_add_size_t(&coll_count[world_rank], 1);
opal_atomic_add_fetch_size_t(&coll_data[world_rank], data_size);
opal_atomic_add_fetch_size_t(&coll_count[world_rank], 1);
}
static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar,

Просмотреть файл

@ -236,8 +236,8 @@ void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data
return;
}
#endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->o2a_size, size);
opal_atomic_add_size_t(&data->o2a_count, 1);
opal_atomic_add_fetch_size_t(&data->o2a_size, size);
opal_atomic_add_fetch_size_t(&data->o2a_count, 1);
}
int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar,
@ -277,8 +277,8 @@ void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data
return;
}
#endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->a2o_size, size);
opal_atomic_add_size_t(&data->a2o_count, 1);
opal_atomic_add_fetch_size_t(&data->a2o_size, size);
opal_atomic_add_fetch_size_t(&data->a2o_count, 1);
}
int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar,
@ -318,8 +318,8 @@ void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data
return;
}
#endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->a2a_size, size);
opal_atomic_add_size_t(&data->a2a_count, 1);
opal_atomic_add_fetch_size_t(&data->a2a_size, size);
opal_atomic_add_fetch_size_t(&data->a2a_count, 1);
}
int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar,

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me,
int
ompi_mtl_portals4_flowctl_trigger(void)
{
int32_t _tmp_value = 0;
int ret;
if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) {
if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) {
/* send trigger to root */
ret = PtlPut(ompi_mtl_portals4.zero_md_h,
0,
@ -346,7 +347,7 @@ start_recover(void)
int64_t epoch_counter;
ompi_mtl_portals4.flowctl.flowctl_active = true;
epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
epoch_counter = opal_atomic_add_fetch_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Entering flowctl_start_recover %ld",

Просмотреть файл

@ -53,14 +53,14 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
int32_t frag_count;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
while (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
ompi_mtl_portals4_progress();
}
#endif
frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);
ret = OPAL_THREAD_ADD_FETCH32(&(request->pending_reply), frag_count);
for (i = 0 ; i < frag_count ; i++) {
opal_free_list_item_t *tmp;
@ -385,14 +385,14 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
&rndv_get_frag->super);
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_reply), -1);
if (ret > 0) {
return OMPI_SUCCESS;
}
assert(ptl_request->pending_reply == 0);
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif
/* make sure the data is in the right place. Use _ucount for
@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->super.type = portals4_req_recv;
ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->hdr_data = 0;
#endif
ptl_request->buffer_ptr = (free_after) ? start : NULL;
@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
}
#if OPAL_ENABLE_DEBUG
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->hdr_data = 0;
#endif
ptl_request->super.type = portals4_req_recv;

Просмотреть файл

@ -45,7 +45,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
(ompi_mtl_portals4_isend_request_t*) ptl_base_request;
if (PTL_EVENT_GET == ev->type) {
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
if (ret > 0) {
/* wait for other gets */
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret));
@ -94,7 +94,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
ompi_mtl_portals4_flowctl_trigger();
return OMPI_SUCCESS;
@ -124,7 +124,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
if ((eager == ompi_mtl_portals4.protocol) ||
(ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) {
val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
val = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
}
if (0 == val) {
add = 2; /* We don't have to wait for any get, so we have to add an extra count to cause the message to complete */
@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
ptl_request->me_h = PTL_INVALID_HANDLE;
add++;
}
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);
val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add);
assert(val <= 3);
if (val == 3) {
@ -174,7 +174,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
*complete = true;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl,
&ptl_request->pending->super);
@ -422,15 +422,15 @@ ompi_mtl_portals4_pending_list_progress()
while ((!ompi_mtl_portals4.flowctl.flowctl_active) &&
(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1);
val = OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1);
if (val < 0) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return;
}
item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends);
if (OPAL_UNLIKELY(NULL == item)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return;
}
@ -456,7 +456,7 @@ ompi_mtl_portals4_pending_list_progress()
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
}
}
}
@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) return ret;
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1);
ptl_request->buffer_ptr = (free_after) ? start : NULL;
ptl_request->length = length;
ptl_request->event_count = 0;
@ -520,15 +520,15 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
pending->ptl_proc = ptl_proc;
pending->ptl_request = ptl_request;
if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
if (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
return OMPI_SUCCESS;
}
if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
ompi_mtl_portals4_pending_list_progress();
@ -536,7 +536,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
}
if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
return OMPI_SUCCESS;

Просмотреть файл

@ -61,7 +61,7 @@
static inline void* \
ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \
{ \
if( 1 == opal_atomic_add_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \
if( 1 == opal_atomic_add_fetch_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \
/* Saves the original module functions in \
* ompi_osc_monitoring_module_## template ##_template \
*/ \

Просмотреть файл

@ -99,7 +99,7 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
PTL_SUM,
PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1);
OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
}
ret = ompi_osc_portals4_complete_all(module);
@ -144,7 +144,7 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
PTL_SUM,
PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1);
OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
}
} else {
module->post_group = NULL;

Просмотреть файл

@ -206,7 +206,7 @@ segmentedPut(int64_t *opcount,
ptl_size_t bytes_put = 0;
do {
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(put_length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -222,7 +222,7 @@ segmentedPut(int64_t *opcount,
user_ptr,
hdr_data);
if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlPut failed with return value %d",
__FUNCTION__, __LINE__, ret);
@ -251,7 +251,7 @@ segmentedGet(int64_t *opcount,
ptl_size_t bytes_gotten = 0;
do {
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(get_length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -266,7 +266,7 @@ segmentedGet(int64_t *opcount,
target_offset + bytes_gotten,
user_ptr);
if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlGet failed with return value %d",
__FUNCTION__, __LINE__, ret);
@ -297,7 +297,7 @@ segmentedAtomic(int64_t *opcount,
ptl_size_t sent = 0;
do {
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -315,7 +315,7 @@ segmentedAtomic(int64_t *opcount,
ptl_op,
ptl_dt);
if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret);
@ -348,7 +348,7 @@ segmentedFetchAtomic(int64_t *opcount,
ptl_size_t sent = 0;
do {
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -367,7 +367,7 @@ segmentedFetchAtomic(int64_t *opcount,
ptl_op,
ptl_dt);
if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlFetchAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret);
@ -399,7 +399,7 @@ segmentedSwap(int64_t *opcount,
ptl_size_t sent = 0;
do {
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -419,7 +419,7 @@ segmentedSwap(int64_t *opcount,
PTL_SWAP,
ptl_dt);
if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlSwap failed with return value %d",
__FUNCTION__, __LINE__, ret);
@ -547,7 +547,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
return ret;
}
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
@ -564,7 +564,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d PtlGet() failed: ret = %d",
__FUNCTION__, __LINE__, ret));
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -716,7 +716,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
return ret;
}
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
@ -735,7 +735,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d PtlPut() failed: ret = %d",
__FUNCTION__, __LINE__, ret));
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -1252,7 +1252,7 @@ put_to_noncontig(int64_t *opcount,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1270,7 +1270,7 @@ put_to_noncontig(int64_t *opcount,
user_ptr,
0);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
return ret;
}
@ -1361,7 +1361,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1379,7 +1379,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
user_ptr,
0);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */
atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1501,7 +1501,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
ptl_op,
ptl_dt);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -1586,7 +1586,7 @@ get_from_noncontig(int64_t *opcount,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(opcount, 1);
opal_atomic_add_fetch_64(opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1602,7 +1602,7 @@ get_from_noncontig(int64_t *opcount,
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
user_ptr);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(opcount, -1);
opal_atomic_add_fetch_64(opcount, -1);
return ret;
}
@ -1687,7 +1687,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1703,7 +1703,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
user_ptr);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -1817,7 +1817,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
@ -1844,7 +1844,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlSwap failed with return value %d",
__FUNCTION__, __LINE__, ret);
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -1969,7 +1969,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1);
opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
@ -1995,7 +1995,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlFetchAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret);
opal_atomic_add_64(&module->opcount, -1);
opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
@ -2411,7 +2411,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__));
@ -2428,7 +2428,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
ptl_op,
ptl_dt);
if (OMPI_SUCCESS != ret) {
(void)opal_atomic_add_64(&module->opcount, -1);
(void)opal_atomic_add_fetch_64(&module->opcount, -1);
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
@ -3149,7 +3149,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__));
@ -3166,7 +3166,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
ptl_op,
ptl_dt);
if (OMPI_SUCCESS != ret) {
(void)opal_atomic_add_64(&module->opcount, -1);
(void)opal_atomic_add_fetch_64(&module->opcount, -1);
return ret;
}
sent += msg_length;
@ -3541,7 +3541,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
@ -3613,7 +3613,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->md_h,
@ -3635,7 +3635,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
md_offset = (ptl_size_t) result_addr;
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = PtlGet(module->md_h,
@ -3648,7 +3648,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
NULL);
} else {
ptl_size_t result_md_offset, origin_md_offset;
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) {

Просмотреть файл

@ -230,8 +230,8 @@ process:
}
req = (ompi_osc_portals4_request_t*) ev.user_ptr;
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);
ops = opal_atomic_add_32(&req->ops_committed, 1);
opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength);
ops = opal_atomic_add_fetch_32(&req->ops_committed, 1);
if (ops == req->ops_expected) {
ompi_request_complete(&req->super, true);
}

Просмотреть файл

@ -43,7 +43,7 @@ lk_cas64(ompi_osc_portals4_module_t *module,
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlSwap(module->md_h,
(ptl_size_t) result_val,
@ -76,7 +76,7 @@ lk_write64(ompi_osc_portals4_module_t *module,
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlPut(module->md_h,
(ptl_size_t) &write_val,
@ -106,7 +106,7 @@ lk_add64(ompi_osc_portals4_module_t *module,
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1);
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlFetchAtomic(module->md_h,
(ptl_size_t) result_val,

Просмотреть файл

@ -8,7 +8,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer
static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value)
{
int32_t peer_flags, new_flags;
do {
peer_flags = peer->flags;
if (value) {
new_flags = peer_flags | flag;
} else {
new_flags = peer_flags & ~flag;
}
} while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags));
if (value) {
OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag);
} else {
OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag);
}
}
static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)
@ -518,7 +514,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking active incoming complete. module %p, count = %d",
(void *) module, (int) module->active_incoming_frag_count + 1));
new_value = OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1);
new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1);
if (new_value >= 0) {
OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast(&module->cond);
@ -530,7 +526,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking passive incoming complete. module %p, source = %d, count = %d",
(void *) module, source, (int) peer->passive_incoming_frag_count + 1));
new_value = OPAL_THREAD_ADD32((int32_t *) &peer->passive_incoming_frag_count, 1);
new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &peer->passive_incoming_frag_count, 1);
if (0 == new_value) {
OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast(&module->cond);
@ -554,7 +550,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
*/
static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
{
int32_t new_value = OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, 1);
int32_t new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, 1);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_outgoing_completion: outgoing_frag_count = %d", new_value));
if (new_value >= 0) {
@ -578,12 +574,12 @@ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
*/
static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count)
{
OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, -count);
OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, -count);
if (MPI_PROC_NULL != target) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target,
count, module->epoch_outgoing_frag_count[target] + count));
OPAL_THREAD_ADD32((int32_t *) (module->epoch_outgoing_frag_count + target), count);
OPAL_THREAD_ADD_FETCH32((int32_t *) (module->epoch_outgoing_frag_count + target), count);
}
}
@ -721,7 +717,7 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module)
/* the LSB of the tag is used by the receiver to determine if the
message is a passive or active target (ie, where to mark
completion). */
int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4);
int32_t tmp = OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &module->tag_counter, 4);
return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch);
}

Просмотреть файл

@ -183,7 +183,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
incoming_reqs));
/* set our complete condition for incoming requests */
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -incoming_reqs);
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs);
/* wait for completion */
while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) {
@ -272,7 +272,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"found unexpected post from %d",
peer->rank));
OPAL_THREAD_ADD32 (&sync->sync_expected, -1);
OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
ompi_osc_pt2pt_peer_set_unex (peer, false);
}
}
@ -574,12 +574,12 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i
frag_count, module->active_incoming_frag_count, module->num_complete_msgs));
/* the current fragment is not part of the frag_count so we need to add it here */
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -frag_count);
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count);
/* make sure the signal count is written before changing the complete message count */
opal_atomic_wmb ();
if (0 == OPAL_THREAD_ADD32(&module->num_complete_msgs, 1)) {
if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) {
OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast (&module->cond);
OPAL_THREAD_UNLOCK(&module->lock);

Просмотреть файл

@ -62,7 +62,7 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request)
/* update the cbdata for ompi_osc_pt2pt_comm_complete */
request->req_complete_cb_data = pt2pt_request->module;
if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) {
if (0 == OPAL_THREAD_ADD_FETCH32(&pt2pt_request->outstanding_requests, -1)) {
ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR);
}

Просмотреть файл

@ -667,7 +667,7 @@ static int accumulate_cb (ompi_request_t *request)
rank = acc_data->peer;
}
if (0 == OPAL_THREAD_ADD32(&acc_data->request_count, -1)) {
if (0 == OPAL_THREAD_ADD_FETCH32(&acc_data->request_count, -1)) {
/* no more requests needed before the buffer can be accumulated */
if (acc_data->source) {
@ -716,9 +716,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os
/* NTH: ensure we don't leave wait/process_flush/etc until this
* accumulate operation is complete. */
if (active_target) {
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -1);
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -1);
} else {
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
}
pending_acc->active_target = active_target;
@ -1353,7 +1353,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
"process_flush header = {.frag_count = %d}", flush_header->frag_count));
/* increase signal count by incoming frags */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count);
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: process_flush: received message from %d. passive_incoming_frag_count = %d",
@ -1372,7 +1372,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
}
/* signal incoming will increment this counter */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
return sizeof (*flush_header);
}
@ -1387,7 +1387,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
"process_unlock header = {.frag_count = %d}", unlock_header->frag_count));
/* increase signal count by incoming frags */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count);
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"osc pt2pt: processing unlock request from %d. frag count = %d, processed_count = %d",
@ -1406,7 +1406,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
}
/* signal incoming will increment this counter */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
return sizeof (*unlock_header);
}

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -105,8 +105,8 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om
"osc pt2pt: flushing active fragment to target %d. pending: %d",
active_frag->target, active_frag->pending));
if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) {
if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) {
if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) {
if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) {
/* communication going on while synchronizing; this is an rma usage bug */
return OMPI_ERR_RMA_SYNC;
}

Просмотреть файл

@ -51,7 +51,7 @@ static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_frag_t* buffer)
{
opal_atomic_wmb ();
if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) {
if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) {
opal_atomic_mb ();
return ompi_osc_pt2pt_frag_start(module, buffer);
}
@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp
/* to ensure ordering flush the buffer on the peer */
curr = peer->active_frag;
if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) {
if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) {
/* If there's something pending, the pending finish will
start the buffer. Otherwise, we need to start it now. */
int ret = ompi_osc_pt2pt_frag_finish (module, curr);
@ -142,11 +142,11 @@ static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, i
curr->pending_long_sends = long_send;
peer->active_frag = curr;
} else {
OPAL_THREAD_ADD32(&curr->header->num_ops, 1);
OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1);
curr->pending_long_sends += long_send;
}
OPAL_THREAD_ADD32(&curr->pending, 1);
OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
} else {
curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len);
if (OPAL_UNLIKELY(NULL == curr)) {

Просмотреть файл

@ -64,7 +64,7 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);
if (!acquired) {
@ -91,7 +91,7 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
int lock_type = lock->sync.lock.type;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -99,9 +99,9 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
"ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status));
if (MPI_LOCK_EXCLUSIVE == lock_type) {
OPAL_THREAD_ADD32(&module->lock_status, 1);
OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
ompi_osc_pt2pt_activate_next_lock (module);
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) {
} else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
ompi_osc_pt2pt_activate_next_lock (module);
}
@ -128,7 +128,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
return OMPI_SUCCESS;
}
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -145,7 +145,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req));
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_THREAD_ADD32(&lock->sync_expected, -1);
OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1);
} else {
ompi_osc_pt2pt_peer_set_locked (peer, true);
}
@ -163,7 +163,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_header_unlock_t unlock_req;
int ret;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -207,7 +207,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1);
int ret;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in
break;
}
if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) {
if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) {
break;
}
lock_status = module->lock_status;
} while (1);
} else {
queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1);
int32_t _tmp_value = 0;
queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1);
}
if (queue) {
@ -909,9 +908,9 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
}
if (-1 == module->lock_status) {
OPAL_THREAD_ADD32(&module->lock_status, 1);
OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
ompi_osc_pt2pt_activate_next_lock (module);
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) {
} else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
ompi_osc_pt2pt_activate_next_lock (module);
}

Просмотреть файл

@ -166,7 +166,7 @@ static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *syn
static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync)
{
int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1);
int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
if (0 == new_value) {
OPAL_THREAD_LOCK(&sync->lock);
if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) {

Просмотреть файл

@ -516,7 +516,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
subreq->internal = true;
subreq->parent_request = request;
if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
}
if (result_datatype) {
@ -557,7 +557,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (request) {
/* release our reference so the request can complete */
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
}
if (source_datatype) {

Просмотреть файл

@ -8,7 +8,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result);
assert (OMPI_SUCCESS == ret);
} else {
result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank);
ompi_osc_rdma_lock_t _tmp_value = 0;
result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank);
}
if (OPAL_LIKELY(0 == result)) {

Просмотреть файл

@ -217,7 +217,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
subreq->parent_request = request;
if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
}
} else if (!alloc_reqs) {
subreq = request;
@ -232,7 +232,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) {
if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
}
if (alloc_reqs) {
@ -266,7 +266,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
ompi_osc_rdma_request_complete (request, OMPI_SUCCESS);
}
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
}
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
@ -551,7 +551,7 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
/* increment the outstanding request counter in the request object */
if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
cbcontext = (void *) ((intptr_t) request | 1);
request->sync = sync;
} else {
@ -643,12 +643,12 @@ static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_
subreq->internal = true;
subreq->type = OMPI_OSC_RDMA_TYPE_RDMA;
subreq->parent_request = request;
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OMPI_OSC_RDMA_REQUEST_RETURN(subreq);
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
}
return ret;

Просмотреть файл

@ -35,7 +35,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_
}
if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
}
ompi_osc_rdma_sync_rdma_dec (sync);

Просмотреть файл

@ -37,7 +37,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t);
static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag)
{
if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) {
if (0 == OPAL_THREAD_ADD_FETCH32(&frag->pending, -1)) {
opal_atomic_rmb ();
ompi_osc_rdma_deregister (frag->module, frag->handle);
@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size
curr->top += request_len;
curr->remain_len -= request_len;
OPAL_THREAD_ADD32(&curr->pending, 1);
OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
OPAL_THREAD_UNLOCK(&module->lock);

Просмотреть файл

@ -17,7 +17,8 @@
static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock)
{
return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
ompi_osc_rdma_lock_t _tmp_value = 0;
return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
}
static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock)

Просмотреть файл

@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
int32_t flags;
opal_atomic_mb ();
flags = peer->flags;
do {
flags = peer->flags;
if (flags & flag) {
return false;
}
} while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag));
} while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag));
return true;
}
@ -221,7 +220,7 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
*/
static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag)
{
OPAL_ATOMIC_AND32(&peer->flags, ~flag);
OPAL_ATOMIC_AND_FETCH32(&peer->flags, ~flag);
opal_atomic_mb ();
}

Просмотреть файл

@ -48,7 +48,7 @@ static int request_complete (struct ompi_request_t *request)
{
ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request;
if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) {
if (parent_request && 0 == OPAL_THREAD_ADD_FETCH32 (&parent_request->outstanding_requests, -1)) {
ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS);
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -25,7 +25,7 @@ typedef int64_t osc_rdma_base_t;
typedef int64_t osc_rdma_size_t;
typedef int64_t osc_rdma_counter_t;
#define ompi_osc_rdma_counter_add opal_atomic_add_64
#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_64
#else
@ -33,7 +33,7 @@ typedef int32_t osc_rdma_base_t;
typedef int32_t osc_rdma_size_t;
typedef int32_t osc_rdma_counter_t;
#define ompi_osc_rdma_counter_add opal_atomic_add_32
#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_32
#endif
@ -48,18 +48,18 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value
int64_t new;
opal_atomic_mb ();
new = opal_atomic_add_64 (p, value) - value;
new = opal_atomic_add_fetch_64 (p, value) - value;
opal_atomic_mb ();
return new;
}
static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value)
static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value)
{
int ret;
opal_atomic_mb ();
ret = opal_atomic_bool_cmpset_64 (p, comp, value);
ret = opal_atomic_compare_exchange_strong_64 (p, comp, value);
opal_atomic_mb ();
return ret;
@ -76,19 +76,19 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value
int32_t new;
opal_atomic_mb ();
/* opal_atomic_add_32 differs from normal atomics in that it returns the new value */
new = opal_atomic_add_32 (p, value) - value;
/* opal_atomic_add_fetch_32 differs from normal atomics in that it returns the new value */
new = opal_atomic_add_fetch_32 (p, value) - value;
opal_atomic_mb ();
return new;
}
static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value)
static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value)
{
int ret;
opal_atomic_mb ();
ret = opal_atomic_bool_cmpset_32 (p, comp, value);
ret = opal_atomic_compare_exchange_strong_32 (p, comp, value);
opal_atomic_mb ();
return ret;

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
ompi_osc_sm_module_t *module =
(ompi_osc_sm_module_t*) win->w_osc_module;
int my_rank = ompi_comm_rank (module->comm);
void *_tmp_ptr = NULL;
OBJ_RETAIN(group);
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) {
if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) {
OBJ_RELEASE(group);
return OMPI_ERR_RMA_SYNC;
}
@ -150,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group,
for (int i = 0 ; i < size ; ++i) {
int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
/* wait for rank to post */
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
opal_atomic_rmb ();
do {
old = module->posts[my_rank][rank_byte];
} while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
#if OPAL_HAVE_ATOMIC_MATH_64
(void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
#else
(void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
#endif
}
free (ranks);
@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
opal_atomic_mb();
group = module->start_group;
if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) {
if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) {
return OMPI_ERR_RMA_SYNC;
}
@ -198,7 +201,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
gsize = ompi_group_size(group);
for (int i = 0 ; i < gsize ; ++i) {
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1);
(void) opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1);
}
free (ranks);
@ -244,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
gsize = ompi_group_size(module->post_group);
for (int i = 0 ; i < gsize ; ++i) {
(void) opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
}
opal_atomic_wmb ();

Просмотреть файл

@ -26,9 +26,9 @@ lk_fetch_add32(ompi_osc_sm_module_t *module,
size_t offset,
uint32_t delta)
{
/* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the
/* opal_atomic_add_fetch_32 is an add then fetch so delta needs to be subtracted out to get the
* old value */
return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
return opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
delta) - delta;
}
@ -39,7 +39,7 @@ lk_add32(ompi_osc_sm_module_t *module,
size_t offset,
uint32_t delta)
{
opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
delta);
}

Просмотреть файл

@ -81,7 +81,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
{
size_t tmp;
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init, 1) > 1)
if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init, 1) > 1)
return OMPI_SUCCESS;
/* initialize static objects */
@ -109,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
*/
int mca_pml_base_bsend_fini(void)
{
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0)
if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0)
return OMPI_SUCCESS;
if(NULL != mca_pml_bsend_allocator)

Просмотреть файл

@ -261,7 +261,7 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
*/
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
if (sendreq->req_state == -1) { \
OPAL_THREAD_ADD32(&sendreq->req_state, 1); \
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \
}
/* Now check the error state. This request can be in error if the

Просмотреть файл

@ -328,7 +328,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* protocol has req_state == 0 and as such should not be
* decremented.
*/
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
}
if(send_request_pml_complete_check(sendreq) == false)

Просмотреть файл

@ -206,7 +206,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
(void *) des->des_remote,
des->des_remote_count, 0);
}
OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1);
OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
#if PML_BFO
btl->btl_free(btl, des);
@ -217,7 +217,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */
/* check completion status */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) {
/* schedule additional rdma operations */
@ -388,7 +388,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */
/* is receive request complete */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
recv_request_pml_complete_check(recvreq);
MCA_PML_BFO_RDMA_FRAG_RETURN(frag);
@ -506,7 +506,7 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype);
);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */
if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -668,7 +668,7 @@ void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype);
);
}
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */
if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -903,7 +903,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
#endif /* PML_BFO */
/* update request state */
recvreq->req_rdma_offset += size;
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
recvreq->req_rdma[rdma_idx].length -= size;
bytes_remaining -= size;
} else {

Просмотреть файл

@ -70,12 +70,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t);
static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
}
static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
}
/**

Просмотреть файл

@ -207,10 +207,10 @@ mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
/* advance the request */
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
send_request_pml_complete_check(sendreq);
@ -287,7 +287,7 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
(void *) des->des_local,
des->des_local_count, 0);
if (OPAL_LIKELY(0 < req_bytes_delivered)) {
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
}
send_request_pml_complete_check(sendreq);
@ -360,8 +360,8 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
des->des_local_count,
sizeof(mca_pml_bfo_frag_hdr_t));
OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
#if PML_BFO
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
@ -1164,7 +1164,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size;
range->range_send_length -= size;
range->range_send_offset += size;
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0;
@ -1226,7 +1226,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */
/* check for request completion */
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
send_request_pml_complete_check(sendreq);
@ -1335,7 +1335,7 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
size_t i, size = 0;
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
}
#if PML_BFO
MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq);

Просмотреть файл

@ -78,12 +78,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t);
static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
}
static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
}
static inline void
@ -445,7 +445,7 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
sendreq->req_pipeline_depth = 0;
sendreq->req_bytes_delivered = 0;
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32(
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
#if PML_BFO
sendreq->req_restartseq = 0; /* counts up restarts */

Просмотреть файл

@ -151,7 +151,7 @@ int mca_pml_ob1_isend(const void *buf,
}
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
}
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
@ -220,7 +220,7 @@ int mca_pml_ob1_send(const void *buf,
}
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
}
/**

Просмотреть файл

@ -56,7 +56,7 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
static int mca_pml_ob1_progress_needed = 0;
int mca_pml_ob1_enable_progress(int32_t count)
{
int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count);
int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count);
if( 1 < progress_count )
return 0; /* progress was already on */
@ -119,7 +119,7 @@ int mca_pml_ob1_progress(void)
}
if( 0 != completed_requests ) {
j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests);
j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests);
if( 0 == j ) {
opal_progress_unregister(mca_pml_ob1_progress);
}

Просмотреть файл

@ -445,7 +445,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* protocol has req_state == 0 and as such should not be
* decremented.
*/
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
}
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */

Просмотреть файл

@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, -1);
assert ((uint64_t) rdma_size == frag->rdma_length);
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
if (OPAL_LIKELY(0 < rdma_size)) {
/* check completion status */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, rdma_size);
if (recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) {
/* schedule additional rdma operations */
@ -373,7 +373,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_
}
} else {
/* is receive request complete */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
/* TODO: re-add order */
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
@ -524,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype);
);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */
if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -601,7 +601,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
* known that the data has been copied out of the descriptor. */
des->des_cbfunc(NULL, NULL, des, 0);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */
if(recv_request_pml_complete_check(recvreq) == false &&
@ -815,7 +815,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
recvreq->req_recv.req_base.req_count,
recvreq->req_recv.req_base.req_datatype);
);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
}
/* check completion status */
if(recv_request_pml_complete_check(recvreq) == false &&
@ -1024,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
/* update request state */
recvreq->req_rdma_offset += size;
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, 1);
recvreq->req_rdma[rdma_idx].length -= size;
bytes_remaining -= size;
} else {

Просмотреть файл

@ -64,12 +64,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
}
static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
}
/**

Просмотреть файл

@ -205,10 +205,10 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
/* advance the request */
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
send_request_pml_complete_check(sendreq);
@ -261,7 +261,7 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
/* count bytes of user data actually delivered and check for request completion */
if (OPAL_LIKELY(0 < rdma_length)) {
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
}
send_request_pml_complete_check(sendreq);
@ -313,8 +313,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
des->des_segment_count,
sizeof(mca_pml_ob1_frag_hdr_t));
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
if(send_request_pml_complete_check(sendreq) == false) {
mca_pml_ob1_send_request_schedule(sendreq);
@ -1044,7 +1044,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size;
range->range_send_length -= size;
range->range_send_offset += size;
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0;
@ -1060,7 +1060,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size;
range->range_send_length -= size;
range->range_send_offset += size;
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0;
@ -1126,7 +1126,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b
0, 0);
/* check for request completion */
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
send_request_pml_complete_check(sendreq);
} else {
@ -1200,7 +1200,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
mca_pml_ob1_rdma_frag_t* frag;
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
}
sendreq->req_recv.pval = hdr->hdr_recv_req.pval;

Просмотреть файл

@ -76,12 +76,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_range_t);
static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
}
static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
}
static inline void
@ -485,7 +485,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
return OMPI_ERR_UNREACH;
}
seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
seqn = OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count,
num_requests_null_inactive = 0;
for (i = 0; i < count; i++) {
void *_tmp_ptr = REQUEST_PENDING;
request = requests[i];
/* Check for null or completed persistent request. For
@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count,
continue;
}
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) {
assert(REQUEST_COMPLETE(request));
completed = i;
*index = i;
@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count,
* user.
*/
for(i = completed-1; (i+1) > 0; i--) {
void *tmp_ptr = &sync;
request = requests[i];
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count,
* Otherwise, the request has been completed meanwhile, and it
* has been atomically marked as REQUEST_COMPLETE.
*/
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) {
*index = i;
}
}
@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count,
WAIT_SYNC_INIT(&sync, count);
rptr = requests;
for (i = 0; i < count; i++) {
void *_tmp_ptr = REQUEST_PENDING;
request = *rptr++;
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count,
continue;
}
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) {
if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) {
if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) {
failed++;
}
@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count,
if (MPI_STATUSES_IGNORE != statuses) {
/* fill out status and free request if required */
for( i = 0; i < count; i++, rptr++ ) {
void *_tmp_ptr = &sync;
request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count,
* mark the request as pending then it is neither failed nor complete, and
* we must stop altering it.
*/
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) {
if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
/*
* Per MPI 2.2 p 60:
* Allows requests to be marked as MPI_ERR_PENDING if they are
@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count,
int rc;
/* free request if required */
for( i = 0; i < count; i++, rptr++ ) {
void *_tmp_ptr = &sync;
request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count,
/* If the request is still pending due to a failed request
* then skip it in this loop.
*/
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) {
if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
/*
* Per MPI 2.2 p 60:
* Allows requests to be marked as MPI_ERR_PENDING if they are
@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count,
num_requests_null_inactive = 0;
num_requests_done = 0;
for (size_t i = 0; i < count; i++, rptr++) {
void *_tmp_ptr = REQUEST_PENDING;
request = *rptr;
/*
* Check for null or completed persistent request.
@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count,
num_requests_null_inactive++;
continue;
}
indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync);
indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync);
if( !indices[i] ) {
/* If the request is completed go ahead and mark it as such */
assert( REQUEST_COMPLETE(request) );
@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count,
rptr = requests;
num_requests_done = 0;
for (size_t i = 0; i < count; i++, rptr++) {
void *_tmp_ptr = &sync;
request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count,
*/
if( !indices[i] ){
indices[num_requests_done++] = i;
} else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
} else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) {
indices[num_requests_done++] = i;
}
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request)
static inline void ompi_request_wait_completion(ompi_request_t *req)
{
if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
void *_tmp_ptr = REQUEST_PENDING;
ompi_wait_sync_t sync;
WAIT_SYNC_INIT(&sync, 1);
if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
SYNC_WAIT(&sync);
} else {
/* completed before we had a chance to swap in the sync object */
@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
if (0 == rc) {
if( OPAL_LIKELY(with_signal) ) {
if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) {
void *_tmp_ptr = REQUEST_PENDING;
if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) {
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
REQUEST_COMPLETED);
/* In the case where another thread concurrently changed the request to REQUEST_PENDING */

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost;
}
#if OPAL_HAVE_ATOMIC_CMPSET_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_list_item_t *item)
{
opal_counted_pointer_t tail;
opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value};
item->opal_list_next = &fifo->opal_fifo_ghost;
do {
tail.value = fifo->opal_fifo_tail.value;
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) {
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) {
break;
}
} while (1);
@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
if (&fifo->opal_fifo_ghost == tail.data.item) {
/* update the head */
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
opal_update_counted_pointer (&fifo->opal_fifo_head, head, item);
opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item);
} else {
/* update previous item */
tail.data.item->opal_list_next = item;
@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
opal_list_item_t *item, *next;
opal_counted_pointer_t head, tail;
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail;
do {
head.value = fifo->opal_fifo_head.value;
tail.value = fifo->opal_fifo_tail.value;
opal_atomic_rmb ();
item = (opal_list_item_t *) head.data.item;
next = (opal_list_item_t *) item->opal_list_next;
if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) {
if (ghost == tail.data.item && ghost == item) {
return NULL;
}
/* the head or next pointer are in an inconsistent state. keep looping. */
if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item &&
&fifo->opal_fifo_ghost == next) {
if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
head.value = fifo->opal_fifo_head.value;
continue;
}
/* try popping the head */
if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) {
if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) {
break;
}
} while (1);
@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
opal_atomic_wmb ();
/* check for tail and head consistency */
if (&fifo->opal_fifo_ghost == next) {
if (ghost == next) {
/* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) {
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) {
/* tail was changed by a push operation. wait for the item's next pointer to be set then
* update the head */
/* wait for next pointer to be updated by push */
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
while (ghost == item->opal_list_next) {
opal_atomic_rmb ();
}
@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
head.value = fifo->opal_fifo_head.value;
next = (opal_list_item_t *) item->opal_list_next;
assert (&fifo->opal_fifo_ghost == head.data.item);
assert (ghost == head.data.item);
fifo->opal_fifo_head.data.item = next;
opal_atomic_wmb ();
@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
opal_list_item_t *item, *next;
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
#if OPAL_HAVE_ATOMIC_LLSC_PTR
/* use load-linked store-conditional to avoid ABA issues */
do {
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
if (&fifo->opal_fifo_ghost == item) {
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
if (ghost == item) {
if (ghost == fifo->opal_fifo_tail.data.item) {
return NULL;
}
@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
#else
/* protect against ABA issues by "locking" the head */
do {
if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {
if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
break;
}
@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
opal_atomic_wmb();
item = opal_fifo_head (fifo);
if (&fifo->opal_fifo_ghost == item) {
if (ghost == item) {
fifo->opal_fifo_head.data.counter = 0;
return NULL;
}
@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
fifo->opal_fifo_head.data.item = next;
#endif
if (&fifo->opal_fifo_ghost == next) {
if (!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) {
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
if (ghost == next) {
void *tmp = item;
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) {
while (ghost == item->opal_list_next) {
opal_atomic_rmb ();
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -36,8 +36,8 @@
BEGIN_C_DECLS
/* NTH: temporarily suppress warnings about this not being defined */
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128)
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128)
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
#endif
/**
@ -50,7 +50,7 @@ union opal_counted_pointer_t {
/** list item pointer */
volatile opal_list_item_t * volatile item;
} data;
#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T
/** used for atomics when there is a 128-bit compare-exchange that can
* operate on two 64-bit values */
opal_int128_t value;
@ -59,19 +59,19 @@ union opal_counted_pointer_t {
typedef union opal_counted_pointer_t opal_counted_pointer_t;
#if OPAL_HAVE_ATOMIC_CMPSET_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
* list (if the list was empty before this operation).
*/
static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old,
static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old,
opal_list_item_t *item)
{
opal_counted_pointer_t new_p;
new_p.data.item = item;
new_p.data.counter = old.data.counter + 1;
return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value);
new_p.data.counter = old->data.counter + 1;
return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value);
}
#endif
@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
}
#if OPAL_HAVE_ATOMIC_CMPSET_128
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the LIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the
@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
opal_list_item_t *item)
{
do {
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
do {
item->opal_list_next = next;
opal_atomic_wmb ();
/* to protect against ABA issues it is sufficient to only update the counter in pop */
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) {
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
return next;
}
/* DO some kind of pause to release the bus */
@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
opal_counted_pointer_t old_head;
opal_list_item_t *item;
old_head.data.counter = lifo->opal_lifo_head.data.counter;
opal_atomic_rmb ();
old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
do {
old_head.data.counter = lifo->opal_lifo_head.data.counter;
opal_atomic_rmb ();
old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item;
item = (opal_list_item_t *) old_head.data.item;
if (item == &lifo->opal_lifo_ghost) {
return NULL;
}
if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head,
if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head,
(opal_list_item_t *) item->opal_list_next)) {
opal_atomic_wmb ();
item->opal_list_next = NULL;
@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
opal_list_item_t *item)
{
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
/* item free acts as a mini lock to avoid ABA problems */
item->item_free = 1;
do {
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
item->opal_list_next = next;
opal_atomic_wmb();
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) {
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
opal_atomic_wmb ();
/* now safe to pop this item */
item->item_free = 0;
@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
*/
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{
opal_list_item_t *item;
while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) {
opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost;
item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
while (item != ghost) {
/* ensure it is safe to pop the head */
if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) {
continue;
@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
opal_atomic_wmb ();
head = item;
/* try to swap out the head pointer */
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, item,
(void *) item->opal_list_next)) {
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head,
(void *) item->opal_list_next)) {
break;
}
/* NTH: don't need another atomic here */
item->item_free = 0;
item = head;
/* Do some kind of pause to release the bus */
}

Просмотреть файл

@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx)
/* Spot check: ensure this item is only on the list that we
just inserted it into */
(void)opal_atomic_add( &(item->opal_list_item_refcount), 1 );
opal_atomic_add ( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list;
#endif

Просмотреть файл

@ -509,7 +509,7 @@ static inline opal_list_item_t *opal_list_remove_item
#if OPAL_ENABLE_DEBUG
/* Spot check: ensure that this item is still only on one list */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = NULL;
#endif
@ -575,7 +575,7 @@ static inline void _opal_list_append(opal_list_t *list, opal_list_item_t *item
/* Spot check: ensure this item is only on the list that we just
appended it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list;
#endif
@ -625,7 +625,7 @@ static inline void opal_list_prepend(opal_list_t *list,
/* Spot check: ensure this item is only on the list that we just
prepended it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list;
#endif
@ -686,7 +686,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list)
/* Spot check: ensure that the item we're returning is now on no
lists */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount);
#endif
@ -746,7 +746,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list)
/* Spot check: ensure that the item we're returning is now on no
lists */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = NULL;
#endif
@ -789,7 +789,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos
/* Spot check: double check that this item is only on the list
that we just added it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list;
#endif

Просмотреть файл

@ -510,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
static inline int opal_obj_update(opal_object_t *object, int inc)
{
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
return OPAL_THREAD_ADD_FETCH32(&object->obj_reference_count, inc);
}
END_C_DECLS

Просмотреть файл

@ -210,7 +210,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item,
/* Spot check: ensure this item is only on the list that we just
appended it to */
OPAL_THREAD_ADD32( &(new_item->opal_tree_item_refcount), 1 );
OPAL_THREAD_ADD_FETCH32( &(new_item->opal_tree_item_refcount), 1 );
assert(1 == new_item->opal_tree_item_refcount);
new_item->opal_tree_item_belong_to = new_item->opal_tree_container;
#endif

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010 ARM ltd. All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -104,12 +107,12 @@ void opal_atomic_isync(void)
#if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6))
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
int32_t ret, tmp;
int32_t prev, tmp;
bool ret;
__asm__ __volatile__ (
"1: ldrex %0, [%2] \n"
@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
" bne 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
bool rc;
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval);
rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
}
#if (OPAL_ASM_SUPPORT_64BIT == 1)
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t ret;
int tmp;
int64_t prev;
int tmp;
bool ret;
__asm__ __volatile__ (
"1: ldrexd %0, %H0, [%2] \n"
" cmp %0, %3 \n"
" it eq \n"
" cmpeq %H0, %H3 \n"
" bne 2f \n"
" strexd %1, %4, %H4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
__asm__ __volatile__ (
"1: ldrexd %0, %H0, [%2] \n"
" cmp %0, %3 \n"
" it eq \n"
" cmpeq %H0, %H3 \n"
" bne 2f \n"
" strexd %1, %4, %H4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
@ -184,91 +188,65 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
bool rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
}
#endif
#define OPAL_HAVE_ATOMIC_ADD_32 1
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc)
{
int32_t t;
int tmp;
int32_t t, old;
int tmp;
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" add %0, %0, %3 \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
__asm__ __volatile__(
"1: ldrex %1, [%3] \n"
" add %0, %1, %4 \n"
" strex %2, %0, [%3] \n"
" cmp %2, #0 \n"
" bne 1b \n"
: "=&r" (t), "=&r" (tmp)
: "=&r" (t), "=&r" (old), "=&r" (tmp)
: "r" (v), "r" (inc)
: "cc", "memory");
return t;
return old;
}
#define OPAL_HAVE_ATOMIC_SUB_32 1
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec)
{
int32_t t;
int tmp;
int32_t t, old;
int tmp;
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" sub %0, %0, %3 \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
__asm__ __volatile__(
"1: ldrex %1, [%3] \n"
" sub %0, %1, %4 \n"
" strex %2, %0, [%3] \n"
" cmp %2, #0 \n"
" bne 1b \n"
: "=&r" (t), "=&r" (tmp)
: "=&r" (t), "=&r" (old), "=&r" (tmp)
: "r" (v), "r" (dec)
: "cc", "memory");
return t;
}
#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0)))
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
return !(__kuser_cmpxchg(oldval, newval, addr));
}
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
/* kernel function includes all necessary memory barriers */
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
}
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
/* kernel function includes all necessary memory barriers */
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
/* fetch-and-sub: hand back the value loaded by ldrex (old), not the
 * difference that was stored (t), matching opal_atomic_fetch_add_32 */
return old;
}
#endif

Просмотреть файл

@ -29,10 +29,10 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void)
*
*********************************************************************/
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
int32_t ret, tmp;
int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
" stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
int32_t ret, tmp;
int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
" stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
int32_t ret, tmp;
int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
" stlxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
return ret == 0;
}
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t ret;
int64_t prev;
int tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n"
@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
" stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
/* success iff the loaded value equals the expected VALUE; comparing with
 * the pointer itself (oldval) would test an address, not the contents */
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t ret;
int64_t prev;
int tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n"
@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
" stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
/* success iff the loaded value equals the expected VALUE; comparing with
 * the pointer itself (oldval) would test an address, not the contents */
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t ret;
int64_t prev;
int tmp;
bool ret;
__asm__ __volatile__ ("1: ldxr %0, [%2] \n"
" cmp %0, %3 \n"
@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
" stlxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
/* success iff the loaded value equals the expected VALUE; comparing with
 * the pointer itself (oldval) would test an address, not the contents */
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
@ -281,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
}
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
{ \
type newval; \
type newval, old; \
int32_t tmp; \
\
__asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \
" " inst " %" reg "0, %" reg "0, %" reg "3 \n" \
" stxr %w1, %" reg "0, [%2] \n" \
" cbnz %w1, 1b \n" \
: "=&r" (newval), "=&r" (tmp) \
__asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \
" " inst " %" reg "0, %" reg "1, %" reg "4 \n" \
" stxr %w2, %" reg "0, [%3] \n" \
" cbnz %w2, 1b \n" \
: "=&r" (newval), "=&r" (old), "=&r" (tmp) \
: "r" (addr), "r" (value) \
: "cc", "memory"); \
\
return newval; \
return old; \
}
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w")

Просмотреть файл

@ -40,11 +40,11 @@
*
* - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
* - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomically"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomically"
*
* Note that for the Atomic math, atomic add/sub may be implemented as
* C code using opal_atomic_bool_cmpset. The appearance of atomic
* C code using opal_atomic_compare_exchange. The appearance of atomic
* operation will be upheld in these cases.
*/
@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
*********************************************************************/
#if !OPAL_GCC_INLINE_ASSEMBLY
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 0
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 0
@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
#define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0
#else
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 1
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 1
@ -187,14 +187,14 @@ enum {
/* compare and set operations can't really be emulated from software,
so if these defines aren't already set, they should be set to 0
now */
#ifndef OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_HAVE_ATOMIC_CMPSET_32 0
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0
#endif
#ifndef OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_HAVE_ATOMIC_CMPSET_64 0
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
#endif
#ifndef OPAL_HAVE_ATOMIC_CMPSET_128
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
#endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_32
#define OPAL_HAVE_ATOMIC_LLSC_32 0
@ -270,7 +270,7 @@ void opal_atomic_wmb(void);
/**********************************************************************
*
* Atomic spinlocks - always inlined, if have atomic cmpset
* Atomic spinlocks - always inlined, if have atomic compare-and-swap
*
*********************************************************************/
@ -280,7 +280,7 @@ void opal_atomic_wmb(void);
#define OPAL_HAVE_ATOMIC_SPINLOCKS 0
#endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
/**
* Initialize a lock to value
@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
#if OPAL_HAVE_ATOMIC_SPINLOCKS == 0
#undef OPAL_HAVE_ATOMIC_SPINLOCKS
#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
#define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1
#endif
@ -347,48 +347,48 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
#endif
/* the 32-bit compare-exchange prototypes below must be visible whenever the
 * (renamed) 32-bit compare-exchange feature macro is set, mirroring the
 * 64-bit guard further down */
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline
#endif
bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline
#endif
bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline
#endif
bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval);
#endif
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN)
#define OPAL_HAVE_ATOMIC_CMPSET_64 0
#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN)
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
#endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline
#endif
bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline
#endif
bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline
#endif
bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval);
#endif
@ -397,45 +397,25 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
#define OPAL_HAVE_ATOMIC_MATH_32 0
#endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back on cmpset 32, that too will be inline. */
#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_add_32(volatile int32_t *addr, int delta);
#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value);
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back to cmpset 32, that too will be inline. */
#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta);
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta);
static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int delta);
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */
#if ! OPAL_HAVE_ATOMIC_MATH_32
/* fix up the value of opal_have_atomic_math_32 to allow for C versions */
#undef OPAL_HAVE_ATOMIC_MATH_32
#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#endif
#ifndef OPAL_HAVE_ATOMIC_MATH_64
@ -443,45 +423,24 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
#define OPAL_HAVE_ATOMIC_MATH_64 0
#endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back to cmpset 64, that too will be inline */
#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta);
#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value);
#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value);
#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value);
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back to cmpset 64, that too will be inline */
#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta);
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta);
static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta);
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta);
#endif /* OPAL_HAVE_ATOMIC_MATH_64 */
#if ! OPAL_HAVE_ATOMIC_MATH_64
/* fix up the value of opal_have_atomic_math_64 to allow for C versions */
#undef OPAL_HAVE_ATOMIC_MATH_64
#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#endif
/* provide a size_t add/subtract. When in debug mode, make it an
@ -491,114 +450,141 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
*/
#if defined(DOXYGEN) || OPAL_ENABLE_DEBUG
static inline size_t
opal_atomic_add_size_t(volatile size_t *addr, size_t delta)
opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
return (size_t) opal_atomic_add_32((int32_t*) addr, delta);
return (size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta);
#elif SIZEOF_SIZE_T == 8
return (size_t) opal_atomic_add_64((int64_t*) addr, delta);
return (size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
static inline size_t
opal_atomic_sub_size_t(volatile size_t *addr, size_t delta)
opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
return (size_t) opal_atomic_sub_32((int32_t*) addr, delta);
return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta);
#elif SIZEOF_SIZE_T == 8
return (size_t) opal_atomic_sub_64((int64_t*) addr, delta);
return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
/**
 * size_t flavour of the atomic subtract-and-fetch operation.
 *
 * Forwards to the 32- or 64-bit opal_atomic_sub_fetch_* routine selected by
 * SIZEOF_SIZE_T and returns that routine's result converted to size_t.
 * NOTE(review): by the op_fetch naming used in this header the result is
 * presumably the value after the subtraction -- confirm against the
 * underlying implementation.
 *
 * @param addr   location to update
 * @param delta  amount to subtract
 */
static inline size_t
opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
    /* keep the volatile qualifier on the cast, as the macro variants do */
    return (size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta);
#elif SIZEOF_SIZE_T == 8
    /* keep the volatile qualifier on the cast, as the macro variants do */
    return (size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
/**
 * size_t flavour of the atomic fetch-and-subtract operation.
 *
 * Forwards to the 32- or 64-bit opal_atomic_fetch_sub_* routine selected by
 * SIZEOF_SIZE_T and returns that routine's result converted to size_t.
 * NOTE(review): by the fetch_op naming used in this header the result is
 * presumably the value before the subtraction -- confirm against the
 * underlying implementation.
 *
 * @param addr   location to update
 * @param delta  amount to subtract
 */
static inline size_t
opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
    /* keep the volatile qualifier on the cast, as the macro variants do */
    return (size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta);
#elif SIZEOF_SIZE_T == 8
    /* keep the volatile qualifier on the cast, as the macro variants do */
    return (size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
#else
#if SIZEOF_SIZE_T == 4
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_32((int32_t*) addr, delta))
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_32((int32_t*) addr, delta))
#elif SIZEOF_SIZE_T ==8
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_64((int64_t*) addr, delta))
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_64((int64_t*) addr, delta))
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta))
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta))
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta))
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta))
#elif SIZEOF_SIZE_T == 8
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta))
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta))
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta))
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta))
#else
#error "Unknown size_t size"
#endif
#endif
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
/* these are always done with inline functions, so always mark as
static inline */
static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length);
static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline bool opal_atomic_bool_cmpset_ptr(volatile void* addr,
void* oldval,
void* newval);
static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr,
void* oldval,
void* newval);
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
void* oldval,
void* newval);
static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval,
int64_t newval, size_t length);
static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval,
int64_t newval, size_t length);
static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval,
int64_t newval, size_t length);
static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval,
void *newval);
static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval,
void *newval);
static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval,
void *newval);
/**
* Atomic compare and set of pointer with relaxed semantics. This
* Atomic compare and set of generic type with relaxed semantics. This
* macro detect at compile time the type of the first argument and
* choose the correct function to be called.
*
* \note This macro should only be used for integer types.
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>.
*
* See opal_atomic_bool_cmpset_* for pseudo-code.
* See opal_atomic_compare_exchange_* for pseudo-code.
*/
#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
#define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
/**
* Atomic compare and set of pointer with acquire semantics. This
* macro detect at compile time the type of the first argument
* and choose the correct function to be called.
* Atomic compare and set of generic type with acquire semantics. This
* macro detect at compile time the type of the first argument and
* choose the correct function to be called.
*
* \note This macro should only be used for integer types.
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>.
*
* See opal_atomic_bool_cmpset_acq_* for pseudo-code.
* See opal_atomic_compare_exchange_acq_* for pseudo-code.
*/
#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
#define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
/**
* Atomic compare and set of pointer with release semantics. This
* macro detect at compile time the type of the first argument
* and choose the correct function to b
* Atomic compare and set of generic type with release semantics. This
* macro detect at compile time the type of the first argument and
* choose the correct function to be called.
*
* \note This macro should only be used for integer types.
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>.
*
* See opal_atomic_bool_cmpsetrel_* for pseudo-code.
* See opal_atomic_compare_exchange_rel_* for pseudo-code.
*/
#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
#define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64)
@ -606,15 +592,11 @@ static inline void opal_atomic_add_xx(volatile void* addr,
int32_t value, size_t length);
static inline void opal_atomic_sub_xx(volatile void* addr,
int32_t value, size_t length);
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta );
static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta );
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta );
static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta );
#else
#error Atomic arithmetic on pointers not supported
#endif
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta );
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta );
/**
* Atomically increment the content depending on the type. This

Просмотреть файл

@ -34,20 +34,30 @@
*
* Some architectures do not provide support for the 64 bits
* atomic operations. Until we find a better solution let's just
* undefine all those functions if there is no 64 bit cmpset
* undefine all those functions if there is no 64 bit compare-exchange
*
*********************************************************************/
#if OPAL_HAVE_ATOMIC_CMPSET_32
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/*
 * Generate opal_atomic_fetch_<name>_<bits>() on top of compare-exchange:
 * read the current value, try to replace it with (current <operation> value),
 * and retry until the exchange succeeds.  The generated function returns the
 * value that was read before the successful exchange.
 */
#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \
    static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
    { \
        type observed; \
        \
        for (;;) { \
            observed = *addr; \
            if (opal_atomic_compare_exchange_strong_ ## bits (addr, &observed, observed operation value)) { \
                break; \
            } \
        } \
        \
        return observed; \
    }
#if !defined(OPAL_HAVE_ATOMIC_SWAP_32)
#define OPAL_HAVE_ATOMIC_SWAP_32 1
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
int32_t newval)
{
int32_t old;
int32_t old = *addr;
do {
old = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, old, newval));
} while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval));
return old;
}
@ -55,161 +65,91 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
#if !defined(OPAL_HAVE_ATOMIC_ADD_32)
#define OPAL_HAVE_ATOMIC_ADD_32 1
static inline int32_t
opal_atomic_add_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta));
return (oldval + delta);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add)
#endif /* OPAL_HAVE_ATOMIC_ADD_32 */
#if !defined(OPAL_HAVE_ATOMIC_AND_32)
#define OPAL_HAVE_ATOMIC_AND_32 1
static inline int32_t
opal_atomic_and_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value));
return (oldval & value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and)
#endif /* OPAL_HAVE_ATOMIC_AND_32 */
#if !defined(OPAL_HAVE_ATOMIC_OR_32)
#define OPAL_HAVE_ATOMIC_OR_32 1
static inline int32_t
opal_atomic_or_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value));
return (oldval | value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or)
#endif /* OPAL_HAVE_ATOMIC_OR_32 */
#if !defined(OPAL_HAVE_ATOMIC_XOR_32)
#define OPAL_HAVE_ATOMIC_XOR_32 1
static inline int32_t
opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value));
return (oldval ^ value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor)
#endif /* OPAL_HAVE_ATOMIC_XOR_32 */
#if !defined(OPAL_HAVE_ATOMIC_SUB_32)
#define OPAL_HAVE_ATOMIC_SUB_32 1
static inline int32_t
opal_atomic_sub_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta));
return (oldval - delta);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub)
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#if !defined(OPAL_HAVE_ATOMIC_SWAP_64)
#define OPAL_HAVE_ATOMIC_SWAP_64 1
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr,
int64_t newval)
{
int64_t old;
int64_t old = *addr;
do {
old = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, old, newval));
} while (!opal_atomic_compare_exchange_strong_64 (addr, &old, newval));
return old;
}
#endif /* OPAL_HAVE_ATOMIC_SWAP_64 */
#if !defined(OPAL_HAVE_ATOMIC_ADD_64)
#define OPAL_HAVE_ATOMIC_ADD_64 1
static inline int64_t
opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta));
return (oldval + delta);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add)
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */
#if !defined(OPAL_HAVE_ATOMIC_AND_64)
#define OPAL_HAVE_ATOMIC_AND_64 1
static inline int64_t
opal_atomic_and_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value));
return (oldval & value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and)
#endif /* OPAL_HAVE_ATOMIC_AND_64 */
#if !defined(OPAL_HAVE_ATOMIC_OR_64)
#define OPAL_HAVE_ATOMIC_OR_64 1
static inline int64_t
opal_atomic_or_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value));
return (oldval | value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or)
#endif /* OPAL_HAVE_ATOMIC_OR_64 */
#if !defined(OPAL_HAVE_ATOMIC_XOR_64)
#define OPAL_HAVE_ATOMIC_XOR_64 1
static inline int64_t
opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value));
return (oldval ^ value);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor)
#endif /* OPAL_HAVE_ATOMIC_XOR_64 */
#if !defined(OPAL_HAVE_ATOMIC_SUB_64)
#define OPAL_HAVE_ATOMIC_SUB_64 1
static inline int64_t
opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do {
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta));
return (oldval - delta);
}
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub)
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */
#else
@ -222,130 +162,70 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
#define OPAL_HAVE_ATOMIC_SUB_64 0
#endif
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */
#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
static inline bool
opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length)
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_CMPSET_32
case 4:
return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
case 8:
return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
}
abort();
/* This should never happen, so deliberately abort (hopefully
leaving a corefile for analysis) */
}
static inline bool
opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length)
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_CMPSET_32
case 4:
return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
case 8:
return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
}
/* This should never happen, so deliberately abort (hopefully
leaving a corefile for analysis) */
abort();
}
static inline bool
opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length)
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_CMPSET_32
case 4:
return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
case 8:
return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
}
/* This should never happen, so deliberately abort (hopefully
leaving a corefile for analysis) */
abort();
}
static inline bool
opal_atomic_bool_cmpset_ptr(volatile void* addr,
void* oldval,
void* newval)
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
/*
 * Generate opal_atomic_compare_exchange_strong<semantics>xx(), which selects
 * the 32- or 64-bit compare-exchange from the operand size passed in length
 * and aborts for any other size.  The semantics argument only shapes the
 * generated name: the body always calls the plain _32/_64 primitives.
 */
#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \
    static inline bool \
    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
                                                            int64_t newval, const size_t length) \
    { \
        if (4 == length) { \
            return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
                                                           (int32_t *) oldval, (int32_t) newval); \
        } \
        if (8 == length) { \
            return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, \
                                                           (int64_t *) oldval, (int64_t) newval); \
        } \
        abort(); \
    }
#elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/*
 * 32-bit-only variant: generate
 * opal_atomic_compare_exchange_strong<semantics>xx() for platforms that only
 * provide the 32-bit compare-exchange.  A length of 4 is forwarded to the
 * 32-bit primitive; any other length aborts.  The semantics argument only
 * shapes the generated name: the body always calls the plain _32 primitive.
 */
#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \
    static inline bool \
    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
                                                            int64_t newval, const size_t length) \
    { \
        switch (length) { \
        case 4: \
            return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
                                                           (int32_t *) oldval, (int32_t) newval); \
        } \
        abort(); \
    }
#else
abort();
#error "Platform does not have required atomic compare-and-swap functionality"
#endif
}
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_)
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_)
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_)
static inline bool
opal_atomic_bool_cmpset_acq_ptr(volatile void* addr,
void* oldval,
void* newval)
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/*
 * 32-bit pointer variant: generate
 * opal_atomic_compare_exchange_strong<semantics>ptr() by reinterpreting the
 * pointer-sized operands as int32_t and forwarding to the 32-bit
 * compare-exchange.
 */
#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \
    static inline bool \
    opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \
    { \
        volatile int32_t *target = (volatile int32_t *) addr; \
        int32_t *expected = (int32_t *) oldval; \
        int32_t desired = (int32_t) newval; \
        \
        return opal_atomic_compare_exchange_strong_32 (target, expected, desired); \
    }
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
/*
 * 64-bit pointer variant: generate
 * opal_atomic_compare_exchange_strong<semantics>ptr() by reinterpreting the
 * pointer-sized operands as int64_t and forwarding to the 64-bit
 * compare-exchange.
 */
#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \
    static inline bool \
    opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \
    { \
        volatile int64_t *target = (volatile int64_t *) addr; \
        int64_t *expected = (int64_t *) oldval; \
        int64_t desired = (int64_t) newval; \
        \
        return opal_atomic_compare_exchange_strong_64 (target, expected, desired); \
    }
#else
abort();
#error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics"
#endif
}
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_)
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_)
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
void* oldval,
void* newval)
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#else
abort();
#endif
}
#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
#if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64)
@ -383,20 +263,19 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64
static inline void
opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_ADD_32
case 4:
opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
(void) opal_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value );
break;
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */
#if OPAL_HAVE_ATOMIC_ADD_64
case 8:
opal_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
(void) opal_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value );
break;
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */
default:
@ -413,13 +292,13 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
switch( length ) {
#if OPAL_HAVE_ATOMIC_SUB_32
case 4:
opal_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
(void) opal_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value );
break;
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */
#if OPAL_HAVE_ATOMIC_SUB_64
case 8:
opal_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
(void) opal_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value );
break;
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */
default:
@ -429,47 +308,77 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
}
}
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
static inline int32_t opal_atomic_add_ptr( volatile void* addr,
void* delta )
{
return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta);
}
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
static inline int64_t opal_atomic_add_ptr( volatile void* addr,
void* delta )
{
return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta);
}
#else
static inline int32_t opal_atomic_add_ptr( volatile void* addr,
void* delta )
{
abort();
return 0;
}
/*
 * Generate opal_atomic_<op>_fetch_<suffix>() from the matching
 * opal_atomic_fetch_<op>_<suffix>(): perform the fetch-and-op, then apply
 * the same operation to the returned value so the caller gets the updated
 * value rather than the previous one.
 */
#define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \
    static inline type opal_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value) \
    { \
        const type previous = opal_atomic_fetch_ ## op ## _ ## suffix (addr, value); \
        \
        return previous operation value; \
    }
OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32)
#if OPAL_HAVE_ATOMIC_MATH_64
OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64)
#endif
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
static inline int32_t opal_atomic_sub_ptr( volatile void* addr,
static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr,
void* delta )
{
return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta);
}
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32
static inline int64_t opal_atomic_sub_ptr( volatile void* addr,
void* delta )
{
return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta);
}
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
return opal_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
return opal_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta);
#else
static inline int32_t opal_atomic_sub_ptr( volatile void* addr,
abort ();
return 0;
#endif
}
/**
 * Atomically add delta to the pointer-sized integer at addr.
 *
 * Dispatches on SIZEOF_VOID_P to the 32- or 64-bit add-fetch primitive and
 * aborts when no matching primitive is available.
 *
 * @param addr   Address of the pointer-sized value to update.
 * @param delta  Amount to add, passed as a pointer and converted to an
 *               integer.
 * @return Value returned by opal_atomic_add_fetch_32/64.
 */
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
    int32_t *word = (int32_t*) addr;

    return opal_atomic_add_fetch_32(word, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
    int64_t *word = (int64_t*) addr;

    return opal_atomic_add_fetch_64(word, (unsigned long) delta);
#else
    abort ();
    return 0;
#endif
}
/**
 * Atomically subtract delta from the pointer-sized integer at addr.
 *
 * Dispatches on SIZEOF_VOID_P to the 32- or 64-bit fetch-sub primitive and
 * aborts when no matching primitive is available.
 *
 * @param addr   Address of the pointer-sized value to update.
 * @param delta  Amount to subtract, passed as a pointer and converted to an
 *               integer.
 * @return Value returned by opal_atomic_fetch_sub_32/64.
 */
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
    return opal_atomic_fetch_sub_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64
    /* the 64-bit branch is gated on the 64-bit capability flag, matching
     * the add variants */
    return opal_atomic_fetch_sub_64((int64_t*) addr, (unsigned long) delta);
#else
    abort();
    return 0;
#endif
}
/**
 * Atomically subtract delta from the pointer-sized integer at addr.
 *
 * Dispatches on SIZEOF_VOID_P to the 32- or 64-bit sub-fetch primitive and
 * aborts when no matching primitive is available.
 *
 * @param addr   Address of the pointer-sized value to update.
 * @param delta  Amount to subtract, passed as a pointer and converted to an
 *               integer.
 * @return Value returned by opal_atomic_sub_fetch_32/64.
 */
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
    return opal_atomic_sub_fetch_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64
    /* the 64-bit branch is gated on the 64-bit capability flag, matching
     * the add variants */
    return opal_atomic_sub_fetch_64((int64_t*) addr, (unsigned long) delta);
#else
    abort();
    return 0;
#endif
}
#endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */
@ -493,21 +402,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value )
/**
 * Try to take the lock without blocking.
 *
 * Makes a single compare-exchange attempt to move the lock word from
 * OPAL_ATOMIC_LOCK_UNLOCKED to OPAL_ATOMIC_LOCK_LOCKED.
 *
 * @param lock  Lock to try.
 * @return 0 if the exchange succeeded, 1 if it failed.
 */
static inline int
opal_atomic_trylock(opal_atomic_lock_t *lock)
{
    int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED;
    /* use the acquire variant, as the cmpset-based implementation did */
    bool ret = opal_atomic_compare_exchange_strong_acq_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED);
    return (ret == false) ? 1 : 0;
}
/**
 * Acquire the lock, spinning until opal_atomic_trylock() succeeds.
 *
 * After each failed attempt the lock word is polled with plain reads, and
 * the next attempt is made only once it no longer reads as
 * OPAL_ATOMIC_LOCK_LOCKED.
 *
 * @param lock  Lock to acquire.
 */
static inline void
opal_atomic_lock(opal_atomic_lock_t *lock)
{
    while (opal_atomic_trylock (lock)) {
        while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) {
            /* spin */ ;
        }
    }
}

Просмотреть файл

@ -33,7 +33,7 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_AND_32 1
#define OPAL_HAVE_ATOMIC_OR_32 1
@ -41,7 +41,7 @@
#define OPAL_HAVE_ATOMIC_SUB_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_MATH_64 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_ADD_64 1
#define OPAL_HAVE_ATOMIC_AND_64 1
#define OPAL_HAVE_ATOMIC_OR_64 1
@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void)
#pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L)
#endif
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval)
@ -110,51 +104,45 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva
return oldval;
}
static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta)
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
{
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED);
return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
}
static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value)
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
{
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
}
static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value)
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
{
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
}
static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
{
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
}
static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta)
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
{
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED);
return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
}
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
@ -164,52 +152,55 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
return oldval;
}
static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
{
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED);
return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
}
static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value)
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
{
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
}
static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value)
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
{
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
}
static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
{
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED);
return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
}
static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
{
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED);
return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
}
#if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr,
opal_int128_t oldval, opal_int128_t newval)
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
opal_int128_t *oldval, opal_int128_t newval)
{
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
return __atomic_compare_exchange_n (addr, oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
#elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
/* __atomic version is not lock-free so use legacy __sync version */
/**
 * Strong 128-bit compare-and-exchange built on the legacy
 * __sync_val_compare_and_swap builtin.
 *
 * @param addr    location to update
 * @param oldval  in: expected value; out: the value observed at @addr
 * @param newval  value to store when *@addr equals *@oldval
 * @return true when the observed value matched *@oldval (exchange done)
 */
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
                                                            opal_int128_t *oldval, opal_int128_t newval)
{
    opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
    bool ret = prev == *oldval;

    /* report what was actually seen so the caller can retry without a reload */
    *oldval = prev;
    return ret;
}
#endif

Просмотреть файл

@ -40,7 +40,7 @@
*********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void)
*********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval,
int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
unsigned char ret;
__asm__ __volatile__ (
SMPLOCK "cmpxchgl %3,%2 \n\t"
"sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*addr)
: "=qm" (ret), "+a" (*oldval), "+m" (*addr)
: "q"(newval)
: "memory", "cc");
@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
#if OPAL_GCC_INLINE_ASSEMBLY
@ -132,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr,
*
* Atomically adds @i to @v.
*/
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
{
int ret = i;
__asm__ __volatile__(
@ -141,7 +139,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
:
:"memory", "cc"
);
return (ret+i);
return ret;
}
@ -152,7 +150,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
*
* Atomically subtracts @i from @v.
*/
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
{
int ret = -i;
__asm__ __volatile__(
@ -161,7 +159,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
:
:"memory", "cc"
);
return (ret-i);
return ret;
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -40,7 +40,7 @@
*********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1
@ -53,7 +53,7 @@
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_MATH_64 1
@ -144,24 +144,25 @@ void opal_atomic_isync(void)
#define OPAL_ASM_VALUE64(x) x
#endif
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
int32_t ret;
int32_t prev;
bool ret;
__asm__ __volatile__ (
"1: lwarx %0, 0, %2 \n\t"
" cmpw 0, %0, %3 \n\t"
" bne- 2f \n\t"
" stwcx. %4, 0, %2 \n\t"
" bne- 1b \n\t"
"2:"
: "=&r" (ret), "=m" (*addr)
: "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr)
: "cc", "memory");
__asm__ __volatile__ (
"1: lwarx %0, 0, %2 \n\t"
" cmpw 0, %0, %3 \n\t"
" bne- 2f \n\t"
" stwcx. %4, 0, %2 \n\t"
" bne- 1b \n\t"
"2:"
: "=&r" (prev), "=m" (*addr)
: "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr)
: "cc", "memory");
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
/* 32-bit compare-exchange followed by a read barrier.  Performs exactly one
 * compare-exchange; @oldval is updated by the underlying call. */
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    bool rc;

    rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}
/* Write barrier followed by a 32-bit compare-exchange; returns the result of
 * that single compare-exchange. */
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
}
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
@ -236,20 +235,20 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
#if OPAL_GCC_INLINE_ASSEMBLY
/* Generates opal_atomic_fetch_<type>_64(v, val): an ldarx/stdcx. retry loop
 * that loads the current value into `old`, computes `t = old <instr> val`,
 * stores `t` back, and returns `old` (the value before the operation).
 * Asm operands: %0 = t, %1 = old, %3 = val, %4 = address of *v. */
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \
static inline int64_t opal_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \
{ \
    int64_t t, old; \
    \
    __asm__ __volatile__( \
                         "1: ldarx %1, 0, %4 \n\t" \
                         " " #instr " %0, %3, %1 \n\t" \
                         " stdcx. %0, 0, %4 \n\t" \
                         " bne- 1b \n\t" \
                         : "=&r" (t), "=&r" (old), "=m" (*v) \
                         : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \
                         : "cc"); \
    \
    return old; \
}
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add)
@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or)
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor)
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf)
/**
 * Strong 64-bit compare-and-exchange implemented with an ldarx/stdcx. loop.
 *
 * @param addr    location to update
 * @param oldval  in: expected value; out: the value loaded from @addr
 * @param newval  value to store when *@addr equals *@oldval
 * @return true when the loaded value matched *@oldval (store performed)
 */
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    int64_t prev;
    bool ret;

    /* load-reserve, compare against the expected value, and store-conditional;
     * retry only when the reservation was lost, exit on a value mismatch */
    __asm__ __volatile__ (
                          "1: ldarx %0, 0, %2 \n\t"
                          " cmpd 0, %0, %3 \n\t"
                          " bne- 2f \n\t"
                          " stdcx. %4, 0, %2 \n\t"
                          " bne- 1b \n\t"
                          "2:"
                          : "=&r" (prev), "=m" (*addr)
                          : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr)
                          : "cc", "memory");

    ret = (prev == *oldval);
    *oldval = prev;
    return ret;
}
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
bool rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
}
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{
@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval
#if OPAL_GCC_INLINE_ASSEMBLY
static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
int64_t prev;
int ret;
/*
@ -369,67 +347,65 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr,
* is very similar to the pure 64 bit version.
*/
__asm__ __volatile__ (
"ld r4,%2 \n\t"
"ld r5,%3 \n\t"
"1: ldarx r9, 0, %1 \n\t"
" cmpd 0, r9, r4 \n\t"
"ld r4,%3 \n\t"
"ld r5,%4 \n\t"
"1: ldarx %1, 0, %2 \n\t"
" cmpd 0, %1, r4 \n\t"
" bne- 2f \n\t"
" stdcx. r5, 0, %1 \n\t"
" stdcx. r5, 0, %2 \n\t"
" bne- 1b \n\t"
"2: \n\t"
"xor r5,r4,r9 \n\t"
"xor r5,r4,%1 \n\t"
"subfic r9,r5,0 \n\t"
"adde %0,r9,r5 \n\t"
: "=&r" (ret)
: "=&r" (ret), "+r" (prev)
: "r"OPAL_ASM_ADDR(addr),
"m"(oldval), "m"(newval)
"m"(*oldval), "m"(newval)
: "r4", "r5", "r9", "cc", "memory");
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
*oldval = prev;
return (bool) ret;
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#endif /* OPAL_ASM_SUPPORT_64BIT */
#if OPAL_GCC_INLINE_ASSEMBLY
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
/* 64-bit compare-exchange followed by a read barrier; the outcome of the
 * compare-exchange is returned after the barrier has been issued. */
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    const bool swapped = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);

    opal_atomic_rmb();
    return swapped;
}
/* Write barrier first, then the 64-bit compare-exchange whose outcome is
 * returned to the caller. */
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    bool swapped;

    opal_atomic_wmb();
    swapped = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
    return swapped;
}
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \
static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \
static inline int32_t opal_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \
{ \
int32_t t; \
int32_t t, old; \
\
__asm__ __volatile__( \
"1: lwarx %0, 0, %3 \n\t" \
" " #instr " %0, %2, %0 \n\t" \
" stwcx. %0, 0, %3 \n\t" \
"1: lwarx %1, 0, %4 \n\t" \
" " #instr " %0, %3, %1 \n\t" \
" stwcx. %0, 0, %4 \n\t" \
" bne- 1b \n\t" \
: "=&r" (t), "=m" (*v) \
: "=&r" (t), "=&r" (old), "=m" (*v) \
: "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \
: "cc"); \
\

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -38,9 +41,9 @@
*********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
/**********************************************************************
@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void)
*********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
*
* if (*(reg(rs1)) == reg(rs2) )
* swap reg(rd), *(reg(rs1))
* else
* reg(rd) = *(reg(rs1))
*/
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
*
* if (*(reg(rs1)) == reg(rs2) )
* swap reg(rd), *(reg(rs1))
* else
* reg(rd) = *(reg(rs1))
*/
int32_t ret = newval;
int32_t prev = newval;
bool ret;
__asm__ __volatile__("casa [%1] " ASI_P ", %2, %0"
: "+r" (ret)
: "r" (addr), "r" (oldval));
return (ret == oldval);
__asm__ __volatile__("casa [%1] " ASI_P ", %2, %0"
: "+r" (prev)
: "r" (addr), "r" (*oldval));
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
/* 32-bit compare-exchange followed by a read barrier.  Exactly one
 * compare-exchange and one barrier are issued. */
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    bool rc;

    rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}
/* Write barrier followed by a 32-bit compare-exchange; returns the result of
 * that single compare-exchange. */
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
}
#if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
*
@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
* else
* reg(rd) = *(reg(rs1))
*/
int64_t ret = newval;
int64_t prev = newval;
bool ret;
__asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0"
: "+r" (ret)
: "r" (addr), "r" (oldval));
return (ret == oldval);
__asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0"
: "+r" (prev)
: "r" (addr), "r" (*oldval));
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
#else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
*
@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
* reg(rd) = *(reg(rs1))
*
*/
long long ret = newval;
int64_t prev = newval;
bool ret;
__asm__ __volatile__(
"ldx %0, %%g1 \n\t" /* g1 = ret */
"ldx %2, %%g2 \n\t" /* g2 = oldval */
"casxa [%1] " ASI_P ", %%g2, %%g1 \n\t"
"stx %%g1, %0 \n"
: "+m"(ret)
: "r"(addr), "m"(oldval)
: "+m"(prev)
: "r"(addr), "m"(*oldval)
: "%g1", "%g2"
);
return (ret == oldval);
ret = (prev == *oldval);
*oldval = prev;
return ret;
}
#endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
/* 64-bit compare-exchange followed by a read barrier.  Exactly one
 * compare-exchange and one barrier are issued. */
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    bool rc;

    rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}
/* Write barrier followed by a 64-bit compare-exchange; returns the result of
 * that single compare-exchange. */
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -53,119 +53,110 @@ static inline void opal_atomic_wmb(void)
*
*********************************************************************/
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
/**
 * Strong 32-bit compare-and-exchange built on __sync_val_compare_and_swap.
 *
 * @param addr    location to update
 * @param oldval  in: expected value; out: the value observed at @addr
 * @param newval  value to store when *@addr equals *@oldval
 * @return true when the observed value matched *@oldval (exchange done)
 */
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
    bool ret = prev == *oldval;

    /* report what was actually seen so the caller can retry without a reload */
    *oldval = prev;
    return ret;
}
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);}
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);
}
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
/**
 * Atomically add @delta to *@addr.
 *
 * @return the value held at @addr immediately BEFORE the addition
 */
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
{
    /* fetch-then-op builtin: yields the previous contents, not the sum */
    return __sync_fetch_and_add(addr, delta);
}
#define OPAL_HAVE_ATOMIC_AND_32 1
/**
 * Atomically AND @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the AND
 */
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_and(addr, value);
}
#define OPAL_HAVE_ATOMIC_OR_32 1
/**
 * Atomically OR @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the OR
 */
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_or(addr, value);
}
#define OPAL_HAVE_ATOMIC_XOR_32 1
/**
 * Atomically XOR @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the XOR
 */
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_xor(addr, value);
}
#define OPAL_HAVE_ATOMIC_SUB_32 1
/**
 * Atomically subtract @delta from *@addr.
 *
 * @return the value held at @addr immediately BEFORE the subtraction
 */
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
{
    /* fetch-then-op builtin: yields the previous contents, not the difference */
    return __sync_fetch_and_sub(addr, delta);
}
#if OPAL_ASM_SYNC_HAVE_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
/**
 * Strong 64-bit compare-and-exchange built on __sync_val_compare_and_swap.
 *
 * @param addr    location to update
 * @param oldval  in: expected value; out: the value observed at @addr
 * @param newval  value to store when *@addr equals *@oldval
 * @return true when the observed value matched *@oldval (exchange done)
 */
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    int64_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
    bool ret = prev == *oldval;

    /* report what was actually seen so the caller can retry without a reload */
    *oldval = prev;
    return ret;
}
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);}
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);
}
#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
#define OPAL_HAVE_ATOMIC_MATH_64 1
#define OPAL_HAVE_ATOMIC_ADD_64 1
/**
 * Atomically add @delta to *@addr.
 *
 * @return the value held at @addr immediately BEFORE the addition
 */
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
{
    /* fetch-then-op builtin: yields the previous contents, not the sum */
    return __sync_fetch_and_add(addr, delta);
}
#define OPAL_HAVE_ATOMIC_AND_64 1
/**
 * Atomically AND @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the AND
 */
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_and(addr, value);
}
#define OPAL_HAVE_ATOMIC_OR_64 1
/**
 * Atomically OR @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the OR
 */
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_or(addr, value);
}
#define OPAL_HAVE_ATOMIC_XOR_64 1
/**
 * Atomically XOR @value into *@addr.
 *
 * @return the value held at @addr immediately BEFORE the XOR
 */
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
{
    /* fetch-then-op builtin: yields the previous contents, not the result */
    return __sync_fetch_and_xor(addr, value);
}
#define OPAL_HAVE_ATOMIC_SUB_64 1
/**
 * Atomically subtract @delta from *@addr.
 *
 * @return the value held at @addr immediately BEFORE the subtraction
 */
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
{
    /* fetch-then-op builtin: yields the previous contents, not the difference */
    return __sync_fetch_and_sub(addr, delta);
}
#endif
#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
/**
 * Strong 128-bit compare-and-exchange built on __sync_val_compare_and_swap.
 *
 * @param addr    location to update
 * @param oldval  in: expected value; out: the value observed at @addr
 * @param newval  value to store when *@addr equals *@oldval
 * @return true when the observed value matched *@oldval (exchange done)
 */
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
                                                            opal_int128_t *oldval, opal_int128_t newval)
{
    opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
    bool ret = prev == *oldval;

    /* report what was actually seen so the caller can retry without a reload */
    *oldval = prev;
    return ret;
}
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
#endif

Просмотреть файл

@ -40,9 +40,9 @@
*********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
/**********************************************************************
*
@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void)
*********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
unsigned char ret;
__asm__ __volatile__ (
SMPLOCK "cmpxchgl %3,%2 \n\t"
"sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*addr)
: "=qm" (ret), "+a" (*oldval), "+m" (*addr)
: "q"(newval)
: "memory", "cc");
@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
#if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
unsigned char ret;
__asm__ __volatile__ (
SMPLOCK "cmpxchgq %3,%2 \n\t"
"sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr))
: "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr))
: "q"(newval)
: "memory", "cc"
);
@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64
#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64
#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval,
opal_int128_t newval)
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval)
{
unsigned char ret;
@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op
* at the address is returned in eax:edx. */
__asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t"
"sete %0 \n\t"
: "=qm" (ret)
: "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]),
"a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1])
: "memory", "cc");
: "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1])
: "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1])
: "memory", "cc", "eax", "edx");
return (bool) ret;
}
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
@ -200,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr,
*
* Atomically adds @i to @v.
*/
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
{
int ret = i;
__asm__ __volatile__(
@ -209,7 +205,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
:
:"memory", "cc"
);
return (ret+i);
return ret;
}
#define OPAL_HAVE_ATOMIC_ADD_64 1
@ -221,7 +217,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
*
* Atomically adds @i to @v.
*/
static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t* v, int64_t i)
{
int64_t ret = i;
__asm__ __volatile__(
@ -230,7 +226,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
:
:"memory", "cc"
);
return (ret+i);
return ret;
}
#define OPAL_HAVE_ATOMIC_SUB_32 1
@ -242,7 +238,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
*
* Atomically subtracts @i from @v.
*/
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
{
int ret = -i;
__asm__ __volatile__(
@ -251,7 +247,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
:
:"memory", "cc"
);
return (ret-i);
return ret;
}
#define OPAL_HAVE_ATOMIC_SUB_64 1
@ -263,7 +259,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
*
* Atomically subtracts @i from @v.
*/
static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i)
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t* v, int64_t i)
{
int64_t ret = -i;
__asm__ __volatile__(
@ -272,7 +268,7 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i)
:
:"memory", "cc"
);
return (ret-i);
return ret;
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -1119,7 +1119,7 @@ int mca_btl_openib_add_procs(
}
if (nprocs_new) {
opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new);
opal_atomic_add_fetch_32 (&openib_btl->num_peers, nprocs_new);
/* adjust cq sizes given the new procs */
rc = openib_btl_size_queues (openib_btl);
@ -1229,7 +1229,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
/* this is a new process to this openib btl
* account this procs if need */
opal_atomic_add_32 (&openib_btl->num_peers, 1);
opal_atomic_add_fetch_32 (&openib_btl->num_peers, 1);
rc = openib_btl_size_queues(openib_btl);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));

Просмотреть файл

@ -237,7 +237,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
/* Set the flag to fatal */
device->got_fatal_event = true;
/* It is not critical to protect the counter */
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
/* fall through */
case IBV_EVENT_CQ_ERR:
case IBV_EVENT_QP_FATAL:
@ -280,7 +280,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
openib_event_to_str((enum ibv_event_type)event_type));
/* Set the flag to indicate port error */
device->got_port_event = true;
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
break;
case IBV_EVENT_COMM_EST:
case IBV_EVENT_PORT_ACTIVE:
@ -470,7 +470,7 @@ void mca_btl_openib_async_fini (void)
void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device)
{
if (mca_btl_openib_component.async_evbase) {
if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) {
if (1 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, 1)) {
mca_btl_openib_async_init ();
}
opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event,
@ -484,7 +484,7 @@ void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device)
{
if (mca_btl_openib_component.async_evbase) {
opal_event_del (&device->async_event);
if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) {
if (0 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, -1)) {
mca_btl_openib_async_fini ();
}
}

Просмотреть файл

@ -3203,7 +3203,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
credits = hdr->credits;
if(hdr->cm_seen)
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
/* Now return fragment. Don't touch hdr after this point! */
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
@ -3215,7 +3215,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail);
if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf))
break;
OPAL_THREAD_ADD32(&erl->credits, 1);
OPAL_THREAD_ADD_FETCH32(&erl->credits, 1);
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail);
}
OPAL_THREAD_UNLOCK(&erl->lock);
@ -3233,14 +3233,14 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
MCA_BTL_IB_FRAG_RETURN(frag);
if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
if (OPAL_UNLIKELY(is_credit_msg)) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
} else {
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
}
mca_btl_openib_endpoint_post_rr(ep, cqp);
} else {
mca_btl_openib_module_t *btl = ep->endpoint_btl;
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
mca_btl_openib_post_srr(btl, rqp);
}
}
@ -3251,10 +3251,10 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
/* If we got any credits (RDMA or send), then try to progress all
the no_credits_pending_frags lists */
if (rcredits > 0) {
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits);
OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
}
if (credits > 0) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
}
if (rcredits + credits > 0) {
int rc;
@ -3303,7 +3303,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
credits = hdr->credits;
if(hdr->cm_seen)
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
/* We should not be here with eager, control, or credit messages */
assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA);
@ -3314,11 +3314,11 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
/* Otherwise, FRAG_RETURN it and repost if necessary */
MCA_BTL_IB_FRAG_RETURN(frag);
if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
mca_btl_openib_endpoint_post_rr(ep, cqp);
} else {
mca_btl_openib_module_t *btl = ep->endpoint_btl;
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
mca_btl_openib_post_srr(btl, rqp);
}
@ -3327,10 +3327,10 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
/* If we got any credits (RDMA or send), then try to progress all
the no_credits_pending_frags lists */
if (rcredits > 0) {
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits);
OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
}
if (credits > 0) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
}
if (rcredits + credits > 0) {
int rc;
@ -3523,7 +3523,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
case IBV_WC_FETCH_ADD:
OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE"));
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
OPAL_THREAD_ADD_FETCH32(&endpoint->get_tokens, 1);
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
@ -3575,7 +3575,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des));
if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
/* new SRQ credit available. Try to progress pending frags*/
progress_pending_frags_srq(openib_btl, qp);
@ -3601,7 +3601,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
wc->byte_len < mca_btl_openib_component.eager_limit &&
openib_btl->eager_rdma_channels <
mca_btl_openib_component.max_eager_rdma &&
OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) ==
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_recv_count, 1) ==
mca_btl_openib_component.eager_rdma_threshold) {
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
}
@ -3934,7 +3934,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp)
if(OPAL_LIKELY(0 == rc)) {
struct ibv_srq_attr srq_attr;
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post);
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post);
if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) {
srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num;

Просмотреть файл

@ -96,7 +96,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \
do { \
(SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \
(SEQ) = OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1; \
(OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \
} while(0)
@ -108,7 +108,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
do { \
(OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
(OLD_HEAD) = (OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
} while(0)
#endif

Просмотреть файл

@ -212,7 +212,7 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
if (0 == rc) {
opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr);
opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr);
}
} else {
ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
/* Release memory resources */
do {
void *_tmp_ptr = NULL;
/* Make sure that mca_btl_openib_endpoint_connect_eager_rdma ()
* was not in "connect" or "bad" flow (failed to allocate memory)
* and changed the pointer back to NULL
*/
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) {
if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) {
if (NULL != endpoint->eager_rdma_local.reg) {
endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
&endpoint->eager_rdma_local.reg->base);
@ -766,9 +767,9 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
do_rdma = true;
} else {
if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
(mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
return;
}
@ -781,7 +782,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(cm_return > 255) {
frag->hdr->cm_seen = 255;
cm_return -= 255;
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
} else {
frag->hdr->cm_seen = cm_return;
}
@ -802,14 +803,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
}
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits,
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits,
frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits,
credits_hdr->rdma_credits);
if(do_rdma)
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
else
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_ERROR(("error posting send request errno %d says %s", rc,
strerror(errno)));
@ -823,7 +824,7 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
int status)
{
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1);
OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1);
assert(device->non_eager_rdma_endpoints >= 0);
MCA_BTL_IB_FRAG_RETURN(descriptor);
}
@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_recv_frag_t *headers_buf;
int i, rc;
uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
void *_tmp_ptr = NULL;
/* Set local rdma pointer to 1 temporarily so other threads will not try
* to enter the function */
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
(void*)1))
if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr,
(void *) 1)) {
return;
}
headers_buf = (mca_btl_openib_recv_frag_t*)
malloc(sizeof(mca_btl_openib_recv_frag_t) *
@ -975,22 +978,23 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;
/* set local rdma pointer to real value */
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
(void*)1, buf);
endpoint->eager_rdma_local.base.pval = buf;
endpoint->eager_rdma_local.alloc_base = alloc_base;
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
mca_btl_openib_endpoint_t **p;
void *_tmp_ptr;
OBJ_RETAIN(endpoint);
assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
do {
_tmp_ptr = NULL;
p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
} while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint));
} while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint));
OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1);
OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1);
/* from this point progress function starts to poll new buffer */
OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1);
OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1);
return;
}
@ -1001,8 +1005,7 @@ free_headers_buf:
free(headers_buf);
unlock_rdma_local:
/* set local rdma pointer back to zero. Will retry later */
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
endpoint->eager_rdma_local.base.pval, NULL);
endpoint->eager_rdma_local.base.pval = NULL;
endpoint->eager_rdma_local.frags = NULL;
}

Просмотреть файл

@ -277,19 +277,19 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
{
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, -1);
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, -1);
}
static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
{
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, 1);
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, 1);
}
static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
{
frag->n_wqes_inflight = 0;
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe_inflight, 1);
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe_inflight, 1);
}
static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
@ -303,7 +303,7 @@ static inline int qp_frag_to_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mc
{
int n;
n = frag->n_wqes_inflight;
OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, n);
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, n);
frag->n_wqes_inflight = 0;
return n;
@ -420,15 +420,15 @@ static inline int mca_btl_openib_endpoint_post_rr_nolock(
if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) {
return rc;
}
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_posted, num_post);
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_credits, num_post);
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_posted, num_post);
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_credits, num_post);
/* post buffers for credit management on credit management qp */
if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) {
return rc;
}
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_return, cm_received);
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received);
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_return, cm_received);
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received);
assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num &&
ep->qps[qp].u.pp_qp.rd_credits >= 0);
@ -446,14 +446,16 @@ static inline int mca_btl_openib_endpoint_post_rr(
return ret;
}
#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1)
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0)
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \
do { \
TO = FROM; \
} while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0))
static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp)
{
int32_t _tmp_value = 0;
return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1);
}
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
OPAL_ATOMIC_SWAP_32 (&(E)->qps[(Q)].rd_credit_send_lock, 0)
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \
TO = OPAL_ATOMIC_SWAP_32(&FROM, 0)
static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep)
@ -486,7 +488,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp)
return;
try_send:
if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp))
if(btl_openib_credits_send_trylock(ep, qp))
mca_btl_openib_endpoint_send_credits(ep, qp);
}
@ -530,8 +532,8 @@ ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp, int do_signal)
static inline int
acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint)
{
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
if(OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
return OPAL_ERR_OUT_OF_RESOURCE;
}
@ -636,8 +638,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
prio = !prio;
if (BTL_OPENIB_QP_TYPE_PP(qp)) {
if (OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
if (OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
if (queue_frag) {
opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio],
(opal_list_item_t *)frag);
@ -646,8 +648,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
return OPAL_ERR_OUT_OF_RESOURCE;
}
} else {
if(OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
if(OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
if (queue_frag) {
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio],
@ -682,7 +684,7 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
if(cm_return > 255) {
hdr->cm_seen = 255;
cm_return -= 255;
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
} else {
hdr->cm_seen = cm_return;
}
@ -697,18 +699,18 @@ static inline void mca_btl_openib_endpoint_credit_release (struct mca_btl_base_e
mca_btl_openib_header_t *hdr = frag->hdr;
if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits));
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits));
}
if (do_rdma) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
} else {
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
OPAL_THREAD_ADD32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
OPAL_THREAD_ADD_FETCH32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
} else if BTL_OPENIB_QP_TYPE_SRQ(qp){
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
}
}
}

Просмотреть файл

@ -148,9 +148,9 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
}
/* check for a get token */
if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) {
if (OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,-1) < 0) {
qp_put_wqe(ep, qp);
OPAL_THREAD_ADD32(&ep->get_tokens,1);
OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
return OPAL_ERR_OUT_OF_RESOURCE;
}
@ -159,7 +159,7 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) {
qp_put_wqe(ep, qp);
OPAL_THREAD_ADD32(&ep->get_tokens,1);
OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
return OPAL_ERROR;
}

Просмотреть файл

@ -423,7 +423,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
curr_proc,
&btl_peer_data[i]);
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1);
/* and here we can reach */
opal_bitmap_set_bit(reachable, i);
@ -476,7 +476,7 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl,
portals4 entry in proc_endpoints instead of the peer_data */
for (i = 0 ; i < nprocs ; ++i) {
free(btl_peer_data[i]);
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, -1);
}
if (0 == portals4_btl->portals_num_procs)
@ -537,7 +537,7 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
if (frag->me_h != PTL_INVALID_HANDLE) {
frag->me_h = PTL_INVALID_HANDLE;
}
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag);
@ -622,7 +622,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
return NULL;
}
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1);
handle->remote_offset = 0;
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
@ -662,7 +662,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
return NULL;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,

Просмотреть файл

@ -609,7 +609,7 @@ mca_btl_portals4_component_progress(void)
mca_btl_portals4_free(&portals4_btl->super, &frag->base);
}
if (0 != frag->size) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
portals4_btl->portals_outstanding_ops));
@ -646,7 +646,7 @@ mca_btl_portals4_component_progress(void)
}
if (0 != frag->size) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
}
@ -749,7 +749,7 @@ mca_btl_portals4_component_progress(void)
OPAL_SUCCESS);
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
goto done;

Просмотреть файл

@ -53,16 +53,16 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
int ret;
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
mca_btl_portals4_component_progress();
}
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag){
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,

Просмотреть файл

@ -49,9 +49,9 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"Call to mca_btl_portals4_component_progress (4)\n"));
mca_btl_portals4_component_progress();

Просмотреть файл

@ -636,7 +636,7 @@ int mca_btl_smcuda_add_procs(
/* Sync with other local procs. Force the FIFO initialization to always
* happens before the readers access it.
*/
(void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
(void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
while( n_local_procs >
mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) {
opal_progress();
@ -976,7 +976,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
* the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue.
*/
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
(void)rc; /* this is safe to ignore as the message is requeued till success */
@ -1026,7 +1026,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
* post the descriptor in the queue - post with the relative
* address
*/
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
if( OPAL_LIKELY(0 == rc) ) {
@ -1241,7 +1241,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b
* the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue.
*/
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
"Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d",
endpoint->ipctries,

Просмотреть файл

@ -658,7 +658,7 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl,
* the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue.
*/
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
@ -980,7 +980,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
if(NULL == si) return; /* Another thread got in before us. Thats ok. */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1);
MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data,
true, false, rc);
@ -1093,7 +1093,7 @@ int mca_btl_smcuda_component_progress(void)
if( btl_ownership ) {
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
}
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, -1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1);
if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) {
btl_smcuda_process_pending_sends(endpoint);
}

Просмотреть файл

@ -40,7 +40,7 @@ add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend)
si = (btl_smcuda_pending_send_item_t*)i;
si->data = data;
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1);
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1);
/* if data was on pending send list then prepend it to the list to
* minimize reordering */

Просмотреть файл

@ -272,7 +272,7 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,1);
opal_atomic_add_fetch_32(&ugni_module->reg_count,1);
}
return rc;
@ -286,7 +286,7 @@ ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,-1);
opal_atomic_add_fetch_32(&ugni_module->reg_count,-1);
}
return rc;

Просмотреть файл

@ -543,7 +543,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false;
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1);
(void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, -1);
}
(void) mca_btl_ugni_ep_connect_progress (ep);

Просмотреть файл

@ -181,7 +181,7 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec
}
} while (device->dev_smsg_local_cq.active_operations);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
(void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
}
mca_btl_ugni_device_lock (device);
@ -278,7 +278,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
(void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
/* send all pending messages */
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
@ -312,7 +312,7 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1);
(void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
}
return mca_btl_rc_ugni_to_opal (rc);

Просмотреть файл

@ -192,7 +192,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
opal_atomic_mb ();
ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1);
ref_cnt = OPAL_THREAD_ADD_FETCH32(&frag->ref_cnt, -1);
if (ref_cnt) {
assert (ref_cnt > 0);
return false;

Просмотреть файл

@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
mca_btl_ugni_base_frag_t frag;
mca_btl_base_segment_t seg;
bool disconnect = false;
int32_t _tmp_value = 0;
uintptr_t data_ptr;
gni_return_t rc;
uint32_t len;
int count = 0;
if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) {
if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) {
/* already progressing (we can't support reentry here) */
return 0;
}

Просмотреть файл

@ -261,14 +261,14 @@ static inline bool mca_btl_vader_check_fboxes (void)
static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
{
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) {
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) {
/* protect access to mca_btl_vader_component.segment_offset */
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size &&
mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) {
/* verify the remote side will accept another fbox */
if (0 <= opal_atomic_add_32 (&ep->fifo->fbox_available, -1)) {
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size;
@ -280,7 +280,7 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc
hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
++mca_btl_vader_component.fbox_count;
} else {
opal_atomic_add_32 (&ep->fifo->fbox_available, 1);
opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
}
opal_atomic_wmb ();

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
@ -30,8 +30,9 @@
#include "btl_vader_endpoint.h"
#include "btl_vader_frag.h"
#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z))
#if SIZEOF_VOID_P == 8
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y))
#define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll
@ -40,7 +41,6 @@
typedef int64_t fifo_value_t;
#else
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y))
#define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl
@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m
if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
opal_atomic_rmb();
if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) {
if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) {
while (VADER_FIFO_FREE == hdr->next) {
opal_atomic_rmb ();
}

Просмотреть файл

@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
vader_ctx->reg[0] = reg;
if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) {
(void)opal_atomic_add (&reg->ref_count, 1);
opal_atomic_add (&reg->ref_count, 1);
return 1;
}
@ -93,7 +93,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
/* start the new segment from the lower of the two bases */
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
if (OPAL_LIKELY(0 == opal_atomic_add_32 (&reg->ref_count, -1))) {
if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (&reg->ref_count, -1))) {
/* this pointer is not in use */
(void) xpmem_detach (reg->rcache_context);
OBJ_RELEASE(reg);
@ -143,7 +143,7 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
int32_t ref_count;
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
/* protect rcache access */
mca_rcache_base_vma_delete (vma_module, reg);

Просмотреть файл

@ -131,7 +131,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp,
}
/* increment the number of processes that are attached to the segment. */
(void)opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1);
(void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1);
/* commit the changes before we return */
opal_atomic_wmb();

Просмотреть файл

@ -131,7 +131,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
if (huge_page->path) {
int32_t count;
count = opal_atomic_add_32 (&huge_page->count, 1);
count = opal_atomic_add_fetch_32 (&huge_page->count, 1);
rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
getpid (), count);

Просмотреть файл

@ -232,7 +232,7 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_
/* This segment fits fully within an existing segment. */
rcache_grdma->stat_cache_hit++;
int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1);
int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
(void)ref_cnt;
@ -296,7 +296,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
/* get updated access flags */
access_flags = find_args.access_flags;
OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
}
item = opal_free_list_get_mt (&rcache_grdma->reg_list);
@ -380,7 +380,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
(opal_list_item_t*)(*reg));
}
rcache_grdma->stat_cache_found++;
opal_atomic_add_32 (&(*reg)->ref_count, 1);
opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1);
} else {
rcache_grdma->stat_cache_notfound++;
}
@ -398,7 +398,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
int rc;
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning registration %p, remaining references %d", (void *) reg, ref_count));

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше