Merge pull request #4552 from hjelmn/asm_cleanup2
Add atomic fetch-and-op and compare-exchange functions
Этот коммит содержится в:
Коммит
5cb72aa568
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -378,7 +378,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data,
|
||||
* a read only memory).
|
||||
*/
|
||||
if( NULL != pArgs ) {
|
||||
OPAL_THREAD_ADD32(&pArgs->ref_count, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, 1);
|
||||
dest_data->args = pArgs;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
@ -396,7 +396,7 @@ int32_t ompi_datatype_release_args( ompi_datatype_t* pData )
|
||||
ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args;
|
||||
|
||||
assert( 0 < pArgs->ref_count );
|
||||
OPAL_THREAD_ADD32(&pArgs->ref_count, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, -1);
|
||||
if( 0 == pArgs->ref_count ) {
|
||||
/* There are some duplicated datatypes around that have a pointer to this
|
||||
* args. We will release them only when the last datatype disappears.
|
||||
@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
|
||||
void* recursive_buffer;
|
||||
|
||||
if (NULL == packed_description) {
|
||||
if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) {
|
||||
void *_tmp_ptr = NULL;
|
||||
if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) {
|
||||
if( ompi_datatype_is_predefined(datatype) ) {
|
||||
packed_description = malloc(2 * sizeof(int));
|
||||
} else if( NULL == args ) {
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
|
||||
ompi_proc_t *real_proc =
|
||||
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));
|
||||
|
||||
if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) {
|
||||
if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) {
|
||||
OBJ_RETAIN(real_proc);
|
||||
}
|
||||
|
||||
|
@ -314,7 +314,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module)
|
||||
/* if we ever were used for a collective op, do the progress cleanup. */
|
||||
if (true == module->comm_registered) {
|
||||
int32_t tmp =
|
||||
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, -1);
|
||||
if (0 == tmp) {
|
||||
opal_progress_unregister(ompi_coll_libnbc_progress);
|
||||
}
|
||||
|
@ -618,7 +618,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t
|
||||
/* register progress */
|
||||
if (need_register) {
|
||||
int32_t tmp =
|
||||
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1);
|
||||
if (tmp == 1) {
|
||||
opal_progress_register(ompi_coll_libnbc_progress);
|
||||
}
|
||||
|
@ -120,7 +120,7 @@ static int
|
||||
mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
|
||||
{
|
||||
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
|
||||
if( 1 == opal_atomic_add_32(&monitoring_module->is_initialized, 1) ) {
|
||||
if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) {
|
||||
MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm);
|
||||
monitoring_module->data = mca_common_monitoring_coll_new(comm);
|
||||
OPAL_MONITORING_PRINT_INFO("coll_module_enabled");
|
||||
@ -132,7 +132,7 @@ static int
|
||||
mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
|
||||
{
|
||||
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
|
||||
if( 0 == opal_atomic_sub_32(&monitoring_module->is_initialized, 1) ) {
|
||||
if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) {
|
||||
MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm);
|
||||
mca_common_monitoring_coll_release(monitoring_module->data);
|
||||
monitoring_module->data = NULL;
|
||||
|
@ -68,7 +68,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
|
||||
zero_md_h = mca_coll_portals4_component.zero_md_h;
|
||||
data_md_h = mca_coll_portals4_component.data_md_h;
|
||||
|
||||
internal_count = opal_atomic_add_size_t(&module->coll_count, 1);
|
||||
internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
|
||||
|
||||
/*
|
||||
** DATATYPE and SIZES
|
||||
|
@ -44,7 +44,7 @@ barrier_hypercube_top(struct ompi_communicator_t *comm,
|
||||
|
||||
request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER;
|
||||
|
||||
count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
ret = PtlCTAlloc(mca_coll_portals4_component.ni_h,
|
||||
&request->u.barrier.rtr_ct_h);
|
||||
|
@ -176,7 +176,7 @@ bcast_kary_tree_top(void *buff, int count,
|
||||
zero_md_h = mca_coll_portals4_component.zero_md_h;
|
||||
data_md_h = mca_coll_portals4_component.data_md_h;
|
||||
|
||||
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
|
||||
/*
|
||||
@ -513,7 +513,7 @@ bcast_pipeline_top(void *buff, int count,
|
||||
zero_md_h = mca_coll_portals4_component.zero_md_h;
|
||||
data_md_h = mca_coll_portals4_component.data_md_h;
|
||||
|
||||
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
/*
|
||||
** DATATYPE and SIZES
|
||||
|
@ -582,7 +582,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
||||
/* Setup Common Parameters */
|
||||
/**********************************/
|
||||
|
||||
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank );
|
||||
bmtree = portals4_module->cached_in_order_bmtree;
|
||||
@ -879,7 +879,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
||||
|
||||
i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank);
|
||||
|
||||
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
ret = setup_gather_buffers_linear(comm, request, portals4_module);
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||
|
@ -69,7 +69,7 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
|
||||
zero_md_h = mca_coll_portals4_component.zero_md_h;
|
||||
data_md_h = mca_coll_portals4_component.data_md_h;
|
||||
|
||||
internal_count = opal_atomic_add_size_t(&module->coll_count, 1);
|
||||
internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
|
||||
|
||||
/*
|
||||
** DATATYPE and SIZES
|
||||
|
@ -399,7 +399,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
||||
|
||||
i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);
|
||||
|
||||
request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);
|
||||
request->u.scatter.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
|
||||
|
||||
ret = setup_scatter_buffers_linear(comm, request, portals4_module);
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||
|
@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one;
|
||||
* Macro to release an in-use flag from this process
|
||||
*/
|
||||
#define FLAG_RELEASE(flag) \
|
||||
(void)opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
|
||||
opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
|
||||
|
||||
/**
|
||||
* Macro to copy a single segment in from a user buffer to a shared
|
||||
|
@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
|
||||
if (0 != rank) {
|
||||
/* Get parent *in* buffer */
|
||||
parent = &data->mcb_barrier_control_parent[buffer_set];
|
||||
(void)opal_atomic_add(parent, 1);
|
||||
opal_atomic_add (parent, 1);
|
||||
|
||||
SPIN_CONDITION(0 != *me_out, exit_label2);
|
||||
*me_out = 0;
|
||||
|
@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
|
||||
OBJ_RETAIN(sm_module->previous_reduce_module);
|
||||
|
||||
/* Indicate that we have successfully attached and setup */
|
||||
(void)opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
|
||||
opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
|
||||
|
||||
/* Wait for everyone in this communicator to attach and setup */
|
||||
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||
|
@ -209,7 +209,7 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar,
|
||||
int mca_common_monitoring_init( void )
|
||||
{
|
||||
if( !mca_common_monitoring_enabled ) return OMPI_ERROR;
|
||||
if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */
|
||||
if( 1 < opal_atomic_add_fetch_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */
|
||||
|
||||
char hostname[OPAL_MAXHOSTNAMELEN] = "NA";
|
||||
/* Initialize constant */
|
||||
@ -229,7 +229,7 @@ int mca_common_monitoring_init( void )
|
||||
void mca_common_monitoring_finalize( void )
|
||||
{
|
||||
if( ! mca_common_monitoring_enabled || /* Don't release if not last */
|
||||
0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return;
|
||||
0 < opal_atomic_sub_fetch_32(&mca_common_monitoring_hold, 1) ) return;
|
||||
|
||||
OPAL_MONITORING_PRINT_INFO("common_component_finish");
|
||||
/* Dump monitoring information */
|
||||
@ -503,21 +503,21 @@ void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag)
|
||||
|
||||
/* Keep tracks of the data_size distribution */
|
||||
if( 0 == data_size ) {
|
||||
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1);
|
||||
opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram], 1);
|
||||
} else {
|
||||
int log2_size = log10(data_size)/log10_2;
|
||||
if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */
|
||||
log2_size = max_size_histogram - 2;
|
||||
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1);
|
||||
opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1);
|
||||
}
|
||||
|
||||
/* distinguishes positive and negative tags if requested */
|
||||
if( (tag < 0) && (mca_common_monitoring_filter()) ) {
|
||||
opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size);
|
||||
opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1);
|
||||
opal_atomic_add_fetch_size_t(&filtered_pml_data[world_rank], data_size);
|
||||
opal_atomic_add_fetch_size_t(&filtered_pml_count[world_rank], 1);
|
||||
} else { /* if filtered monitoring is not activated data is aggregated indifferently */
|
||||
opal_atomic_add_size_t(&pml_data[world_rank], data_size);
|
||||
opal_atomic_add_size_t(&pml_count[world_rank], 1);
|
||||
opal_atomic_add_fetch_size_t(&pml_data[world_rank], data_size);
|
||||
opal_atomic_add_fetch_size_t(&pml_count[world_rank], 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -564,11 +564,11 @@ void mca_common_monitoring_record_osc(int world_rank, size_t data_size,
|
||||
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
|
||||
|
||||
if( SEND == dir ) {
|
||||
opal_atomic_add_size_t(&osc_data_s[world_rank], data_size);
|
||||
opal_atomic_add_size_t(&osc_count_s[world_rank], 1);
|
||||
opal_atomic_add_fetch_size_t(&osc_data_s[world_rank], data_size);
|
||||
opal_atomic_add_fetch_size_t(&osc_count_s[world_rank], 1);
|
||||
} else {
|
||||
opal_atomic_add_size_t(&osc_data_r[world_rank], data_size);
|
||||
opal_atomic_add_size_t(&osc_count_r[world_rank], 1);
|
||||
opal_atomic_add_fetch_size_t(&osc_data_r[world_rank], data_size);
|
||||
opal_atomic_add_fetch_size_t(&osc_count_r[world_rank], 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -650,8 +650,8 @@ void mca_common_monitoring_record_coll(int world_rank, size_t data_size)
|
||||
{
|
||||
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
|
||||
|
||||
opal_atomic_add_size_t(&coll_data[world_rank], data_size);
|
||||
opal_atomic_add_size_t(&coll_count[world_rank], 1);
|
||||
opal_atomic_add_fetch_size_t(&coll_data[world_rank], data_size);
|
||||
opal_atomic_add_fetch_size_t(&coll_count[world_rank], 1);
|
||||
}
|
||||
|
||||
static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar,
|
||||
|
@ -236,8 +236,8 @@ void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data
|
||||
return;
|
||||
}
|
||||
#endif /* OPAL_ENABLE_DEBUG */
|
||||
opal_atomic_add_size_t(&data->o2a_size, size);
|
||||
opal_atomic_add_size_t(&data->o2a_count, 1);
|
||||
opal_atomic_add_fetch_size_t(&data->o2a_size, size);
|
||||
opal_atomic_add_fetch_size_t(&data->o2a_count, 1);
|
||||
}
|
||||
|
||||
int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar,
|
||||
@ -277,8 +277,8 @@ void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data
|
||||
return;
|
||||
}
|
||||
#endif /* OPAL_ENABLE_DEBUG */
|
||||
opal_atomic_add_size_t(&data->a2o_size, size);
|
||||
opal_atomic_add_size_t(&data->a2o_count, 1);
|
||||
opal_atomic_add_fetch_size_t(&data->a2o_size, size);
|
||||
opal_atomic_add_fetch_size_t(&data->a2o_count, 1);
|
||||
}
|
||||
|
||||
int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar,
|
||||
@ -318,8 +318,8 @@ void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data
|
||||
return;
|
||||
}
|
||||
#endif /* OPAL_ENABLE_DEBUG */
|
||||
opal_atomic_add_size_t(&data->a2a_size, size);
|
||||
opal_atomic_add_size_t(&data->a2a_count, 1);
|
||||
opal_atomic_add_fetch_size_t(&data->a2a_size, size);
|
||||
opal_atomic_add_fetch_size_t(&data->a2a_count, 1);
|
||||
}
|
||||
|
||||
int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar,
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me,
|
||||
int
|
||||
ompi_mtl_portals4_flowctl_trigger(void)
|
||||
{
|
||||
int32_t _tmp_value = 0;
|
||||
int ret;
|
||||
|
||||
if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) {
|
||||
if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) {
|
||||
/* send trigger to root */
|
||||
ret = PtlPut(ompi_mtl_portals4.zero_md_h,
|
||||
0,
|
||||
@ -346,7 +347,7 @@ start_recover(void)
|
||||
int64_t epoch_counter;
|
||||
|
||||
ompi_mtl_portals4.flowctl.flowctl_active = true;
|
||||
epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
|
||||
epoch_counter = opal_atomic_add_fetch_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"Entering flowctl_start_recover %ld",
|
||||
|
@ -53,14 +53,14 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
|
||||
int32_t frag_count;
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
while (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
ompi_mtl_portals4_progress();
|
||||
}
|
||||
#endif
|
||||
|
||||
frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
|
||||
ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);
|
||||
ret = OPAL_THREAD_ADD_FETCH32(&(request->pending_reply), frag_count);
|
||||
|
||||
for (i = 0 ; i < frag_count ; i++) {
|
||||
opal_free_list_item_t *tmp;
|
||||
@ -385,14 +385,14 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
|
||||
opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
|
||||
&rndv_get_frag->super);
|
||||
|
||||
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
|
||||
ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_reply), -1);
|
||||
if (ret > 0) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
assert(ptl_request->pending_reply == 0);
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
#endif
|
||||
|
||||
/* make sure the data is in the right place. Use _ucount for
|
||||
@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
ptl_request->super.type = portals4_req_recv;
|
||||
ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
|
||||
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
|
||||
ptl_request->hdr_data = 0;
|
||||
#endif
|
||||
ptl_request->buffer_ptr = (free_after) ? start : NULL;
|
||||
@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
|
||||
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
|
||||
ptl_request->hdr_data = 0;
|
||||
#endif
|
||||
ptl_request->super.type = portals4_req_recv;
|
||||
|
@ -45,7 +45,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
(ompi_mtl_portals4_isend_request_t*) ptl_base_request;
|
||||
|
||||
if (PTL_EVENT_GET == ev->type) {
|
||||
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
|
||||
ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
|
||||
if (ret > 0) {
|
||||
/* wait for other gets */
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret));
|
||||
@ -94,7 +94,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
|
||||
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||
&pending->super.super);
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
ompi_mtl_portals4_flowctl_trigger();
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -124,7 +124,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
|
||||
if ((eager == ompi_mtl_portals4.protocol) ||
|
||||
(ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) {
|
||||
val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
|
||||
val = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
|
||||
}
|
||||
if (0 == val) {
|
||||
add = 2; /* We do not have to wait for any get, so we must add an extra count to cause the message to complete */
|
||||
@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||
add++;
|
||||
}
|
||||
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);
|
||||
val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add);
|
||||
assert(val <= 3);
|
||||
|
||||
if (val == 3) {
|
||||
@ -174,7 +174,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
|
||||
*complete = true;
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl,
|
||||
&ptl_request->pending->super);
|
||||
|
||||
@ -422,15 +422,15 @@ ompi_mtl_portals4_pending_list_progress()
|
||||
|
||||
while ((!ompi_mtl_portals4.flowctl.flowctl_active) &&
|
||||
(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
|
||||
val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1);
|
||||
val = OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1);
|
||||
if (val < 0) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends);
|
||||
if (OPAL_UNLIKELY(NULL == item)) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -456,7 +456,7 @@ ompi_mtl_portals4_pending_list_progress()
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||
&pending->super.super);
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
|
||||
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
|
||||
ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1);
|
||||
ptl_request->buffer_ptr = (free_after) ? start : NULL;
|
||||
ptl_request->length = length;
|
||||
ptl_request->event_count = 0;
|
||||
@ -520,15 +520,15 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
pending->ptl_proc = ptl_proc;
|
||||
pending->ptl_request = ptl_request;
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
if (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||
&pending->super.super);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||
&pending->super.super);
|
||||
ompi_mtl_portals4_pending_list_progress();
|
||||
@ -536,7 +536,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||
&pending->super.super);
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -61,7 +61,7 @@
|
||||
static inline void* \
|
||||
ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \
|
||||
{ \
|
||||
if( 1 == opal_atomic_add_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \
|
||||
if( 1 == opal_atomic_add_fetch_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \
|
||||
/* Saves the original module functions in \
|
||||
* ompi_osc_monitoring_module_## template ##_template \
|
||||
*/ \
|
||||
|
@ -99,7 +99,7 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
|
||||
PTL_SUM,
|
||||
PTL_INT32_T);
|
||||
if (ret != OMPI_SUCCESS) return ret;
|
||||
OPAL_THREAD_ADD64(&module->opcount, 1);
|
||||
OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
|
||||
}
|
||||
|
||||
ret = ompi_osc_portals4_complete_all(module);
|
||||
@ -144,7 +144,7 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
|
||||
PTL_SUM,
|
||||
PTL_INT32_T);
|
||||
if (ret != OMPI_SUCCESS) return ret;
|
||||
OPAL_THREAD_ADD64(&module->opcount, 1);
|
||||
OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
|
||||
}
|
||||
} else {
|
||||
module->post_group = NULL;
|
||||
|
@ -206,7 +206,7 @@ segmentedPut(int64_t *opcount,
|
||||
ptl_size_t bytes_put = 0;
|
||||
|
||||
do {
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
ptl_size_t frag_length = MIN(put_length, segment_length);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
@ -222,7 +222,7 @@ segmentedPut(int64_t *opcount,
|
||||
user_ptr,
|
||||
hdr_data);
|
||||
if (PTL_OK != ret) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlPut failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
@ -251,7 +251,7 @@ segmentedGet(int64_t *opcount,
|
||||
ptl_size_t bytes_gotten = 0;
|
||||
|
||||
do {
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
ptl_size_t frag_length = MIN(get_length, segment_length);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
@ -266,7 +266,7 @@ segmentedGet(int64_t *opcount,
|
||||
target_offset + bytes_gotten,
|
||||
user_ptr);
|
||||
if (PTL_OK != ret) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlGet failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
@ -297,7 +297,7 @@ segmentedAtomic(int64_t *opcount,
|
||||
ptl_size_t sent = 0;
|
||||
|
||||
do {
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
ptl_size_t frag_length = MIN(length, segment_length);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
@ -315,7 +315,7 @@ segmentedAtomic(int64_t *opcount,
|
||||
ptl_op,
|
||||
ptl_dt);
|
||||
if (PTL_OK != ret) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlAtomic failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
@ -348,7 +348,7 @@ segmentedFetchAtomic(int64_t *opcount,
|
||||
ptl_size_t sent = 0;
|
||||
|
||||
do {
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
ptl_size_t frag_length = MIN(length, segment_length);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
@ -367,7 +367,7 @@ segmentedFetchAtomic(int64_t *opcount,
|
||||
ptl_op,
|
||||
ptl_dt);
|
||||
if (PTL_OK != ret) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlFetchAtomic failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
@ -399,7 +399,7 @@ segmentedSwap(int64_t *opcount,
|
||||
ptl_size_t sent = 0;
|
||||
|
||||
do {
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
ptl_size_t frag_length = MIN(length, segment_length);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
@ -419,7 +419,7 @@ segmentedSwap(int64_t *opcount,
|
||||
PTL_SWAP,
|
||||
ptl_dt);
|
||||
if (PTL_OK != ret) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlSwap failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
@ -547,7 +547,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
|
||||
return ret;
|
||||
}
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
|
||||
@ -564,7 +564,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d PtlGet() failed: ret = %d",
|
||||
__FUNCTION__, __LINE__, ret));
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -716,7 +716,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
|
||||
return ret;
|
||||
}
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
|
||||
@ -735,7 +735,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d PtlPut() failed: ret = %d",
|
||||
__FUNCTION__, __LINE__, ret));
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1252,7 +1252,7 @@ put_to_noncontig(int64_t *opcount,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
@ -1270,7 +1270,7 @@ put_to_noncontig(int64_t *opcount,
|
||||
user_ptr,
|
||||
0);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1361,7 +1361,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
@ -1379,7 +1379,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
|
||||
user_ptr,
|
||||
0);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
|
||||
/* determine how much to transfer in this operation */
|
||||
atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
@ -1501,7 +1501,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
|
||||
ptl_op,
|
||||
ptl_dt);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1586,7 +1586,7 @@ get_from_noncontig(int64_t *opcount,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(opcount, 1);
|
||||
opal_atomic_add_fetch_64(opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
@ -1602,7 +1602,7 @@ get_from_noncontig(int64_t *opcount,
|
||||
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
|
||||
user_ptr);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_atomic_add_64(opcount, -1);
|
||||
opal_atomic_add_fetch_64(opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1687,7 +1687,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
|
||||
@ -1703,7 +1703,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
|
||||
user_ptr);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1817,7 +1817,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
|
||||
@ -1844,7 +1844,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlSwap failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1969,7 +1969,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
/* determine how much to transfer in this operation */
|
||||
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
|
||||
|
||||
opal_atomic_add_64(&module->opcount, 1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
|
||||
@ -1995,7 +1995,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d PtlFetchAtomic failed with return value %d",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
opal_atomic_add_64(&module->opcount, -1);
|
||||
opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2411,7 +2411,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
|
||||
do {
|
||||
size_t msg_length = MIN(module->atomic_max, length - sent);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Atomic", __FUNCTION__, __LINE__));
|
||||
@ -2428,7 +2428,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
|
||||
ptl_op,
|
||||
ptl_dt);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
(void)opal_atomic_add_64(&module->opcount, -1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
|
||||
return ret;
|
||||
}
|
||||
@ -3149,7 +3149,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
|
||||
do {
|
||||
size_t msg_length = MIN(module->atomic_max, length - sent);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Atomic", __FUNCTION__, __LINE__));
|
||||
@ -3166,7 +3166,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
|
||||
ptl_op,
|
||||
ptl_dt);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
(void)opal_atomic_add_64(&module->opcount, -1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, -1);
|
||||
return ret;
|
||||
}
|
||||
sent += msg_length;
|
||||
@ -3541,7 +3541,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Swap", __FUNCTION__, __LINE__));
|
||||
@ -3613,7 +3613,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Swap", __FUNCTION__, __LINE__));
|
||||
ret = PtlSwap(module->md_h,
|
||||
@ -3635,7 +3635,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
|
||||
|
||||
md_offset = (ptl_size_t) result_addr;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
|
||||
"%s,%d Get", __FUNCTION__, __LINE__));
|
||||
ret = PtlGet(module->md_h,
|
||||
@ -3648,7 +3648,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
|
||||
NULL);
|
||||
} else {
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
ret = ompi_osc_portals4_get_op(op, &ptl_op);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
|
@ -230,8 +230,8 @@ process:
|
||||
}
|
||||
|
||||
req = (ompi_osc_portals4_request_t*) ev.user_ptr;
|
||||
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);
|
||||
ops = opal_atomic_add_32(&req->ops_committed, 1);
|
||||
opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength);
|
||||
ops = opal_atomic_add_fetch_32(&req->ops_committed, 1);
|
||||
if (ops == req->ops_expected) {
|
||||
ompi_request_complete(&req->super, true);
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ lk_cas64(ompi_osc_portals4_module_t *module,
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlSwap(module->md_h,
|
||||
(ptl_size_t) result_val,
|
||||
@ -76,7 +76,7 @@ lk_write64(ompi_osc_portals4_module_t *module,
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlPut(module->md_h,
|
||||
(ptl_size_t) &write_val,
|
||||
@ -106,7 +106,7 @@ lk_add64(ompi_osc_portals4_module_t *module,
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
(void)opal_atomic_add_fetch_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlFetchAtomic(module->md_h,
|
||||
(ptl_size_t) result_val,
|
||||
|
@ -8,7 +8,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||
@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer
|
||||
|
||||
static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value)
|
||||
{
|
||||
int32_t peer_flags, new_flags;
|
||||
do {
|
||||
peer_flags = peer->flags;
|
||||
if (value) {
|
||||
new_flags = peer_flags | flag;
|
||||
} else {
|
||||
new_flags = peer_flags & ~flag;
|
||||
}
|
||||
} while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags));
|
||||
if (value) {
|
||||
OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag);
|
||||
} else {
|
||||
OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)
|
||||
@ -518,7 +514,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"mark_incoming_completion marking active incoming complete. module %p, count = %d",
|
||||
(void *) module, (int) module->active_incoming_frag_count + 1));
|
||||
new_value = OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1);
|
||||
new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1);
|
||||
if (new_value >= 0) {
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
opal_condition_broadcast(&module->cond);
|
||||
@ -530,7 +526,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"mark_incoming_completion marking passive incoming complete. module %p, source = %d, count = %d",
|
||||
(void *) module, source, (int) peer->passive_incoming_frag_count + 1));
|
||||
new_value = OPAL_THREAD_ADD32((int32_t *) &peer->passive_incoming_frag_count, 1);
|
||||
new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &peer->passive_incoming_frag_count, 1);
|
||||
if (0 == new_value) {
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
opal_condition_broadcast(&module->cond);
|
||||
@ -554,7 +550,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
|
||||
*/
|
||||
static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
|
||||
{
|
||||
int32_t new_value = OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, 1);
|
||||
int32_t new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, 1);
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"mark_outgoing_completion: outgoing_frag_count = %d", new_value));
|
||||
if (new_value >= 0) {
|
||||
@ -578,12 +574,12 @@ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
|
||||
*/
|
||||
static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count)
|
||||
{
|
||||
OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, -count);
|
||||
OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, -count);
|
||||
if (MPI_PROC_NULL != target) {
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target,
|
||||
count, module->epoch_outgoing_frag_count[target] + count));
|
||||
OPAL_THREAD_ADD32((int32_t *) (module->epoch_outgoing_frag_count + target), count);
|
||||
OPAL_THREAD_ADD_FETCH32((int32_t *) (module->epoch_outgoing_frag_count + target), count);
|
||||
}
|
||||
}
|
||||
|
||||
@ -721,7 +717,7 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module)
|
||||
/* the LSB of the tag is used be the receiver to determine if the
|
||||
message is a passive or active target (ie, where to mark
|
||||
completion). */
|
||||
int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4);
|
||||
int32_t tmp = OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &module->tag_counter, 4);
|
||||
return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch);
|
||||
}
|
||||
|
||||
|
@ -183,7 +183,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
|
||||
incoming_reqs));
|
||||
|
||||
/* set our complete condition for incoming requests */
|
||||
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -incoming_reqs);
|
||||
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs);
|
||||
|
||||
/* wait for completion */
|
||||
while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) {
|
||||
@ -272,7 +272,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"found unexpected post from %d",
|
||||
peer->rank));
|
||||
OPAL_THREAD_ADD32 (&sync->sync_expected, -1);
|
||||
OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
|
||||
ompi_osc_pt2pt_peer_set_unex (peer, false);
|
||||
}
|
||||
}
|
||||
@ -574,12 +574,12 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i
|
||||
frag_count, module->active_incoming_frag_count, module->num_complete_msgs));
|
||||
|
||||
/* the current fragment is not part of the frag_count so we need to add it here */
|
||||
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -frag_count);
|
||||
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count);
|
||||
|
||||
/* make sure the signal count is written before changing the complete message count */
|
||||
opal_atomic_wmb ();
|
||||
|
||||
if (0 == OPAL_THREAD_ADD32(&module->num_complete_msgs, 1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) {
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
opal_condition_broadcast (&module->cond);
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
@ -62,7 +62,7 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request)
|
||||
/* update the cbdata for ompi_osc_pt2pt_comm_complete */
|
||||
request->req_complete_cb_data = pt2pt_request->module;
|
||||
|
||||
if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32(&pt2pt_request->outstanding_requests, -1)) {
|
||||
ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR);
|
||||
}
|
||||
|
||||
|
@ -667,7 +667,7 @@ static int accumulate_cb (ompi_request_t *request)
|
||||
rank = acc_data->peer;
|
||||
}
|
||||
|
||||
if (0 == OPAL_THREAD_ADD32(&acc_data->request_count, -1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32(&acc_data->request_count, -1)) {
|
||||
/* no more requests needed before the buffer can be accumulated */
|
||||
|
||||
if (acc_data->source) {
|
||||
@ -716,9 +716,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os
|
||||
/* NTH: ensure we don't leave wait/process_flush/etc until this
|
||||
* accumulate operation is complete. */
|
||||
if (active_target) {
|
||||
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -1);
|
||||
} else {
|
||||
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
|
||||
}
|
||||
|
||||
pending_acc->active_target = active_target;
|
||||
@ -1353,7 +1353,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
|
||||
"process_flush header = {.frag_count = %d}", flush_header->frag_count));
|
||||
|
||||
/* increase signal count by incoming frags */
|
||||
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count);
|
||||
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"%d: process_flush: received message from %d. passive_incoming_frag_count = %d",
|
||||
@ -1372,7 +1372,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
/* signal incomming will increment this counter */
|
||||
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
|
||||
|
||||
return sizeof (*flush_header);
|
||||
}
|
||||
@ -1387,7 +1387,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
|
||||
"process_unlock header = {.frag_count = %d}", unlock_header->frag_count));
|
||||
|
||||
/* increase signal count by incoming frags */
|
||||
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count);
|
||||
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||
"osc pt2pt: processing unlock request from %d. frag count = %d, processed_count = %d",
|
||||
@ -1406,7 +1406,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
/* signal incoming will increment this counter */
|
||||
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
|
||||
|
||||
return sizeof (*unlock_header);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -105,8 +105,8 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om
|
||||
"osc pt2pt: flushing active fragment to target %d. pending: %d",
|
||||
active_frag->target, active_frag->pending));
|
||||
|
||||
if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) {
|
||||
if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) {
|
||||
if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) {
|
||||
if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) {
|
||||
/* communication going on while synchronizing; this is an rma usage bug */
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module,
|
||||
ompi_osc_pt2pt_frag_t* buffer)
|
||||
{
|
||||
opal_atomic_wmb ();
|
||||
if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) {
|
||||
opal_atomic_mb ();
|
||||
return ompi_osc_pt2pt_frag_start(module, buffer);
|
||||
}
|
||||
@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp
|
||||
|
||||
/* to ensure ordering flush the buffer on the peer */
|
||||
curr = peer->active_frag;
|
||||
if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) {
|
||||
if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) {
|
||||
/* If there's something pending, the pending finish will
|
||||
start the buffer. Otherwise, we need to start it now. */
|
||||
int ret = ompi_osc_pt2pt_frag_finish (module, curr);
|
||||
@ -142,11 +142,11 @@ static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, i
|
||||
curr->pending_long_sends = long_send;
|
||||
peer->active_frag = curr;
|
||||
} else {
|
||||
OPAL_THREAD_ADD32(&curr->header->num_ops, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1);
|
||||
curr->pending_long_sends += long_send;
|
||||
}
|
||||
|
||||
OPAL_THREAD_ADD32(&curr->pending, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
|
||||
} else {
|
||||
curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len);
|
||||
if (OPAL_UNLIKELY(NULL == curr)) {
|
||||
|
@ -64,7 +64,7 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp
|
||||
|
||||
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
|
||||
|
||||
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
|
||||
|
||||
acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);
|
||||
if (!acquired) {
|
||||
@ -91,7 +91,7 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
|
||||
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
|
||||
int lock_type = lock->sync.lock.type;
|
||||
|
||||
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
|
||||
|
||||
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
|
||||
|
||||
@ -99,9 +99,9 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
|
||||
"ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status));
|
||||
|
||||
if (MPI_LOCK_EXCLUSIVE == lock_type) {
|
||||
OPAL_THREAD_ADD32(&module->lock_status, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
|
||||
ompi_osc_pt2pt_activate_next_lock (module);
|
||||
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) {
|
||||
} else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
|
||||
ompi_osc_pt2pt_activate_next_lock (module);
|
||||
}
|
||||
|
||||
@ -128,7 +128,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
|
||||
|
||||
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
|
||||
|
||||
@ -145,7 +145,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
|
||||
|
||||
ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req));
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_THREAD_ADD32(&lock->sync_expected, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1);
|
||||
} else {
|
||||
ompi_osc_pt2pt_peer_set_locked (peer, true);
|
||||
}
|
||||
@ -163,7 +163,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
|
||||
ompi_osc_pt2pt_header_unlock_t unlock_req;
|
||||
int ret;
|
||||
|
||||
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
|
||||
|
||||
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
|
||||
|
||||
@ -207,7 +207,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
|
||||
int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1);
|
||||
int ret;
|
||||
|
||||
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
|
||||
|
||||
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
|
||||
|
||||
@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in
|
||||
break;
|
||||
}
|
||||
|
||||
if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) {
|
||||
if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) {
|
||||
break;
|
||||
}
|
||||
|
||||
lock_status = module->lock_status;
|
||||
} while (1);
|
||||
} else {
|
||||
queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1);
|
||||
int32_t _tmp_value = 0;
|
||||
queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1);
|
||||
}
|
||||
|
||||
if (queue) {
|
||||
@ -909,9 +908,9 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
if (-1 == module->lock_status) {
|
||||
OPAL_THREAD_ADD32(&module->lock_status, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
|
||||
ompi_osc_pt2pt_activate_next_lock (module);
|
||||
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) {
|
||||
} else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
|
||||
ompi_osc_pt2pt_activate_next_lock (module);
|
||||
}
|
||||
|
||||
|
@ -166,7 +166,7 @@ static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *syn
|
||||
|
||||
static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync)
|
||||
{
|
||||
int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1);
|
||||
int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
|
||||
if (0 == new_value) {
|
||||
OPAL_THREAD_LOCK(&sync->lock);
|
||||
if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) {
|
||||
|
@ -516,7 +516,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
subreq->internal = true;
|
||||
subreq->parent_request = request;
|
||||
if (request) {
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
|
||||
}
|
||||
|
||||
if (result_datatype) {
|
||||
@ -557,7 +557,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
|
||||
|
||||
if (request) {
|
||||
/* release our reference so the request can complete */
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
if (source_datatype) {
|
||||
|
@ -8,7 +8,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||
@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
|
||||
ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result);
|
||||
assert (OMPI_SUCCESS == ret);
|
||||
} else {
|
||||
result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank);
|
||||
ompi_osc_rdma_lock_t _tmp_value = 0;
|
||||
|
||||
result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank);
|
||||
}
|
||||
|
||||
if (OPAL_LIKELY(0 == result)) {
|
||||
|
@ -217,7 +217,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
subreq->parent_request = request;
|
||||
|
||||
if (request) {
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
|
||||
}
|
||||
} else if (!alloc_reqs) {
|
||||
subreq = request;
|
||||
@ -232,7 +232,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) {
|
||||
if (request) {
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
if (alloc_reqs) {
|
||||
@ -266,7 +266,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
|
||||
ompi_osc_rdma_request_complete (request, OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
|
||||
@ -551,7 +551,7 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
|
||||
|
||||
/* increment the outstanding request counter in the request object */
|
||||
if (request) {
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
|
||||
cbcontext = (void *) ((intptr_t) request | 1);
|
||||
request->sync = sync;
|
||||
} else {
|
||||
@ -643,12 +643,12 @@ static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_
|
||||
subreq->internal = true;
|
||||
subreq->type = OMPI_OSC_RDMA_TYPE_RDMA;
|
||||
subreq->parent_request = request;
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
|
||||
|
||||
ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OMPI_OSC_RDMA_REQUEST_RETURN(subreq);
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -35,7 +35,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_
|
||||
}
|
||||
|
||||
if (request) {
|
||||
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
|
||||
(void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
|
||||
}
|
||||
|
||||
ompi_osc_rdma_sync_rdma_dec (sync);
|
||||
|
@ -37,7 +37,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t);
|
||||
|
||||
static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag)
|
||||
{
|
||||
if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32(&frag->pending, -1)) {
|
||||
opal_atomic_rmb ();
|
||||
|
||||
ompi_osc_rdma_deregister (frag->module, frag->handle);
|
||||
@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size
|
||||
|
||||
curr->top += request_len;
|
||||
curr->remain_len -= request_len;
|
||||
OPAL_THREAD_ADD32(&curr->pending, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
|
@ -17,7 +17,8 @@
|
||||
|
||||
static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock)
|
||||
{
|
||||
return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
|
||||
ompi_osc_rdma_lock_t _tmp_value = 0;
|
||||
return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
|
||||
}
|
||||
|
||||
static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock)
|
||||
|
@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
|
||||
int32_t flags;
|
||||
|
||||
opal_atomic_mb ();
|
||||
flags = peer->flags;
|
||||
|
||||
do {
|
||||
flags = peer->flags;
|
||||
if (flags & flag) {
|
||||
return false;
|
||||
}
|
||||
|
||||
} while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag));
|
||||
} while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag));
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -221,7 +220,7 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
|
||||
*/
|
||||
static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag)
|
||||
{
|
||||
OPAL_ATOMIC_AND32(&peer->flags, ~flag);
|
||||
OPAL_ATOMIC_AND_FETCH32(&peer->flags, ~flag);
|
||||
opal_atomic_mb ();
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,7 @@ static int request_complete (struct ompi_request_t *request)
|
||||
{
|
||||
ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request;
|
||||
|
||||
if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) {
|
||||
if (parent_request && 0 == OPAL_THREAD_ADD_FETCH32 (&parent_request->outstanding_requests, -1)) {
|
||||
ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -25,7 +25,7 @@ typedef int64_t osc_rdma_base_t;
|
||||
typedef int64_t osc_rdma_size_t;
|
||||
typedef int64_t osc_rdma_counter_t;
|
||||
|
||||
#define ompi_osc_rdma_counter_add opal_atomic_add_64
|
||||
#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_64
|
||||
|
||||
#else
|
||||
|
||||
@ -33,7 +33,7 @@ typedef int32_t osc_rdma_base_t;
|
||||
typedef int32_t osc_rdma_size_t;
|
||||
typedef int32_t osc_rdma_counter_t;
|
||||
|
||||
#define ompi_osc_rdma_counter_add opal_atomic_add_32
|
||||
#define ompi_osc_rdma_counter_add opal_atomic_add_fetch_32
|
||||
|
||||
#endif
|
||||
|
||||
@ -48,18 +48,18 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value
|
||||
int64_t new;
|
||||
|
||||
opal_atomic_mb ();
|
||||
new = opal_atomic_add_64 (p, value) - value;
|
||||
new = opal_atomic_add_fetch_64 (p, value) - value;
|
||||
opal_atomic_mb ();
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value)
|
||||
static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value)
|
||||
{
|
||||
int ret;
|
||||
|
||||
opal_atomic_mb ();
|
||||
ret = opal_atomic_bool_cmpset_64 (p, comp, value);
|
||||
ret = opal_atomic_compare_exchange_strong_64 (p, comp, value);
|
||||
opal_atomic_mb ();
|
||||
|
||||
return ret;
|
||||
@ -76,19 +76,19 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value
|
||||
int32_t new;
|
||||
|
||||
opal_atomic_mb ();
|
||||
/* opal_atomic_add_32 differs from normal atomics in that it returns the new value */
|
||||
new = opal_atomic_add_32 (p, value) - value;
|
||||
/* opal_atomic_add_fetch_32 differs from normal atomics in that it returns the new value */
|
||||
new = opal_atomic_add_fetch_32 (p, value) - value;
|
||||
opal_atomic_mb ();
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value)
|
||||
static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value)
|
||||
{
|
||||
int ret;
|
||||
|
||||
opal_atomic_mb ();
|
||||
ret = opal_atomic_bool_cmpset_32 (p, comp, value);
|
||||
ret = opal_atomic_compare_exchange_strong_32 (p, comp, value);
|
||||
opal_atomic_mb ();
|
||||
|
||||
return ret;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
|
||||
ompi_osc_sm_module_t *module =
|
||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||
int my_rank = ompi_comm_rank (module->comm);
|
||||
void *_tmp_ptr = NULL;
|
||||
|
||||
OBJ_RETAIN(group);
|
||||
|
||||
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) {
|
||||
if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) {
|
||||
OBJ_RELEASE(group);
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
@ -150,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group,
|
||||
|
||||
for (int i = 0 ; i < size ; ++i) {
|
||||
int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
|
||||
osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
|
||||
osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
|
||||
|
||||
/* wait for rank to post */
|
||||
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
|
||||
@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
|
||||
|
||||
opal_atomic_rmb ();
|
||||
|
||||
do {
|
||||
old = module->posts[my_rank][rank_byte];
|
||||
} while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
|
||||
#if OPAL_HAVE_ATOMIC_MATH_64
|
||||
(void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
|
||||
#else
|
||||
(void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
|
||||
#endif
|
||||
}
|
||||
|
||||
free (ranks);
|
||||
@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
|
||||
opal_atomic_mb();
|
||||
|
||||
group = module->start_group;
|
||||
if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) {
|
||||
if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) {
|
||||
return OMPI_ERR_RMA_SYNC;
|
||||
}
|
||||
|
||||
@ -198,7 +201,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
|
||||
|
||||
gsize = ompi_group_size(group);
|
||||
for (int i = 0 ; i < gsize ; ++i) {
|
||||
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1);
|
||||
(void) opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1);
|
||||
}
|
||||
|
||||
free (ranks);
|
||||
@ -244,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
|
||||
|
||||
gsize = ompi_group_size(module->post_group);
|
||||
for (int i = 0 ; i < gsize ; ++i) {
|
||||
(void) opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
|
||||
opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
|
||||
}
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
@ -26,9 +26,9 @@ lk_fetch_add32(ompi_osc_sm_module_t *module,
|
||||
size_t offset,
|
||||
uint32_t delta)
|
||||
{
|
||||
/* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the
|
||||
/* opal_atomic_add_fetch_32 is an add then fetch so delta needs to be subtracted out to get the
|
||||
* old value */
|
||||
return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
|
||||
return opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
|
||||
delta) - delta;
|
||||
}
|
||||
|
||||
@ -39,7 +39,7 @@ lk_add32(ompi_osc_sm_module_t *module,
|
||||
size_t offset,
|
||||
uint32_t delta)
|
||||
{
|
||||
opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
|
||||
opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
|
||||
delta);
|
||||
}
|
||||
|
||||
|
@ -81,7 +81,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
|
||||
{
|
||||
size_t tmp;
|
||||
|
||||
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init, 1) > 1)
|
||||
if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init, 1) > 1)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
/* initialize static objects */
|
||||
@ -109,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
|
||||
*/
|
||||
int mca_pml_base_bsend_fini(void)
|
||||
{
|
||||
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0)
|
||||
if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
if(NULL != mca_pml_bsend_allocator)
|
||||
|
@ -261,7 +261,7 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
|
||||
*/
|
||||
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
|
||||
if (sendreq->req_state == -1) { \
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, 1); \
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \
|
||||
}
|
||||
|
||||
/* Now check the error state. This request can be in error if the
|
||||
|
@ -328,7 +328,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
* protocol has req_state == 0 and as such should not be
|
||||
* decremented.
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
}
|
||||
|
||||
if(send_request_pml_complete_check(sendreq) == false)
|
||||
|
@ -206,7 +206,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
||||
(void *) des->des_remote,
|
||||
des->des_remote_count, 0);
|
||||
}
|
||||
OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
|
||||
#if PML_BFO
|
||||
btl->btl_free(btl, des);
|
||||
@ -217,7 +217,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
/* schedule additional rdma operations */
|
||||
@ -388,7 +388,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
recv_request_pml_complete_check(recvreq);
|
||||
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag);
|
||||
@ -506,7 +506,7 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
/* check completion status */
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
@ -668,7 +668,7 @@ void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
}
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
/* check completion status */
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
@ -903,7 +903,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
||||
#endif /* PML_BFO */
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
recvreq->req_rdma[rdma_idx].length -= size;
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
|
@ -70,12 +70,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t);
|
||||
|
||||
static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -207,10 +207,10 @@ mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
}
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
/* advance the request */
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
@ -287,7 +287,7 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count, 0);
|
||||
if (OPAL_LIKELY(0 < req_bytes_delivered)) {
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
}
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
@ -360,8 +360,8 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
|
||||
des->des_local_count,
|
||||
sizeof(mca_pml_bfo_frag_hdr_t));
|
||||
|
||||
OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
#if PML_BFO
|
||||
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||
@ -1164,7 +1164,7 @@ cannot_pack:
|
||||
range->range_btls[btl_idx].length -= size;
|
||||
range->range_send_length -= size;
|
||||
range->range_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
if(range->range_send_length == 0) {
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
@ -1226,7 +1226,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
@ -1335,7 +1335,7 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
||||
size_t i, size = 0;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
}
|
||||
#if PML_BFO
|
||||
MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq);
|
||||
|
@ -78,12 +78,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t);
|
||||
|
||||
static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -445,7 +445,7 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
|
||||
sendreq->req_pipeline_depth = 0;
|
||||
sendreq->req_bytes_delivered = 0;
|
||||
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32(
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
||||
#if PML_BFO
|
||||
sendreq->req_restartseq = 0; /* counts up restarts */
|
||||
|
@ -151,7 +151,7 @@ int mca_pml_ob1_isend(const void *buf,
|
||||
}
|
||||
|
||||
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
|
||||
}
|
||||
|
||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
|
||||
@ -220,7 +220,7 @@ int mca_pml_ob1_send(const void *buf,
|
||||
}
|
||||
|
||||
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -56,7 +56,7 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
|
||||
static int mca_pml_ob1_progress_needed = 0;
|
||||
int mca_pml_ob1_enable_progress(int32_t count)
|
||||
{
|
||||
int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count);
|
||||
int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count);
|
||||
if( 1 < progress_count )
|
||||
return 0; /* progress was already on */
|
||||
|
||||
@ -119,7 +119,7 @@ int mca_pml_ob1_progress(void)
|
||||
}
|
||||
|
||||
if( 0 != completed_requests ) {
|
||||
j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests);
|
||||
j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests);
|
||||
if( 0 == j ) {
|
||||
opal_progress_unregister(mca_pml_ob1_progress);
|
||||
}
|
||||
|
@ -445,7 +445,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
* protocol has req_state == 0 and as such should not be
|
||||
* decremented.
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
|
||||
|
@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, -1);
|
||||
|
||||
assert ((uint64_t) rdma_size == frag->rdma_length);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
|
||||
if (OPAL_LIKELY(0 < rdma_size)) {
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, rdma_size);
|
||||
if (recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
/* schedule additional rdma operations */
|
||||
@ -373,7 +373,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_
|
||||
}
|
||||
} else {
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
/* TODO: re-add order */
|
||||
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
|
||||
@ -524,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
/* check completion status */
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
@ -601,7 +601,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
|
||||
* known that the data has been copied out of the descriptor. */
|
||||
des->des_cbfunc(NULL, NULL, des, 0);
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
|
||||
/* check completion status */
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
@ -815,7 +815,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
|
||||
recvreq->req_recv.req_base.req_count,
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
}
|
||||
/* check completion status */
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
@ -1024,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, 1);
|
||||
recvreq->req_rdma[rdma_idx].length -= size;
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
|
@ -64,12 +64,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
|
||||
|
||||
static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -205,10 +205,10 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
}
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
/* advance the request */
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
@ -261,7 +261,7 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
|
||||
|
||||
/* count bytes of user data actually delivered and check for request completion */
|
||||
if (OPAL_LIKELY(0 < rdma_length)) {
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
|
||||
}
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
@ -313,8 +313,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
|
||||
des->des_segment_count,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
if(send_request_pml_complete_check(sendreq) == false) {
|
||||
mca_pml_ob1_send_request_schedule(sendreq);
|
||||
@ -1044,7 +1044,7 @@ cannot_pack:
|
||||
range->range_btls[btl_idx].length -= size;
|
||||
range->range_send_length -= size;
|
||||
range->range_send_offset += size;
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
|
||||
if(range->range_send_length == 0) {
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
@ -1060,7 +1060,7 @@ cannot_pack:
|
||||
range->range_btls[btl_idx].length -= size;
|
||||
range->range_send_length -= size;
|
||||
range->range_send_offset += size;
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
|
||||
if(range->range_send_length == 0) {
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
@ -1126,7 +1126,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b
|
||||
0, 0);
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
} else {
|
||||
@ -1200,7 +1200,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_pml_ob1_rdma_frag_t* frag;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
}
|
||||
|
||||
sendreq->req_recv.pval = hdr->hdr_recv_req.pval;
|
||||
|
@ -76,12 +76,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_range_t);
|
||||
|
||||
static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -485,7 +485,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
seqn = OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
|
||||
|
||||
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count,
|
||||
|
||||
num_requests_null_inactive = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
void *_tmp_ptr = REQUEST_PENDING;
|
||||
|
||||
request = requests[i];
|
||||
|
||||
/* Check for null or completed persistent request. For
|
||||
@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count,
|
||||
continue;
|
||||
}
|
||||
|
||||
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
|
||||
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) {
|
||||
assert(REQUEST_COMPLETE(request));
|
||||
completed = i;
|
||||
*index = i;
|
||||
@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count,
|
||||
* user.
|
||||
*/
|
||||
for(i = completed-1; (i+1) > 0; i--) {
|
||||
void *tmp_ptr = &sync;
|
||||
|
||||
request = requests[i];
|
||||
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count,
|
||||
* Otherwise, the request has been completed meanwhile, and it
|
||||
* has been atomically marked as REQUEST_COMPLETE.
|
||||
*/
|
||||
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
||||
if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) {
|
||||
*index = i;
|
||||
}
|
||||
}
|
||||
@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count,
|
||||
WAIT_SYNC_INIT(&sync, count);
|
||||
rptr = requests;
|
||||
for (i = 0; i < count; i++) {
|
||||
void *_tmp_ptr = REQUEST_PENDING;
|
||||
|
||||
request = *rptr++;
|
||||
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) {
|
||||
if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) {
|
||||
if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) {
|
||||
failed++;
|
||||
}
|
||||
@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count,
|
||||
if (MPI_STATUSES_IGNORE != statuses) {
|
||||
/* fill out status and free request if required */
|
||||
for( i = 0; i < count; i++, rptr++ ) {
|
||||
void *_tmp_ptr = &sync;
|
||||
|
||||
request = *rptr;
|
||||
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count,
|
||||
* mark the request as pending then it is neither failed nor complete, and
|
||||
* we must stop altering it.
|
||||
*/
|
||||
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) {
|
||||
if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
|
||||
/*
|
||||
* Per MPI 2.2 p 60:
|
||||
* Allows requests to be marked as MPI_ERR_PENDING if they are
|
||||
@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count,
|
||||
int rc;
|
||||
/* free request if required */
|
||||
for( i = 0; i < count; i++, rptr++ ) {
|
||||
void *_tmp_ptr = &sync;
|
||||
|
||||
request = *rptr;
|
||||
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count,
|
||||
/* If the request is still pending due to a failed request
|
||||
* then skip it in this loop.
|
||||
*/
|
||||
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) {
|
||||
if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
|
||||
/*
|
||||
* Per MPI 2.2 p 60:
|
||||
* Allows requests to be marked as MPI_ERR_PENDING if they are
|
||||
@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count,
|
||||
num_requests_null_inactive = 0;
|
||||
num_requests_done = 0;
|
||||
for (size_t i = 0; i < count; i++, rptr++) {
|
||||
void *_tmp_ptr = REQUEST_PENDING;
|
||||
|
||||
request = *rptr;
|
||||
/*
|
||||
* Check for null or completed persistent request.
|
||||
@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count,
|
||||
num_requests_null_inactive++;
|
||||
continue;
|
||||
}
|
||||
indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync);
|
||||
indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync);
|
||||
if( !indices[i] ) {
|
||||
/* If the request is completed go ahead and mark it as such */
|
||||
assert( REQUEST_COMPLETE(request) );
|
||||
@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count,
|
||||
rptr = requests;
|
||||
num_requests_done = 0;
|
||||
for (size_t i = 0; i < count; i++, rptr++) {
|
||||
void *_tmp_ptr = &sync;
|
||||
|
||||
request = *rptr;
|
||||
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count,
|
||||
*/
|
||||
if( !indices[i] ){
|
||||
indices[num_requests_done++] = i;
|
||||
} else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
||||
} else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) {
|
||||
indices[num_requests_done++] = i;
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request)
|
||||
static inline void ompi_request_wait_completion(ompi_request_t *req)
|
||||
{
|
||||
if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
|
||||
void *_tmp_ptr = REQUEST_PENDING;
|
||||
ompi_wait_sync_t sync;
|
||||
|
||||
WAIT_SYNC_INIT(&sync, 1);
|
||||
|
||||
if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
|
||||
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
|
||||
SYNC_WAIT(&sync);
|
||||
} else {
|
||||
/* completed before we had a chance to swap in the sync object */
|
||||
@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
|
||||
|
||||
if (0 == rc) {
|
||||
if( OPAL_LIKELY(with_signal) ) {
|
||||
if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) {
|
||||
void *_tmp_ptr = REQUEST_PENDING;
|
||||
|
||||
if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) {
|
||||
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
|
||||
REQUEST_COMPLETED);
|
||||
/* In the case where another thread concurrently changed the request to REQUEST_PENDING */
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
|
||||
return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost;
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_128
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
|
||||
|
||||
/* Add one element to the FIFO. We will return the last head of the list
|
||||
* to allow the upper level to detect if this element is the first one in the
|
||||
@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
|
||||
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
opal_list_item_t *item)
|
||||
{
|
||||
opal_counted_pointer_t tail;
|
||||
opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value};
|
||||
|
||||
item->opal_list_next = &fifo->opal_fifo_ghost;
|
||||
|
||||
do {
|
||||
tail.value = fifo->opal_fifo_tail.value;
|
||||
|
||||
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) {
|
||||
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
if (&fifo->opal_fifo_ghost == tail.data.item) {
|
||||
/* update the head */
|
||||
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
|
||||
opal_update_counted_pointer (&fifo->opal_fifo_head, head, item);
|
||||
opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item);
|
||||
} else {
|
||||
/* update previous item */
|
||||
tail.data.item->opal_list_next = item;
|
||||
@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
*/
|
||||
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
{
|
||||
opal_list_item_t *item, *next;
|
||||
opal_counted_pointer_t head, tail;
|
||||
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
|
||||
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail;
|
||||
|
||||
do {
|
||||
head.value = fifo->opal_fifo_head.value;
|
||||
tail.value = fifo->opal_fifo_tail.value;
|
||||
opal_atomic_rmb ();
|
||||
|
||||
item = (opal_list_item_t *) head.data.item;
|
||||
next = (opal_list_item_t *) item->opal_list_next;
|
||||
|
||||
if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) {
|
||||
if (ghost == tail.data.item && ghost == item) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* the head or next pointer are in an inconsistent state. keep looping. */
|
||||
if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item &&
|
||||
&fifo->opal_fifo_ghost == next) {
|
||||
if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
|
||||
head.value = fifo->opal_fifo_head.value;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* try popping the head */
|
||||
if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) {
|
||||
if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
opal_atomic_wmb ();
|
||||
|
||||
/* check for tail and head consistency */
|
||||
if (&fifo->opal_fifo_ghost == next) {
|
||||
if (ghost == next) {
|
||||
/* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */
|
||||
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) {
|
||||
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) {
|
||||
/* tail was changed by a push operation. wait for the item's next pointer to be set then
|
||||
* update the head */
|
||||
|
||||
/* wait for next pointer to be updated by push */
|
||||
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
|
||||
while (ghost == item->opal_list_next) {
|
||||
opal_atomic_rmb ();
|
||||
}
|
||||
|
||||
@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
head.value = fifo->opal_fifo_head.value;
|
||||
next = (opal_list_item_t *) item->opal_list_next;
|
||||
|
||||
assert (&fifo->opal_fifo_ghost == head.data.item);
|
||||
assert (ghost == head.data.item);
|
||||
|
||||
fifo->opal_fifo_head.data.item = next;
|
||||
opal_atomic_wmb ();
|
||||
@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
*/
|
||||
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
{
|
||||
opal_list_item_t *item, *next;
|
||||
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_LLSC_PTR
|
||||
/* use load-linked store-conditional to avoid ABA issues */
|
||||
do {
|
||||
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
|
||||
if (&fifo->opal_fifo_ghost == item) {
|
||||
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
|
||||
if (ghost == item) {
|
||||
if (ghost == fifo->opal_fifo_tail.data.item) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
#else
|
||||
/* protect against ABA issues by "locking" the head */
|
||||
do {
|
||||
if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {
|
||||
if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
opal_atomic_wmb();
|
||||
|
||||
item = opal_fifo_head (fifo);
|
||||
if (&fifo->opal_fifo_ghost == item) {
|
||||
if (ghost == item) {
|
||||
fifo->opal_fifo_head.data.counter = 0;
|
||||
return NULL;
|
||||
}
|
||||
@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
fifo->opal_fifo_head.data.item = next;
|
||||
#endif
|
||||
|
||||
if (&fifo->opal_fifo_ghost == next) {
|
||||
if (!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) {
|
||||
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
|
||||
if (ghost == next) {
|
||||
void *tmp = item;
|
||||
|
||||
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) {
|
||||
while (ghost == item->opal_list_next) {
|
||||
opal_atomic_rmb ();
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -36,8 +36,8 @@
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* NTH: temporarily suppress warnings about this not being defined */
|
||||
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128)
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
|
||||
#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128)
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
@ -50,7 +50,7 @@ union opal_counted_pointer_t {
|
||||
/** list item pointer */
|
||||
volatile opal_list_item_t * volatile item;
|
||||
} data;
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T
|
||||
/** used for atomics when there is a cmpset that can operate on
|
||||
* two 64-bit values */
|
||||
opal_int128_t value;
|
||||
@ -59,19 +59,19 @@ union opal_counted_pointer_t {
|
||||
typedef union opal_counted_pointer_t opal_counted_pointer_t;
|
||||
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_128
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
|
||||
|
||||
/* Add one element to the FIFO. We will return the last head of the list
|
||||
* to allow the upper level to detect if this element is the first one in the
|
||||
* list (if the list was empty before this operation).
|
||||
*/
|
||||
static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old,
|
||||
static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old,
|
||||
opal_list_item_t *item)
|
||||
{
|
||||
opal_counted_pointer_t new_p;
|
||||
new_p.data.item = item;
|
||||
new_p.data.counter = old.data.counter + 1;
|
||||
return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value);
|
||||
new_p.data.counter = old->data.counter + 1;
|
||||
return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value);
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
|
||||
}
|
||||
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_128
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
|
||||
|
||||
/* Add one element to the LIFO. We will return the last head of the list
|
||||
* to allow the upper level to detect if this element is the first one in the
|
||||
@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
|
||||
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
|
||||
opal_list_item_t *item)
|
||||
{
|
||||
do {
|
||||
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
|
||||
do {
|
||||
item->opal_list_next = next;
|
||||
opal_atomic_wmb ();
|
||||
|
||||
/* to protect against ABA issues it is sufficient to only update the counter in pop */
|
||||
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) {
|
||||
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
|
||||
return next;
|
||||
}
|
||||
/* DO some kind of pause to release the bus */
|
||||
@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
opal_counted_pointer_t old_head;
|
||||
opal_list_item_t *item;
|
||||
|
||||
old_head.data.counter = lifo->opal_lifo_head.data.counter;
|
||||
opal_atomic_rmb ();
|
||||
old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
|
||||
do {
|
||||
|
||||
old_head.data.counter = lifo->opal_lifo_head.data.counter;
|
||||
opal_atomic_rmb ();
|
||||
old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item;
|
||||
|
||||
item = (opal_list_item_t *) old_head.data.item;
|
||||
if (item == &lifo->opal_lifo_ghost) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head,
|
||||
if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head,
|
||||
(opal_list_item_t *) item->opal_list_next)) {
|
||||
opal_atomic_wmb ();
|
||||
item->opal_list_next = NULL;
|
||||
@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
|
||||
opal_list_item_t *item)
|
||||
{
|
||||
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
|
||||
/* item free acts as a mini lock to avoid ABA problems */
|
||||
item->item_free = 1;
|
||||
|
||||
do {
|
||||
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
item->opal_list_next = next;
|
||||
opal_atomic_wmb();
|
||||
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) {
|
||||
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
|
||||
opal_atomic_wmb ();
|
||||
/* now safe to pop this item */
|
||||
item->item_free = 0;
|
||||
@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
*/
|
||||
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) {
|
||||
opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost;
|
||||
|
||||
item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
|
||||
|
||||
while (item != ghost) {
|
||||
/* ensure it is safe to pop the head */
|
||||
if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) {
|
||||
continue;
|
||||
@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
||||
head = item;
|
||||
/* try to swap out the head pointer */
|
||||
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, item,
|
||||
(void *) item->opal_list_next)) {
|
||||
if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head,
|
||||
(void *) item->opal_list_next)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* NTH: don't need another atomic here */
|
||||
item->item_free = 0;
|
||||
item = head;
|
||||
|
||||
/* Do some kind of pause to release the bus */
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx)
|
||||
/* Spot check: ensure this item is only on the list that we
|
||||
just inserted it into */
|
||||
|
||||
(void)opal_atomic_add( &(item->opal_list_item_refcount), 1 );
|
||||
opal_atomic_add ( &(item->opal_list_item_refcount), 1 );
|
||||
assert(1 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = list;
|
||||
#endif
|
||||
|
@ -509,7 +509,7 @@ static inline opal_list_item_t *opal_list_remove_item
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
/* Spot check: ensure that this item is still only on one list */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
|
||||
assert(0 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = NULL;
|
||||
#endif
|
||||
@ -575,7 +575,7 @@ static inline void _opal_list_append(opal_list_t *list, opal_list_item_t *item
|
||||
/* Spot check: ensure this item is only on the list that we just
|
||||
appended it to */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
|
||||
assert(1 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = list;
|
||||
#endif
|
||||
@ -625,7 +625,7 @@ static inline void opal_list_prepend(opal_list_t *list,
|
||||
/* Spot check: ensure this item is only on the list that we just
|
||||
prepended it to */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
|
||||
assert(1 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = list;
|
||||
#endif
|
||||
@ -686,7 +686,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list)
|
||||
/* Spot check: ensure that the item we're returning is now on no
|
||||
lists */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
|
||||
assert(0 == item->opal_list_item_refcount);
|
||||
#endif
|
||||
|
||||
@ -746,7 +746,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list)
|
||||
/* Spot check: ensure that the item we're returning is now on no
|
||||
lists */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
|
||||
assert(0 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = NULL;
|
||||
#endif
|
||||
@ -789,7 +789,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos
|
||||
/* Spot check: double check that this item is only on the list
|
||||
that we just added it to */
|
||||
|
||||
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
|
||||
assert(1 == item->opal_list_item_refcount);
|
||||
item->opal_list_item_belong_to = list;
|
||||
#endif
|
||||
|
@ -510,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
|
||||
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
|
||||
/*
 * Apply the delta `inc` to object->obj_reference_count through
 * OPAL_THREAD_ADD_FETCH32 and hand back that macro's result.
 * NOTE(review): presumably the result is the updated count (add-then-fetch,
 * going by the macro name) -- confirm against the macro definition.
 */
static inline int opal_obj_update(opal_object_t *object, int inc)
{
    const int updated = OPAL_THREAD_ADD_FETCH32(&object->obj_reference_count, inc);

    return updated;
}
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -210,7 +210,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item,
|
||||
/* Spot check: ensure this item is only on the list that we just
|
||||
appended it to */
|
||||
|
||||
OPAL_THREAD_ADD32( &(new_item->opal_tree_item_refcount), 1 );
|
||||
OPAL_THREAD_ADD_FETCH32( &(new_item->opal_tree_item_refcount), 1 );
|
||||
assert(1 == new_item->opal_tree_item_refcount);
|
||||
new_item->opal_tree_item_belong_to = new_item->opal_tree_container;
|
||||
#endif
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -11,6 +12,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 ARM ltd. All rights reserved.
|
||||
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -104,12 +107,12 @@ void opal_atomic_isync(void)
|
||||
|
||||
#if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6))
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 1
|
||||
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret, tmp;
|
||||
int32_t prev, tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"1: ldrex %0, [%2] \n"
|
||||
@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
" bne 1b \n"
|
||||
"2: \n"
|
||||
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
/*
 * 32-bit compare-exchange followed by a read memory barrier.
 *
 * Delegates to opal_atomic_compare_exchange_strong_32 and then issues
 * opal_atomic_rmb(); the barrier runs whether or not the exchange succeeded.
 * Returns the delegate's result unchanged.
 */
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    const bool exchanged = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);

    opal_atomic_rmb();

    return exchanged;
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
/*
 * 32-bit compare-exchange preceded by a write memory barrier.
 *
 * Issues opal_atomic_wmb() first, then delegates to
 * opal_atomic_compare_exchange_strong_32 and returns its result unchanged.
 */
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{
    bool exchanged;

    opal_atomic_wmb();
    exchanged = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);

    return exchanged;
}
|
||||
|
||||
#if (OPAL_ASM_SUPPORT_64BIT == 1)
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
int tmp;
|
||||
int64_t prev;
|
||||
int tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"1: ldrexd %0, %H0, [%2] \n"
|
||||
" cmp %0, %3 \n"
|
||||
" it eq \n"
|
||||
" cmpeq %H0, %H3 \n"
|
||||
" bne 2f \n"
|
||||
" strexd %1, %4, %H4, [%2] \n"
|
||||
" cmp %1, #0 \n"
|
||||
" bne 1b \n"
|
||||
"2: \n"
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"1: ldrexd %0, %H0, [%2] \n"
|
||||
" cmp %0, %3 \n"
|
||||
" it eq \n"
|
||||
" cmpeq %H0, %H3 \n"
|
||||
" bne 2f \n"
|
||||
" strexd %1, %4, %H4, [%2] \n"
|
||||
" cmp %1, #0 \n"
|
||||
" bne 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
@ -184,91 +188,65 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
/*
 * 64-bit compare-exchange followed by a read memory barrier.
 *
 * Delegates to opal_atomic_compare_exchange_strong_64 and then issues
 * opal_atomic_rmb(); the barrier runs whether or not the exchange succeeded.
 * Returns the delegate's result unchanged.
 */
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    const bool exchanged = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);

    opal_atomic_rmb();

    return exchanged;
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
/*
 * 64-bit compare-exchange preceded by a write memory barrier.
 *
 * Issues opal_atomic_wmb() first, then delegates to
 * opal_atomic_compare_exchange_strong_64 and returns its result unchanged.
 */
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
    bool exchanged;

    opal_atomic_wmb();
    exchanged = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);

    return exchanged;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc)
|
||||
{
|
||||
int32_t t;
|
||||
int tmp;
|
||||
int32_t t, old;
|
||||
int tmp;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: ldrex %0, [%2] \n"
|
||||
" add %0, %0, %3 \n"
|
||||
" strex %1, %0, [%2] \n"
|
||||
" cmp %1, #0 \n"
|
||||
__asm__ __volatile__(
|
||||
"1: ldrex %1, [%3] \n"
|
||||
" add %0, %1, %4 \n"
|
||||
" strex %2, %0, [%3] \n"
|
||||
" cmp %2, #0 \n"
|
||||
" bne 1b \n"
|
||||
|
||||
: "=&r" (t), "=&r" (tmp)
|
||||
: "=&r" (t), "=&r" (old), "=&r" (tmp)
|
||||
: "r" (v), "r" (inc)
|
||||
: "cc", "memory");
|
||||
|
||||
|
||||
return t;
|
||||
return old;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_SUB_32 1
|
||||
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec)
|
||||
{
|
||||
int32_t t;
|
||||
int tmp;
|
||||
int32_t t, old;
|
||||
int tmp;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: ldrex %0, [%2] \n"
|
||||
" sub %0, %0, %3 \n"
|
||||
" strex %1, %0, [%2] \n"
|
||||
" cmp %1, #0 \n"
|
||||
__asm__ __volatile__(
|
||||
"1: ldrex %1, [%3] \n"
|
||||
" sub %0, %1, %4 \n"
|
||||
" strex %2, %0, [%3] \n"
|
||||
" cmp %2, #0 \n"
|
||||
" bne 1b \n"
|
||||
|
||||
: "=&r" (t), "=&r" (tmp)
|
||||
: "=&r" (t), "=&r" (old), "=&r" (tmp)
|
||||
: "r" (v), "r" (dec)
|
||||
: "cc", "memory");
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0)))
|
||||
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
return !(__kuser_cmpxchg(oldval, newval, addr));
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
/* kernel function includes all necessary memory barriers */
|
||||
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
/* kernel function includes all necessary memory barriers */
|
||||
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
return t;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -29,10 +29,10 @@
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_32 1
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_32 1
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 1
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_64 1
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_64 1
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void)
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret, tmp;
|
||||
int32_t prev, tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
|
||||
" cmp %w0, %w3 \n"
|
||||
@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
" stxr %w1, %w4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
|
||||
@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret, tmp;
|
||||
int32_t prev, tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
|
||||
" cmp %w0, %w3 \n"
|
||||
@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
|
||||
" stxr %w1, %w4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret, tmp;
|
||||
int32_t prev, tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldxr %w0, [%2] \n"
|
||||
" cmp %w0, %w3 \n"
|
||||
@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
|
||||
" stlxr %w1, %w4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
|
||||
@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
int64_t prev;
|
||||
int tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
|
||||
" cmp %0, %3 \n"
|
||||
@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
" stxr %w1, %4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
|
||||
@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
int64_t prev;
|
||||
int tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
|
||||
" cmp %0, %3 \n"
|
||||
@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
|
||||
" stxr %w1, %4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
int64_t prev;
|
||||
int tmp;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ ("1: ldxr %0, [%2] \n"
|
||||
" cmp %0, %3 \n"
|
||||
@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
|
||||
" stlxr %w1, %4, [%2] \n"
|
||||
" cbnz %w1, 1b \n"
|
||||
"2: \n"
|
||||
: "=&r" (ret), "=&r" (tmp)
|
||||
: "r" (addr), "r" (oldval), "r" (newval)
|
||||
: "=&r" (prev), "=&r" (tmp)
|
||||
: "r" (addr), "r" (*oldval), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
|
||||
@ -281,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
|
||||
}
|
||||
|
||||
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
|
||||
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \
|
||||
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
|
||||
{ \
|
||||
type newval; \
|
||||
type newval, old; \
|
||||
int32_t tmp; \
|
||||
\
|
||||
__asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \
|
||||
" " inst " %" reg "0, %" reg "0, %" reg "3 \n" \
|
||||
" stxr %w1, %" reg "0, [%2] \n" \
|
||||
" cbnz %w1, 1b \n" \
|
||||
: "=&r" (newval), "=&r" (tmp) \
|
||||
__asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \
|
||||
" " inst " %" reg "0, %" reg "1, %" reg "4 \n" \
|
||||
" stxr %w2, %" reg "0, [%3] \n" \
|
||||
" cbnz %w2, 1b \n" \
|
||||
: "=&r" (newval), "=&r" (old), "=&r" (tmp) \
|
||||
: "r" (addr), "r" (value) \
|
||||
: "cc", "memory"); \
|
||||
\
|
||||
return newval; \
|
||||
return old; \
|
||||
}
|
||||
|
||||
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w")
|
||||
|
@ -40,11 +40,11 @@
|
||||
*
|
||||
* - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
|
||||
* - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
|
||||
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
|
||||
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
|
||||
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomically"
|
||||
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomically"
|
||||
*
|
||||
* Note that for the Atomic math, atomic add/sub may be implemented as
|
||||
* C code using opal_atomic_bool_cmpset. The appearance of atomic
|
||||
* C code using opal_atomic_compare_exchange. The appearance of atomic
|
||||
* operation will be upheld in these cases.
|
||||
*/
|
||||
|
||||
@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
|
||||
*********************************************************************/
|
||||
#if !OPAL_GCC_INLINE_ASSEMBLY
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 0
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 0
|
||||
@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0
|
||||
#else
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 1
|
||||
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 1
|
||||
@ -187,14 +187,14 @@ enum {
|
||||
/* compare and set operations can't really be emulated from software,
|
||||
so if these defines aren't already set, they should be set to 0
|
||||
now */
|
||||
#ifndef OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 0
|
||||
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0
|
||||
#endif
|
||||
#ifndef OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 0
|
||||
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
|
||||
#endif
|
||||
#ifndef OPAL_HAVE_ATOMIC_CMPSET_128
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
|
||||
#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
|
||||
#endif
|
||||
#ifndef OPAL_HAVE_ATOMIC_LLSC_32
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_32 0
|
||||
@ -270,7 +270,7 @@ void opal_atomic_wmb(void);
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic spinlocks - always inlined, if have atomic cmpset
|
||||
* Atomic spinlocks - always inlined, if have atomic compare-and-swap
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
@ -280,7 +280,7 @@ void opal_atomic_wmb(void);
|
||||
#define OPAL_HAVE_ATOMIC_SPINLOCKS 0
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
|
||||
|
||||
/**
|
||||
* Initialize a lock to value
|
||||
@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_SPINLOCKS == 0
|
||||
#undef OPAL_HAVE_ATOMIC_SPINLOCKS
|
||||
#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
|
||||
#define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1
|
||||
#endif
|
||||
|
||||
@ -347,48 +347,48 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
|
||||
#endif
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval,
|
||||
int32_t newval);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval,
|
||||
int32_t newval);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval,
|
||||
int32_t newval);
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN)
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 0
|
||||
#if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN)
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
|
||||
#endif
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval,
|
||||
int64_t newval);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval,
|
||||
int64_t newval);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
static inline
|
||||
#endif
|
||||
bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval,
|
||||
int64_t newval);
|
||||
|
||||
#endif
|
||||
|
||||
@ -397,45 +397,25 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 0
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
|
||||
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
|
||||
a static inline version of it (in assembly). If we have to fall
|
||||
back on cmpset 32, that too will be inline. */
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
|
||||
static inline
|
||||
#endif
|
||||
int32_t opal_atomic_add_32(volatile int32_t *addr, int delta);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
|
||||
static inline
|
||||
#endif
|
||||
int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
|
||||
static inline
|
||||
#endif
|
||||
int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
|
||||
static inline
|
||||
#endif
|
||||
int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value);
|
||||
|
||||
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
|
||||
a static inline version of it (in assembly). If we have to fall
|
||||
back to cmpset 32, that too will be inline. */
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
|
||||
static inline
|
||||
#endif
|
||||
int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
|
||||
static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta);
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta);
|
||||
static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value);
|
||||
static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int delta);
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */
|
||||
|
||||
#if ! OPAL_HAVE_ATOMIC_MATH_32
|
||||
/* fix up the value of opal_have_atomic_math_32 to allow for C versions */
|
||||
#undef OPAL_HAVE_ATOMIC_MATH_32
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
#endif
|
||||
|
||||
#ifndef OPAL_HAVE_ATOMIC_MATH_64
|
||||
@ -443,45 +423,24 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 0
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
|
||||
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
|
||||
a static inline version of it (in assembly). If we have to fall
|
||||
back to cmpset 64, that too will be inline */
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
static inline
|
||||
#endif
|
||||
int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
static inline
|
||||
#endif
|
||||
int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
static inline
|
||||
#endif
|
||||
int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value);
|
||||
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
static inline
|
||||
#endif
|
||||
int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value);
|
||||
|
||||
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
|
||||
a static inline version of it (in assembly). If we have to fall
|
||||
back to cmpset 64, that too will be inline */
|
||||
#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
static inline
|
||||
#endif
|
||||
int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
|
||||
static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta);
|
||||
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta);
|
||||
static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value);
|
||||
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value);
|
||||
static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value);
|
||||
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value);
|
||||
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value);
|
||||
static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta);
|
||||
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta);
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_MATH_64 */
|
||||
|
||||
#if ! OPAL_HAVE_ATOMIC_MATH_64
|
||||
/* fix up the value of opal_have_atomic_math_64 to allow for C versions */
|
||||
#undef OPAL_HAVE_ATOMIC_MATH_64
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
#endif
|
||||
|
||||
/* provide a size_t add/subtract. When in debug mode, make it an
|
||||
@ -491,114 +450,141 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
|
||||
*/
|
||||
#if defined(DOXYGEN) || OPAL_ENABLE_DEBUG
|
||||
static inline size_t
|
||||
opal_atomic_add_size_t(volatile size_t *addr, size_t delta)
|
||||
opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta)
|
||||
{
|
||||
#if SIZEOF_SIZE_T == 4
|
||||
return (size_t) opal_atomic_add_32((int32_t*) addr, delta);
|
||||
return (size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta);
|
||||
#elif SIZEOF_SIZE_T == 8
|
||||
return (size_t) opal_atomic_add_64((int64_t*) addr, delta);
|
||||
return (size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta);
|
||||
#else
|
||||
#error "Unknown size_t size"
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
opal_atomic_sub_size_t(volatile size_t *addr, size_t delta)
|
||||
opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta)
|
||||
{
|
||||
#if SIZEOF_SIZE_T == 4
|
||||
return (size_t) opal_atomic_sub_32((int32_t*) addr, delta);
|
||||
return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta);
|
||||
#elif SIZEOF_SIZE_T == 8
|
||||
return (size_t) opal_atomic_sub_64((int64_t*) addr, delta);
|
||||
return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta);
|
||||
#else
|
||||
#error "Unknown size_t size"
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta)
|
||||
{
|
||||
#if SIZEOF_SIZE_T == 4
|
||||
return (size_t) opal_atomic_sub_fetch_32((int32_t*) addr, delta);
|
||||
#elif SIZEOF_SIZE_T == 8
|
||||
return (size_t) opal_atomic_sub_fetch_64((int64_t*) addr, delta);
|
||||
#else
|
||||
#error "Unknown size_t size"
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta)
|
||||
{
|
||||
#if SIZEOF_SIZE_T == 4
|
||||
return (size_t) opal_atomic_fetch_sub_32((int32_t*) addr, delta);
|
||||
#elif SIZEOF_SIZE_T == 8
|
||||
return (size_t) opal_atomic_fetch_sub_64((int64_t*) addr, delta);
|
||||
#else
|
||||
#error "Unknown size_t size"
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
#if SIZEOF_SIZE_T == 4
|
||||
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_32((int32_t*) addr, delta))
|
||||
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_32((int32_t*) addr, delta))
|
||||
#elif SIZEOF_SIZE_T ==8
|
||||
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_64((int64_t*) addr, delta))
|
||||
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_64((int64_t*) addr, delta))
|
||||
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta))
|
||||
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta))
|
||||
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta))
|
||||
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta))
|
||||
#elif SIZEOF_SIZE_T == 8
|
||||
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta))
|
||||
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta))
|
||||
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta))
|
||||
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta))
|
||||
#else
|
||||
#error "Unknown size_t size"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
|
||||
/* these are always done with inline functions, so always mark as
|
||||
static inline */
|
||||
static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length);
|
||||
static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr,
|
||||
int64_t oldval, int64_t newval,
|
||||
size_t length);
|
||||
static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr,
|
||||
int64_t oldval, int64_t newval,
|
||||
size_t length);
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval);
|
||||
static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval);
|
||||
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval);
|
||||
static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval,
|
||||
int64_t newval, size_t length);
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval,
|
||||
int64_t newval, size_t length);
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval,
|
||||
int64_t newval, size_t length);
|
||||
|
||||
|
||||
static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval,
|
||||
void *newval);
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval,
|
||||
void *newval);
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval,
|
||||
void *newval);
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with relaxed semantics. This
|
||||
* Atomic compare and set of generic type with relaxed semantics. This
|
||||
* macro detects at compile time the type of the first argument and
|
||||
* chooses the correct function to be called.
|
||||
*
|
||||
* \note This macro should only be used for integer types.
|
||||
*
|
||||
* @param addr Address of <TYPE>.
|
||||
* @param oldval Comparison value <TYPE>.
|
||||
* @param oldval Comparison value address of <TYPE>.
|
||||
* @param newval New value to set if comparison is true <TYPE>.
|
||||
*
|
||||
* See opal_atomic_bool_cmpset_* for pseudo-code.
|
||||
* See opal_atomic_compare_exchange_* for pseudo-code.
|
||||
*/
|
||||
#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \
|
||||
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
#define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
|
||||
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with acquire semantics. This
|
||||
* macro detect at compile time the type of the first argument
|
||||
* and choose the correct function to be called.
|
||||
* Atomic compare and set of generic type with acquire semantics. This
|
||||
* macro detects at compile time the type of the first argument and
|
||||
* chooses the correct function to be called.
|
||||
*
|
||||
* \note This macro should only be used for integer types.
|
||||
*
|
||||
* @param addr Address of <TYPE>.
|
||||
* @param oldval Comparison value <TYPE>.
|
||||
* @param oldval Comparison value address of <TYPE>.
|
||||
* @param newval New value to set if comparison is true <TYPE>.
|
||||
*
|
||||
* See opal_atomic_bool_cmpset_acq_* for pseudo-code.
|
||||
* See opal_atomic_compare_exchange_acq_* for pseudo-code.
|
||||
*/
|
||||
#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
|
||||
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
#define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
|
||||
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with release semantics. This
|
||||
* macro detect at compile time the type of the first argument
|
||||
* and choose the correct function to b
|
||||
* Atomic compare and set of generic type with release semantics. This
|
||||
* macro detects at compile time the type of the first argument and
|
||||
* chooses the correct function to be called.
|
||||
*
|
||||
* \note This macro should only be used for integer types.
|
||||
*
|
||||
* @param addr Address of <TYPE>.
|
||||
* @param oldval Comparison value <TYPE>.
|
||||
* @param oldval Comparison value address of <TYPE>.
|
||||
* @param newval New value to set if comparison is true <TYPE>.
|
||||
*
|
||||
* See opal_atomic_bool_cmpsetrel_* for pseudo-code.
|
||||
* See opal_atomic_compare_exchange_rel_* for pseudo-code.
|
||||
*/
|
||||
#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
|
||||
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
#define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \
|
||||
opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
|
||||
(intptr_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
|
||||
|
||||
#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
|
||||
|
||||
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64)
|
||||
|
||||
@ -606,15 +592,11 @@ static inline void opal_atomic_add_xx(volatile void* addr,
|
||||
int32_t value, size_t length);
|
||||
static inline void opal_atomic_sub_xx(volatile void* addr,
|
||||
int32_t value, size_t length);
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta );
|
||||
static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta );
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta );
|
||||
static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta );
|
||||
#else
|
||||
#error Atomic arithmetic on pointers not supported
|
||||
#endif
|
||||
|
||||
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
|
||||
static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta );
|
||||
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
|
||||
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta );
|
||||
|
||||
/**
|
||||
* Atomically increment the content depending on the type. This
|
||||
|
@ -34,20 +34,30 @@
|
||||
*
|
||||
* Some architectures do not provide support for the 64 bits
|
||||
* atomic operations. Until we find a better solution let's just
|
||||
* undefine all those functions if there is no 64 bit cmpset
|
||||
* undefine all those functions if there is no 64 bit compare-exchange
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
|
||||
|
||||
#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \
|
||||
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
|
||||
{ \
|
||||
type oldval; \
|
||||
do { \
|
||||
oldval = *addr; \
|
||||
} while (!opal_atomic_compare_exchange_strong_ ## bits (addr, &oldval, oldval operation value)); \
|
||||
\
|
||||
return oldval; \
|
||||
}
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_SWAP_32)
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_32 1
|
||||
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
|
||||
int32_t newval)
|
||||
{
|
||||
int32_t old;
|
||||
int32_t old = *addr;
|
||||
do {
|
||||
old = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, old, newval));
|
||||
} while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval));
|
||||
|
||||
return old;
|
||||
}
|
||||
@ -55,161 +65,91 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_ADD_32)
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
static inline int32_t
|
||||
opal_atomic_add_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_ADD_32 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_AND_32)
|
||||
#define OPAL_HAVE_ATOMIC_AND_32 1
|
||||
static inline int32_t
|
||||
opal_atomic_and_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value));
|
||||
return (oldval & value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_AND_32 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_OR_32)
|
||||
#define OPAL_HAVE_ATOMIC_OR_32 1
|
||||
static inline int32_t
|
||||
opal_atomic_or_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value));
|
||||
return (oldval | value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_OR_32 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_XOR_32)
|
||||
#define OPAL_HAVE_ATOMIC_XOR_32 1
|
||||
static inline int32_t
|
||||
opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value));
|
||||
return (oldval ^ value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_XOR_32 */
|
||||
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_SUB_32)
|
||||
#define OPAL_HAVE_ATOMIC_SUB_32 1
|
||||
static inline int32_t
|
||||
opal_atomic_sub_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
|
||||
#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */
|
||||
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_SWAP_64)
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_64 1
|
||||
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr,
|
||||
int64_t newval)
|
||||
{
|
||||
int64_t old;
|
||||
int64_t old = *addr;
|
||||
do {
|
||||
old = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, old, newval));
|
||||
} while (!opal_atomic_compare_exchange_strong_64 (addr, &old, newval));
|
||||
|
||||
return old;
|
||||
}
|
||||
#endif /* OPAL_HAVE_ATOMIC_SWAP_64 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_ADD_64)
|
||||
#define OPAL_HAVE_ATOMIC_ADD_64 1
|
||||
static inline int64_t
|
||||
opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_AND_64)
|
||||
#define OPAL_HAVE_ATOMIC_AND_64 1
|
||||
static inline int64_t
|
||||
opal_atomic_and_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value));
|
||||
return (oldval & value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_AND_64 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_OR_64)
|
||||
#define OPAL_HAVE_ATOMIC_OR_64 1
|
||||
static inline int64_t
|
||||
opal_atomic_or_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value));
|
||||
return (oldval | value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_OR_64 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_XOR_64)
|
||||
#define OPAL_HAVE_ATOMIC_XOR_64 1
|
||||
static inline int64_t
|
||||
opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value));
|
||||
return (oldval ^ value);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_XOR_64 */
|
||||
|
||||
#if !defined(OPAL_HAVE_ATOMIC_SUB_64)
|
||||
#define OPAL_HAVE_ATOMIC_SUB_64 1
|
||||
static inline int64_t
|
||||
opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub)
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */
|
||||
|
||||
#else
|
||||
@ -222,130 +162,70 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
#define OPAL_HAVE_ATOMIC_SUB_64 0
|
||||
#endif
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
|
||||
#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */
|
||||
|
||||
#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
|
||||
|
||||
#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64)
|
||||
|
||||
static inline bool
|
||||
opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
|
||||
}
|
||||
abort();
|
||||
/* This should never happen, so deliberately abort (hopefully
|
||||
leaving a corefile for analysis) */
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
|
||||
}
|
||||
/* This should never happen, so deliberately abort (hopefully
|
||||
leaving a corefile for analysis) */
|
||||
abort();
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
|
||||
}
|
||||
/* This should never happen, so deliberately abort (hopefully
|
||||
leaving a corefile for analysis) */
|
||||
abort();
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
opal_atomic_bool_cmpset_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval)
|
||||
{
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
/*
 * Generates opal_atomic_compare_exchange_strong<semantics>xx(), which picks
 * the 32- or 64-bit compare-exchange from the operand size given at run time.
 *
 *   semantics - "_", "_acq_" or "_rel_"; it is pasted into BOTH the generated
 *               function name and the fixed-width primitive that is called, so
 *               the _acq_/_rel_ variants really use the _acq_/_rel_ primitives.
 *
 * Generated function:
 *   addr   - location to update
 *   oldval - points to the expected value; passed through to the primitive
 *   newval - replacement value, narrowed to the operand width
 *   length - operand size in bytes (4, or 8 when 64-bit support exists)
 *
 * Returns whatever the selected primitive returns. Any other length is a
 * programming error and ends in abort().
 */
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics)                                                         \
    static inline bool                                                                                  \
    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval,          \
                                                            int64_t newval, const size_t length)        \
    {                                                                                                   \
        switch (length) {                                                                               \
        case 4:                                                                                         \
            return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr,   \
                                                                            (int32_t *) oldval,         \
                                                                            (int32_t) newval);          \
        case 8:                                                                                         \
            return opal_atomic_compare_exchange_strong ## semantics ## 64 ((volatile int64_t *) addr,   \
                                                                            (int64_t *) oldval,         \
                                                                            (int64_t) newval);          \
        }                                                                                               \
        abort();                                                                                        \
    }
#elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics)                                                         \
    static inline bool                                                                                  \
    opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval,          \
                                                            int64_t newval, const size_t length)        \
    {                                                                                                   \
        switch (length) {                                                                               \
        case 4:                                                                                         \
            return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr,   \
                                                                            (int32_t *) oldval,         \
                                                                            (int32_t) newval);          \
        }                                                                                               \
        abort();                                                                                        \
    }
#else
#error "Platform does not have required atomic compare-and-swap functionality"
#endif
|
||||
}
|
||||
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_)
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_)
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_)
|
||||
|
||||
static inline bool
|
||||
opal_atomic_bool_cmpset_acq_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval)
|
||||
{
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
/*
 * Generates opal_atomic_compare_exchange_strong<semantics>ptr(), a
 * pointer-sized compare-exchange built on the fixed-width primitive that
 * matches SIZEOF_VOID_P.
 *
 *   semantics - "_", "_acq_" or "_rel_"; it is pasted into BOTH the generated
 *               function name and the fixed-width primitive that is called, so
 *               the _acq_/_rel_ variants really use the _acq_/_rel_ primitives.
 *
 * Generated function:
 *   addr   - location holding the pointer-sized value
 *   oldval - points to the expected value; passed through to the primitive
 *   newval - replacement pointer value
 *
 * Returns whatever the selected primitive returns. Compilation fails when no
 * primitive of pointer width is available.
 */
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics)                                                             \
    static inline bool                                                                                          \
    opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval)   \
    {                                                                                                           \
        return opal_atomic_compare_exchange_strong ## semantics ## 32 ((volatile int32_t *) addr,               \
                                                                        (int32_t *) oldval, (int32_t) newval);  \
    }
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics)                                                             \
    static inline bool                                                                                          \
    opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval)   \
    {                                                                                                           \
        return opal_atomic_compare_exchange_strong ## semantics ## 64 ((volatile int64_t *) addr,               \
                                                                        (int64_t *) oldval, (int64_t) newval);  \
    }
#else
#error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics"
#endif
|
||||
}
|
||||
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_)
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_)
|
||||
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
|
||||
void* oldval,
|
||||
void* newval)
|
||||
{
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval,
|
||||
(unsigned long) newval);
|
||||
#else
|
||||
abort();
|
||||
#endif
|
||||
}
|
||||
#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
|
||||
|
||||
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
|
||||
|
||||
#if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64)
|
||||
|
||||
@ -383,20 +263,19 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64
|
||||
|
||||
|
||||
static inline void
|
||||
opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
|
||||
opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OPAL_HAVE_ATOMIC_ADD_32
|
||||
case 4:
|
||||
opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
(void) opal_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
break;
|
||||
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
|
||||
#endif /* OPAL_HAVE_ATOMIC_ADD_32 */
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_ADD_64
|
||||
case 8:
|
||||
opal_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
(void) opal_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
break;
|
||||
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */
|
||||
default:
|
||||
@ -413,13 +292,13 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
|
||||
switch( length ) {
|
||||
#if OPAL_HAVE_ATOMIC_SUB_32
|
||||
case 4:
|
||||
opal_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
(void) opal_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
break;
|
||||
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_SUB_64
|
||||
case 8:
|
||||
opal_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
(void) opal_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
break;
|
||||
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */
|
||||
default:
|
||||
@ -429,47 +308,77 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
|
||||
}
|
||||
}
|
||||
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
|
||||
static inline int32_t opal_atomic_add_ptr( volatile void* addr,
|
||||
void* delta )
|
||||
{
|
||||
return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta);
|
||||
}
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
|
||||
static inline int64_t opal_atomic_add_ptr( volatile void* addr,
|
||||
void* delta )
|
||||
{
|
||||
return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta);
|
||||
}
|
||||
#else
|
||||
static inline int32_t opal_atomic_add_ptr( volatile void* addr,
|
||||
void* delta )
|
||||
{
|
||||
abort();
|
||||
return 0;
|
||||
}
|
||||
/*
 * Generates opal_atomic_<op>_fetch_<suffix>() on top of the matching
 * opal_atomic_fetch_<op>_<suffix>(): the fetch variant's return value is
 * combined with the operand once more to produce the op-then-fetch result.
 *
 *   op        - operation name used in both function names
 *   operation - binary C operator applied as (previous operation value)
 *   type      - operand and return type
 *   ptr_type  - pointee type of the address argument
 *   suffix    - width suffix of the underlying fetch primitive
 */
#define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix)                                  \
    static inline type opal_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value)       \
    {                                                                                                       \
        const type previous = opal_atomic_fetch_ ## op ## _ ## suffix (addr, value);                       \
        return previous operation value;                                                                    \
    }

/* 32-bit op-then-fetch wrappers */
OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32)
OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32)

/* 64-bit op-then-fetch wrappers, only when 64-bit atomic math is available */
#if OPAL_HAVE_ATOMIC_MATH_64
OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64)
OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64)
#endif
|
||||
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
|
||||
static inline int32_t opal_atomic_sub_ptr( volatile void* addr,
|
||||
static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr,
|
||||
void* delta )
|
||||
{
|
||||
return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta);
|
||||
}
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32
|
||||
static inline int64_t opal_atomic_sub_ptr( volatile void* addr,
|
||||
void* delta )
|
||||
{
|
||||
return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta);
|
||||
}
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
|
||||
return opal_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta);
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
|
||||
return opal_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta);
|
||||
#else
|
||||
static inline int32_t opal_atomic_sub_ptr( volatile void* addr,
|
||||
abort ();
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Pointer-width add-then-fetch.
 *
 * Forwards to opal_atomic_add_fetch_32() or opal_atomic_add_fetch_64()
 * according to SIZEOF_VOID_P and returns that call's result. `delta` carries
 * the addend encoded as a pointer value. When no add primitive of pointer
 * width is configured the function calls abort().
 */
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr,
                                                  void* delta )
{
    intptr_t updated;

#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
    updated = opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
    updated = opal_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta);
#else
    abort ();
    updated = 0;
#endif

    return updated;
}
|
||||
|
||||
/*
 * Pointer-width fetch-then-subtract.
 *
 * Forwards to opal_atomic_fetch_sub_32() or opal_atomic_fetch_sub_64()
 * according to SIZEOF_VOID_P and returns that call's result. `delta` carries
 * the subtrahend encoded as a pointer value. When no subtract primitive of
 * pointer width is configured the function calls abort().
 */
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
    return opal_atomic_fetch_sub_32((volatile int32_t*) addr, (intptr_t) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64
    /* the 8-byte path needs the 64-bit primitive, so test the 64-bit flag */
    return opal_atomic_fetch_sub_64((volatile int64_t*) addr, (intptr_t) delta);
#else
    abort();
    return 0;
#endif
}
|
||||
|
||||
/*
 * Pointer-width subtract-then-fetch.
 *
 * Forwards to opal_atomic_sub_fetch_32() or opal_atomic_sub_fetch_64()
 * according to SIZEOF_VOID_P and returns that call's result. `delta` carries
 * the subtrahend encoded as a pointer value. When no subtract primitive of
 * pointer width is configured the function calls abort().
 */
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
    return opal_atomic_sub_fetch_32((volatile int32_t*) addr, (intptr_t) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64
    /* the 8-byte path needs the 64-bit primitive, so test the 64-bit flag */
    return opal_atomic_sub_fetch_64((volatile int64_t*) addr, (intptr_t) delta);
#else
    abort();
    return 0;
#endif
}
|
||||
|
||||
#endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */
|
||||
|
||||
@ -493,21 +402,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value )
|
||||
static inline int
|
||||
opal_atomic_trylock(opal_atomic_lock_t *lock)
|
||||
{
|
||||
bool ret = opal_atomic_bool_cmpset_acq_32( &(lock->u.lock),
|
||||
OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED);
|
||||
return (ret == 0) ? 1 : 0;
|
||||
int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED;
|
||||
bool ret = opal_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED);
|
||||
return (ret == false) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
opal_atomic_lock(opal_atomic_lock_t *lock)
|
||||
{
|
||||
while( !opal_atomic_bool_cmpset_acq_32( &(lock->u.lock),
|
||||
OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED) ) {
|
||||
while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) {
|
||||
/* spin */ ;
|
||||
}
|
||||
}
|
||||
while (opal_atomic_trylock (lock)) {
|
||||
while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) {
|
||||
/* spin */ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -33,7 +33,7 @@
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 1
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
#define OPAL_HAVE_ATOMIC_AND_32 1
|
||||
#define OPAL_HAVE_ATOMIC_OR_32 1
|
||||
@ -41,7 +41,7 @@
|
||||
#define OPAL_HAVE_ATOMIC_SUB_32 1
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_32 1
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 1
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
#define OPAL_HAVE_ATOMIC_ADD_64 1
|
||||
#define OPAL_HAVE_ATOMIC_AND_64 1
|
||||
#define OPAL_HAVE_ATOMIC_OR_64 1
|
||||
@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void)
|
||||
#pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L)
|
||||
#endif
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval)
|
||||
@ -110,51 +104,45 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva
|
||||
return oldval;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta)
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
|
||||
@ -164,52 +152,55 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
|
||||
return oldval;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED);
|
||||
return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t oldval, opal_int128_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t *oldval, opal_int128_t newval)
|
||||
{
|
||||
return __atomic_compare_exchange_n (addr, &oldval, newval, false,
|
||||
return __atomic_compare_exchange_n (addr, oldval, newval, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
#elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
|
||||
|
||||
/* __atomic version is not lock-free so use legacy __sync version */
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t oldval, opal_int128_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t *oldval, opal_int128_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap (addr, oldval, newval);
|
||||
opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
|
||||
bool ret = prev == *oldval;
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -40,7 +40,7 @@
|
||||
*********************************************************************/
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 1
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void)
|
||||
*********************************************************************/
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval,
|
||||
int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
unsigned char ret;
|
||||
__asm__ __volatile__ (
|
||||
SMPLOCK "cmpxchgl %3,%2 \n\t"
|
||||
"sete %0 \n\t"
|
||||
: "=qm" (ret), "+a" (oldval), "+m" (*addr)
|
||||
: "=qm" (ret), "+a" (*oldval), "+m" (*addr)
|
||||
: "q"(newval)
|
||||
: "memory", "cc");
|
||||
|
||||
@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32
|
||||
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32
|
||||
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
|
||||
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
@ -132,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr,
|
||||
*
|
||||
* Atomically adds @i to @v.
|
||||
*/
|
||||
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
|
||||
{
|
||||
int ret = i;
|
||||
__asm__ __volatile__(
|
||||
@ -141,7 +139,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret+i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -152,7 +150,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
*
|
||||
* Atomically subtracts @i from @v.
|
||||
*/
|
||||
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
|
||||
{
|
||||
int ret = -i;
|
||||
__asm__ __volatile__(
|
||||
@ -161,7 +159,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret-i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
@ -40,7 +40,7 @@
|
||||
*********************************************************************/
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_32 1
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_32 1
|
||||
|
||||
@ -53,7 +53,7 @@
|
||||
|
||||
|
||||
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
#define OPAL_HAVE_ATOMIC_SWAP_64 1
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_64 1
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 1
|
||||
@ -144,24 +144,25 @@ void opal_atomic_isync(void)
|
||||
#define OPAL_ASM_VALUE64(x) x
|
||||
#endif
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret;
|
||||
int32_t prev;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"1: lwarx %0, 0, %2 \n\t"
|
||||
" cmpw 0, %0, %3 \n\t"
|
||||
" bne- 2f \n\t"
|
||||
" stwcx. %4, 0, %2 \n\t"
|
||||
" bne- 1b \n\t"
|
||||
"2:"
|
||||
: "=&r" (ret), "=m" (*addr)
|
||||
: "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr)
|
||||
: "cc", "memory");
|
||||
__asm__ __volatile__ (
|
||||
"1: lwarx %0, 0, %2 \n\t"
|
||||
" cmpw 0, %0, %3 \n\t"
|
||||
" bne- 2f \n\t"
|
||||
" stwcx. %4, 0, %2 \n\t"
|
||||
" bne- 1b \n\t"
|
||||
"2:"
|
||||
: "=&r" (prev), "=m" (*addr)
|
||||
: "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
|
||||
@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
bool rc;
|
||||
|
||||
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
|
||||
@ -236,20 +235,20 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \
|
||||
static inline int64_t opal_atomic_ ## type ## _64(volatile int64_t* v, int64_t val) \
|
||||
static inline int64_t opal_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \
|
||||
{ \
|
||||
int64_t t; \
|
||||
int64_t t, old; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: ldarx %0, 0, %3 \n\t" \
|
||||
" " #instr " %0, %2, %0 \n\t" \
|
||||
" stdcx. %0, 0, %3 \n\t" \
|
||||
"1: ldarx %1, 0, %4 \n\t" \
|
||||
" " #instr " %0, %3, %1 \n\t" \
|
||||
" stdcx. %0, 0, %4 \n\t" \
|
||||
" bne- 1b \n\t" \
|
||||
: "=&r" (t), "=m" (*v) \
|
||||
: "=&r" (t), "=&r" (old), "=m" (*v) \
|
||||
: "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \
|
||||
: "cc"); \
|
||||
\
|
||||
return t; \
|
||||
return old; \
|
||||
}
|
||||
|
||||
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add)
|
||||
@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or)
|
||||
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor)
|
||||
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf)
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
int64_t prev;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"1: ldarx %0, 0, %2 \n\t"
|
||||
" cmpd 0, %0, %3 \n\t"
|
||||
" bne- 2f \n\t"
|
||||
" stdcx. %4, 0, %2 \n\t"
|
||||
" bne- 1b \n\t"
|
||||
"2:"
|
||||
: "=&r" (ret), "=m" (*addr)
|
||||
: "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr)
|
||||
: "cc", "memory");
|
||||
__asm__ __volatile__ (
|
||||
"1: ldarx %0, 0, %2 \n\t"
|
||||
" cmpd 0, %0, %3 \n\t"
|
||||
" bne- 2f \n\t"
|
||||
" stdcx. %4, 0, %2 \n\t"
|
||||
" bne- 1b \n\t"
|
||||
"2:"
|
||||
: "=&r" (prev), "=m" (*addr)
|
||||
: "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr)
|
||||
: "cc", "memory");
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
|
||||
@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither cmpset_64 nor
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
bool rc;
|
||||
|
||||
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
|
||||
{
|
||||
@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
int64_t prev;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -369,67 +347,65 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr,
|
||||
* is very similar to the pure 64 bit version.
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"ld r4,%2 \n\t"
|
||||
"ld r5,%3 \n\t"
|
||||
"1: ldarx r9, 0, %1 \n\t"
|
||||
" cmpd 0, r9, r4 \n\t"
|
||||
"ld r4,%3 \n\t"
|
||||
"ld r5,%4 \n\t"
|
||||
"1: ldarx %1, 0, %2 \n\t"
|
||||
" cmpd 0, %1, r4 \n\t"
|
||||
" bne- 2f \n\t"
|
||||
" stdcx. r5, 0, %1 \n\t"
|
||||
" stdcx. r5, 0, %2 \n\t"
|
||||
" bne- 1b \n\t"
|
||||
"2: \n\t"
|
||||
"xor r5,r4,r9 \n\t"
|
||||
"xor r5,r4,%1 \n\t"
|
||||
"subfic r9,r5,0 \n\t"
|
||||
"adde %0,r9,r5 \n\t"
|
||||
: "=&r" (ret)
|
||||
: "=&r" (ret), "+r" (prev)
|
||||
: "r"OPAL_ASM_ADDR(addr),
|
||||
"m"(oldval), "m"(newval)
|
||||
"m"(*oldval), "m"(newval)
|
||||
: "r4", "r5", "r9", "cc", "memory");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither cmpset_64 nor
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
*oldval = prev;
|
||||
return (bool) ret;
|
||||
}
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#endif /* OPAL_ASM_SUPPORT_64BIT */
|
||||
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither compare_exchange_strong_64 nor
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
bool rc;
|
||||
|
||||
rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
|
||||
}
|
||||
|
||||
|
||||
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \
|
||||
static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \
|
||||
static inline int32_t opal_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \
|
||||
{ \
|
||||
int32_t t; \
|
||||
int32_t t, old; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: lwarx %0, 0, %3 \n\t" \
|
||||
" " #instr " %0, %2, %0 \n\t" \
|
||||
" stwcx. %0, 0, %3 \n\t" \
|
||||
"1: lwarx %1, 0, %4 \n\t" \
|
||||
" " #instr " %0, %3, %1 \n\t" \
|
||||
" stwcx. %0, 0, %4 \n\t" \
|
||||
" bne- 1b \n\t" \
|
||||
: "=&r" (t), "=m" (*v) \
|
||||
: "=&r" (t), "=&r" (old), "=m" (*v) \
|
||||
: "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \
|
||||
: "cc"); \
|
||||
\
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -12,6 +13,8 @@
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -38,9 +41,9 @@
|
||||
*********************************************************************/
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void)
|
||||
*********************************************************************/
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
|
||||
*
|
||||
* if (*(reg(rs1)) == reg(rs2) )
|
||||
* swap reg(rd), *(reg(rs1))
|
||||
* else
|
||||
* reg(rd) = *(reg(rs1))
|
||||
*/
|
||||
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
|
||||
*
|
||||
* if (*(reg(rs1)) == reg(rs2) )
|
||||
* swap reg(rd), *(reg(rs1))
|
||||
* else
|
||||
* reg(rd) = *(reg(rs1))
|
||||
*/
|
||||
|
||||
int32_t ret = newval;
|
||||
int32_t prev = newval;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__("casa [%1] " ASI_P ", %2, %0"
|
||||
: "+r" (ret)
|
||||
: "r" (addr), "r" (oldval));
|
||||
return (ret == oldval);
|
||||
__asm__ __volatile__("casa [%1] " ASI_P ", %2, %0"
|
||||
: "+r" (prev)
|
||||
: "r" (addr), "r" (*oldval));
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
bool rc;
|
||||
bool rc;
|
||||
|
||||
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
|
||||
}
|
||||
|
||||
|
||||
#if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
|
||||
*
|
||||
@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
* else
|
||||
* reg(rd) = *(reg(rs1))
|
||||
*/
|
||||
int64_t ret = newval;
|
||||
int64_t prev = newval;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0"
|
||||
: "+r" (ret)
|
||||
: "r" (addr), "r" (oldval));
|
||||
return (ret == oldval);
|
||||
__asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0"
|
||||
: "+r" (prev)
|
||||
: "r" (addr), "r" (*oldval));
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
|
||||
*
|
||||
@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
* reg(rd) = *(reg(rs1))
|
||||
*
|
||||
*/
|
||||
long long ret = newval;
|
||||
int64_t prev = newval;
|
||||
bool ret;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldx %0, %%g1 \n\t" /* g1 = ret */
|
||||
"ldx %2, %%g2 \n\t" /* g2 = oldval */
|
||||
"casxa [%1] " ASI_P ", %%g2, %%g1 \n\t"
|
||||
"stx %%g1, %0 \n"
|
||||
: "+m"(ret)
|
||||
: "r"(addr), "m"(oldval)
|
||||
: "+m"(prev)
|
||||
: "r"(addr), "m"(*oldval)
|
||||
: "%g1", "%g2"
|
||||
);
|
||||
|
||||
return (ret == oldval);
|
||||
ret = (prev == *oldval);
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
bool rc;
|
||||
bool rc;
|
||||
|
||||
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
|
||||
opal_atomic_rmb();
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
|
||||
opal_atomic_wmb();
|
||||
return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
|
||||
}
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
@ -53,119 +53,110 @@ static inline void opal_atomic_wmb(void)
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);
|
||||
int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
|
||||
bool ret = prev == *oldval;
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);
|
||||
}
|
||||
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
|
||||
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_MATH_32 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_ADD_32 1
|
||||
static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta)
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return __sync_add_and_fetch(addr, delta);
|
||||
return __sync_fetch_and_add(addr, delta);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_AND_32 1
|
||||
static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __sync_and_and_fetch(addr, value);
|
||||
return __sync_fetch_and_and(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_OR_32 1
|
||||
static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __sync_or_and_fetch(addr, value);
|
||||
return __sync_fetch_and_or(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_XOR_32 1
|
||||
static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
|
||||
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
|
||||
{
|
||||
return __sync_xor_and_fetch(addr, value);
|
||||
return __sync_fetch_and_xor(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_SUB_32 1
|
||||
static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return __sync_sub_and_fetch(addr, delta);
|
||||
return __sync_fetch_and_sub(addr, delta);
|
||||
}
|
||||
|
||||
#if OPAL_ASM_SYNC_HAVE_64BIT
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);
|
||||
int64_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
|
||||
bool ret = prev == *oldval;
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);}
|
||||
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);
|
||||
}
|
||||
#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
|
||||
#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_MATH_64 1
|
||||
#define OPAL_HAVE_ATOMIC_ADD_64 1
|
||||
static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
return __sync_add_and_fetch(addr, delta);
|
||||
return __sync_fetch_and_add(addr, delta);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_AND_64 1
|
||||
static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __sync_and_and_fetch(addr, value);
|
||||
return __sync_fetch_and_and(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_OR_64 1
|
||||
static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __sync_or_and_fetch(addr, value);
|
||||
return __sync_fetch_and_or(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_XOR_64 1
|
||||
static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
|
||||
static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
|
||||
{
|
||||
return __sync_xor_and_fetch(addr, value);
|
||||
return __sync_fetch_and_xor(addr, value);
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_SUB_64 1
|
||||
static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
return __sync_sub_and_fetch(addr, delta);
|
||||
return __sync_fetch_and_sub(addr, delta);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
|
||||
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t oldval, opal_int128_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
|
||||
opal_int128_t *oldval, opal_int128_t newval)
|
||||
{
|
||||
return __sync_bool_compare_and_swap(addr, oldval, newval);
|
||||
opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
|
||||
bool ret = prev == *oldval;
|
||||
*oldval = prev;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -40,9 +40,9 @@
|
||||
*********************************************************************/
|
||||
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void)
|
||||
*********************************************************************/
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
|
||||
{
|
||||
unsigned char ret;
|
||||
__asm__ __volatile__ (
|
||||
SMPLOCK "cmpxchgl %3,%2 \n\t"
|
||||
"sete %0 \n\t"
|
||||
: "=qm" (ret), "+a" (oldval), "+m" (*addr)
|
||||
: "=qm" (ret), "+a" (*oldval), "+m" (*addr)
|
||||
: "q"(newval)
|
||||
: "memory", "cc");
|
||||
|
||||
@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32
|
||||
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32
|
||||
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
|
||||
#define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
unsigned char ret;
|
||||
__asm__ __volatile__ (
|
||||
SMPLOCK "cmpxchgq %3,%2 \n\t"
|
||||
"sete %0 \n\t"
|
||||
: "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr))
|
||||
: "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr))
|
||||
: "q"(newval)
|
||||
: "memory", "cc"
|
||||
);
|
||||
@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64
|
||||
#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64
|
||||
#define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
|
||||
#define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T
|
||||
|
||||
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval,
|
||||
opal_int128_t newval)
|
||||
static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval)
|
||||
{
|
||||
unsigned char ret;
|
||||
|
||||
@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op
|
||||
* at the address is returned in eax:edx. */
|
||||
__asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t"
|
||||
"sete %0 \n\t"
|
||||
: "=qm" (ret)
|
||||
: "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]),
|
||||
"a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1])
|
||||
: "memory", "cc");
|
||||
: "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1])
|
||||
: "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1])
|
||||
: "memory", "cc");
|
||||
|
||||
return (bool) ret;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_CMPSET_128 1
|
||||
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
@ -200,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr,
|
||||
*
|
||||
* Atomically adds @i to @v.
|
||||
*/
|
||||
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
|
||||
{
|
||||
int ret = i;
|
||||
__asm__ __volatile__(
|
||||
@ -209,7 +205,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret+i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_ADD_64 1
|
||||
@ -221,7 +217,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
|
||||
*
|
||||
* Atomically adds @i to @v.
|
||||
*/
|
||||
static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
|
||||
static inline int64_t opal_atomic_fetch_add_64(volatile int64_t* v, int64_t i)
|
||||
{
|
||||
int64_t ret = i;
|
||||
__asm__ __volatile__(
|
||||
@ -230,7 +226,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret+i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_SUB_32 1
|
||||
@ -242,7 +238,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
|
||||
*
|
||||
* Atomically subtracts @i from @v.
|
||||
*/
|
||||
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
|
||||
static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
|
||||
{
|
||||
int ret = -i;
|
||||
__asm__ __volatile__(
|
||||
@ -251,7 +247,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret-i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_SUB_64 1
|
||||
@ -263,7 +259,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
|
||||
*
|
||||
* Atomically subtracts @i from @v.
|
||||
*/
|
||||
static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i)
|
||||
static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t* v, int64_t i)
|
||||
{
|
||||
int64_t ret = -i;
|
||||
__asm__ __volatile__(
|
||||
@ -272,7 +268,7 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i)
|
||||
:
|
||||
:"memory", "cc"
|
||||
);
|
||||
return (ret-i);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
|
||||
|
@ -1119,7 +1119,7 @@ int mca_btl_openib_add_procs(
|
||||
}
|
||||
|
||||
if (nprocs_new) {
|
||||
opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new);
|
||||
opal_atomic_add_fetch_32 (&openib_btl->num_peers, nprocs_new);
|
||||
|
||||
/* adjust cq sizes given the new procs */
|
||||
rc = openib_btl_size_queues (openib_btl);
|
||||
@ -1229,7 +1229,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
|
||||
|
||||
/* this is a new process to this openib btl
|
||||
* account this procs if need */
|
||||
opal_atomic_add_32 (&openib_btl->num_peers, 1);
|
||||
opal_atomic_add_fetch_32 (&openib_btl->num_peers, 1);
|
||||
rc = openib_btl_size_queues(openib_btl);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating cqs"));
|
||||
|
@ -237,7 +237,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
|
||||
/* Set the flag to fatal */
|
||||
device->got_fatal_event = true;
|
||||
/* It is not critical to protect the counter */
|
||||
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
|
||||
/* fall through */
|
||||
case IBV_EVENT_CQ_ERR:
|
||||
case IBV_EVENT_QP_FATAL:
|
||||
@ -280,7 +280,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
|
||||
openib_event_to_str((enum ibv_event_type)event_type));
|
||||
/* Set the flag to indicate port error */
|
||||
device->got_port_event = true;
|
||||
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
|
||||
break;
|
||||
case IBV_EVENT_COMM_EST:
|
||||
case IBV_EVENT_PORT_ACTIVE:
|
||||
@ -470,7 +470,7 @@ void mca_btl_openib_async_fini (void)
|
||||
void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device)
|
||||
{
|
||||
if (mca_btl_openib_component.async_evbase) {
|
||||
if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) {
|
||||
if (1 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, 1)) {
|
||||
mca_btl_openib_async_init ();
|
||||
}
|
||||
opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event,
|
||||
@ -484,7 +484,7 @@ void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device)
|
||||
{
|
||||
if (mca_btl_openib_component.async_evbase) {
|
||||
opal_event_del (&device->async_event);
|
||||
if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) {
|
||||
if (0 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, -1)) {
|
||||
mca_btl_openib_async_fini ();
|
||||
}
|
||||
}
|
||||
|
@ -3203,7 +3203,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
credits = hdr->credits;
|
||||
|
||||
if(hdr->cm_seen)
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
|
||||
|
||||
/* Now return fragment. Don't touch hdr after this point! */
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
@ -3215,7 +3215,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail);
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf))
|
||||
break;
|
||||
OPAL_THREAD_ADD32(&erl->credits, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&erl->credits, 1);
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&erl->lock);
|
||||
@ -3233,14 +3233,14 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
MCA_BTL_IB_FRAG_RETURN(frag);
|
||||
if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
|
||||
if (OPAL_UNLIKELY(is_credit_msg)) {
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
|
||||
} else {
|
||||
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
|
||||
}
|
||||
mca_btl_openib_endpoint_post_rr(ep, cqp);
|
||||
} else {
|
||||
mca_btl_openib_module_t *btl = ep->endpoint_btl;
|
||||
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
|
||||
mca_btl_openib_post_srr(btl, rqp);
|
||||
}
|
||||
}
|
||||
@ -3251,10 +3251,10 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
/* If we got any credits (RDMA or send), then try to progress all
|
||||
the no_credits_pending_frags lists */
|
||||
if (rcredits > 0) {
|
||||
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
|
||||
}
|
||||
if (credits > 0) {
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
|
||||
}
|
||||
if (rcredits + credits > 0) {
|
||||
int rc;
|
||||
@ -3303,7 +3303,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
credits = hdr->credits;
|
||||
|
||||
if(hdr->cm_seen)
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
|
||||
|
||||
/* We should not be here with eager, control, or credit messages */
|
||||
assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA);
|
||||
@ -3314,11 +3314,11 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
/* Otherwise, FRAG_RETURN it and repost if necessary */
|
||||
MCA_BTL_IB_FRAG_RETURN(frag);
|
||||
if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
|
||||
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
|
||||
mca_btl_openib_endpoint_post_rr(ep, cqp);
|
||||
} else {
|
||||
mca_btl_openib_module_t *btl = ep->endpoint_btl;
|
||||
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
|
||||
mca_btl_openib_post_srr(btl, rqp);
|
||||
}
|
||||
|
||||
@ -3327,10 +3327,10 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
|
||||
/* If we got any credits (RDMA or send), then try to progress all
|
||||
the no_credits_pending_frags lists */
|
||||
if (rcredits > 0) {
|
||||
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
|
||||
}
|
||||
if (credits > 0) {
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
|
||||
}
|
||||
if (rcredits + credits > 0) {
|
||||
int rc;
|
||||
@ -3523,7 +3523,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
case IBV_WC_FETCH_ADD:
|
||||
OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE"));
|
||||
|
||||
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->get_tokens, 1);
|
||||
|
||||
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
|
||||
|
||||
@ -3575,7 +3575,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des));
|
||||
|
||||
if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
|
||||
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
|
||||
|
||||
/* new SRQ credit available. Try to progress pending frags*/
|
||||
progress_pending_frags_srq(openib_btl, qp);
|
||||
@ -3601,7 +3601,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
wc->byte_len < mca_btl_openib_component.eager_limit &&
|
||||
openib_btl->eager_rdma_channels <
|
||||
mca_btl_openib_component.max_eager_rdma &&
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) ==
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_recv_count, 1) ==
|
||||
mca_btl_openib_component.eager_rdma_threshold) {
|
||||
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
|
||||
}
|
||||
@ -3934,7 +3934,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp)
|
||||
if(OPAL_LIKELY(0 == rc)) {
|
||||
struct ibv_srq_attr srq_attr;
|
||||
|
||||
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post);
|
||||
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post);
|
||||
|
||||
if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) {
|
||||
srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num;
|
||||
|
@ -96,7 +96,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
|
||||
|
||||
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \
|
||||
do { \
|
||||
(SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \
|
||||
(SEQ) = OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1; \
|
||||
(OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \
|
||||
} while(0)
|
||||
|
||||
@ -108,7 +108,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
|
||||
|
||||
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
|
||||
do { \
|
||||
(OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
|
||||
(OLD_HEAD) = (OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
|
||||
} while(0)
|
||||
|
||||
#endif
|
||||
|
@ -212,7 +212,7 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
|
||||
qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
|
||||
rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
|
||||
if (0 == rc) {
|
||||
opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr);
|
||||
opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr);
|
||||
}
|
||||
} else {
|
||||
ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
|
||||
@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
||||
|
||||
/* Release memory resources */
|
||||
do {
|
||||
void *_tmp_ptr = NULL;
|
||||
/* Make sure that mca_btl_openib_endpoint_connect_eager_rdma ()
|
||||
* was not in "connect" or "bad" flow (failed to allocate memory)
|
||||
* and changed the pointer back to NULL
|
||||
*/
|
||||
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) {
|
||||
if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) {
|
||||
if (NULL != endpoint->eager_rdma_local.reg) {
|
||||
endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
|
||||
&endpoint->eager_rdma_local.reg->base);
|
||||
@ -766,9 +767,9 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
|
||||
do_rdma = true;
|
||||
} else {
|
||||
if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
|
||||
if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
|
||||
(mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
|
||||
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
|
||||
return;
|
||||
}
|
||||
@ -781,7 +782,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
if(cm_return > 255) {
|
||||
frag->hdr->cm_seen = 255;
|
||||
cm_return -= 255;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
} else {
|
||||
frag->hdr->cm_seen = cm_return;
|
||||
}
|
||||
@ -802,14 +803,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
|
||||
}
|
||||
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits,
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits,
|
||||
frag->hdr->credits);
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits,
|
||||
credits_hdr->rdma_credits);
|
||||
if(do_rdma)
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
else
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
|
||||
|
||||
BTL_ERROR(("error posting send request errno %d says %s", rc,
|
||||
strerror(errno)));
|
||||
@ -823,7 +824,7 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
|
||||
int status)
|
||||
{
|
||||
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
|
||||
OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1);
|
||||
assert(device->non_eager_rdma_endpoints >= 0);
|
||||
MCA_BTL_IB_FRAG_RETURN(descriptor);
|
||||
}
|
||||
@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
||||
mca_btl_openib_recv_frag_t *headers_buf;
|
||||
int i, rc;
|
||||
uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
|
||||
void *_tmp_ptr = NULL;
|
||||
|
||||
/* Set local rdma pointer to 1 temporarily so other threads will not try
|
||||
* to enter the function */
|
||||
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
|
||||
(void*)1))
|
||||
if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr,
|
||||
(void *) 1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
headers_buf = (mca_btl_openib_recv_frag_t*)
|
||||
malloc(sizeof(mca_btl_openib_recv_frag_t) *
|
||||
@ -975,22 +978,23 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
||||
endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;
|
||||
|
||||
/* set local rdma pointer to real value */
|
||||
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
|
||||
(void*)1, buf);
|
||||
endpoint->eager_rdma_local.base.pval = buf;
|
||||
endpoint->eager_rdma_local.alloc_base = alloc_base;
|
||||
|
||||
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
|
||||
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
|
||||
mca_btl_openib_endpoint_t **p;
|
||||
void *_tmp_ptr;
|
||||
OBJ_RETAIN(endpoint);
|
||||
assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
|
||||
do {
|
||||
_tmp_ptr = NULL;
|
||||
p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
|
||||
} while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint));
|
||||
} while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint));
|
||||
|
||||
OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1);
|
||||
/* from this point progress function starts to poll new buffer */
|
||||
OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1001,8 +1005,7 @@ free_headers_buf:
|
||||
free(headers_buf);
|
||||
unlock_rdma_local:
|
||||
/* set local rdma pointer back to zero. Will retry later */
|
||||
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
|
||||
endpoint->eager_rdma_local.base.pval, NULL);
|
||||
endpoint->eager_rdma_local.base.pval = NULL;
|
||||
endpoint->eager_rdma_local.frags = NULL;
|
||||
}
|
||||
|
||||
|
@ -277,19 +277,19 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
|
||||
|
||||
static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, -1);
|
||||
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, -1);
|
||||
}
|
||||
|
||||
static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, 1);
|
||||
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, 1);
|
||||
}
|
||||
|
||||
|
||||
static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
|
||||
{
|
||||
frag->n_wqes_inflight = 0;
|
||||
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe_inflight, 1);
|
||||
return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe_inflight, 1);
|
||||
}
|
||||
|
||||
static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
|
||||
@ -303,7 +303,7 @@ static inline int qp_frag_to_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mc
|
||||
{
|
||||
int n;
|
||||
n = frag->n_wqes_inflight;
|
||||
OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, n);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, n);
|
||||
frag->n_wqes_inflight = 0;
|
||||
|
||||
return n;
|
||||
@ -420,15 +420,15 @@ static inline int mca_btl_openib_endpoint_post_rr_nolock(
|
||||
if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_posted, num_post);
|
||||
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_credits, num_post);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_posted, num_post);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_credits, num_post);
|
||||
|
||||
/* post buffers for credit management on credit management qp */
|
||||
if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_return, cm_received);
|
||||
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_return, cm_received);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received);
|
||||
|
||||
assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num &&
|
||||
ep->qps[qp].u.pp_qp.rd_credits >= 0);
|
||||
@ -446,14 +446,16 @@ static inline int mca_btl_openib_endpoint_post_rr(
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \
|
||||
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1)
|
||||
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
|
||||
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0)
|
||||
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \
|
||||
do { \
|
||||
TO = FROM; \
|
||||
} while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0))
|
||||
static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp)
|
||||
{
|
||||
int32_t _tmp_value = 0;
|
||||
return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1);
|
||||
}
|
||||
|
||||
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
|
||||
OPAL_ATOMIC_SWAP_32 (&(E)->qps[(Q)].rd_credit_send_lock, 0)
|
||||
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \
|
||||
TO = OPAL_ATOMIC_SWAP_32(&FROM, 0)
|
||||
|
||||
|
||||
static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep)
|
||||
@ -486,7 +488,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp)
|
||||
return;
|
||||
|
||||
try_send:
|
||||
if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp))
|
||||
if(btl_openib_credits_send_trylock(ep, qp))
|
||||
mca_btl_openib_endpoint_send_credits(ep, qp);
|
||||
}
|
||||
|
||||
@ -530,8 +532,8 @@ ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp, int do_signal)
|
||||
static inline int
|
||||
acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint)
|
||||
{
|
||||
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
if(OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -636,8 +638,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
|
||||
prio = !prio;
|
||||
|
||||
if (BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
if (OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
|
||||
if (OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) {
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
|
||||
if (queue_frag) {
|
||||
opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio],
|
||||
(opal_list_item_t *)frag);
|
||||
@ -646,8 +648,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
} else {
|
||||
if(OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) {
|
||||
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
|
||||
if(OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) {
|
||||
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
|
||||
if (queue_frag) {
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio],
|
||||
@ -682,7 +684,7 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
|
||||
if(cm_return > 255) {
|
||||
hdr->cm_seen = 255;
|
||||
cm_return -= 255;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
} else {
|
||||
hdr->cm_seen = cm_return;
|
||||
}
|
||||
@ -697,18 +699,18 @@ static inline void mca_btl_openib_endpoint_credit_release (struct mca_btl_base_e
|
||||
mca_btl_openib_header_t *hdr = frag->hdr;
|
||||
|
||||
if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits));
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits));
|
||||
}
|
||||
|
||||
if (do_rdma) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
|
||||
} else {
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
OPAL_THREAD_ADD32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
|
||||
OPAL_THREAD_ADD_FETCH32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
|
||||
OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
|
||||
} else if BTL_OPENIB_QP_TYPE_SRQ(qp){
|
||||
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
|
||||
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -148,9 +148,9 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
|
||||
}
|
||||
|
||||
/* check for a get token */
|
||||
if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) {
|
||||
if (OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,-1) < 0) {
|
||||
qp_put_wqe(ep, qp);
|
||||
OPAL_THREAD_ADD32(&ep->get_tokens,1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -159,7 +159,7 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
|
||||
|
||||
if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) {
|
||||
qp_put_wqe(ep, qp);
|
||||
OPAL_THREAD_ADD32(&ep->get_tokens,1);
|
||||
OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
|
@ -423,7 +423,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
curr_proc,
|
||||
&btl_peer_data[i]);
|
||||
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1);
|
||||
/* and here we can reach */
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
|
||||
@ -476,7 +476,7 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl,
|
||||
portals4 entry in proc_endpoints instead of the peer_data */
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
free(btl_peer_data[i]);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, -1);
|
||||
}
|
||||
|
||||
if (0 == portals4_btl->portals_num_procs)
|
||||
@ -537,7 +537,7 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
|
||||
if (frag->me_h != PTL_INVALID_HANDLE) {
|
||||
frag->me_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag);
|
||||
@ -622,7 +622,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
|
||||
handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1);
|
||||
handle->remote_offset = 0;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
@ -662,7 +662,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
|
@ -609,7 +609,7 @@ mca_btl_portals4_component_progress(void)
|
||||
mca_btl_portals4_free(&portals4_btl->super, &frag->base);
|
||||
}
|
||||
if (0 != frag->size) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
|
||||
portals4_btl->portals_outstanding_ops));
|
||||
@ -646,7 +646,7 @@ mca_btl_portals4_component_progress(void)
|
||||
}
|
||||
|
||||
if (0 != frag->size) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
|
||||
}
|
||||
@ -749,7 +749,7 @@ mca_btl_portals4_component_progress(void)
|
||||
OPAL_SUCCESS);
|
||||
|
||||
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
|
||||
goto done;
|
||||
|
@ -53,16 +53,16 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
|
||||
int ret;
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
|
||||
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
|
||||
if (NULL == frag){
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
|
@ -49,9 +49,9 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"Call to mca_btl_portals4_component_progress (4)\n"));
|
||||
mca_btl_portals4_component_progress();
|
||||
|
@ -636,7 +636,7 @@ int mca_btl_smcuda_add_procs(
|
||||
/* Sync with other local procs. Force the FIFO initialization to always
|
||||
* happens before the readers access it.
|
||||
*/
|
||||
(void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
|
||||
(void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
|
||||
while( n_local_procs >
|
||||
mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) {
|
||||
opal_progress();
|
||||
@ -976,7 +976,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
|
||||
* the return code indicates failure, the write has still "completed" from
|
||||
* our point of view: it has been posted to a "pending send" queue.
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
|
||||
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
|
||||
(void)rc; /* this is safe to ignore as the message is requeued till success */
|
||||
@ -1026,7 +1026,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
|
||||
* post the descriptor in the queue - post with the relative
|
||||
* address
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
|
||||
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
|
||||
if( OPAL_LIKELY(0 == rc) ) {
|
||||
@ -1241,7 +1241,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b
|
||||
* the return code indicates failure, the write has still "completed" from
|
||||
* our point of view: it has been posted to a "pending send" queue.
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
|
||||
"Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d",
|
||||
endpoint->ipctries,
|
||||
|
@ -658,7 +658,7 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl,
|
||||
* the return code indicates failure, the write has still "completed" from
|
||||
* our point of view: it has been posted to a "pending send" queue.
|
||||
*/
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
|
||||
|
||||
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
|
||||
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
|
||||
@ -980,7 +980,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
|
||||
|
||||
if(NULL == si) return; /* Another thread got in before us. Thats ok. */
|
||||
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1);
|
||||
|
||||
MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data,
|
||||
true, false, rc);
|
||||
@ -1093,7 +1093,7 @@ int mca_btl_smcuda_component_progress(void)
|
||||
if( btl_ownership ) {
|
||||
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
|
||||
}
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, -1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1);
|
||||
if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) {
|
||||
btl_smcuda_process_pending_sends(endpoint);
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend)
|
||||
si = (btl_smcuda_pending_send_item_t*)i;
|
||||
si->data = data;
|
||||
|
||||
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1);
|
||||
OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1);
|
||||
|
||||
/* if data was on pending send list then prepend it to the list to
|
||||
* minimize reordering */
|
||||
|
@ -272,7 +272,7 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
|
||||
|
||||
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||
opal_atomic_add_32(&ugni_module->reg_count,1);
|
||||
opal_atomic_add_fetch_32(&ugni_module->reg_count,1);
|
||||
}
|
||||
|
||||
return rc;
|
||||
@ -286,7 +286,7 @@ ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
|
||||
|
||||
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||
opal_atomic_add_32(&ugni_module->reg_count,-1);
|
||||
opal_atomic_add_fetch_32(&ugni_module->reg_count,-1);
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -543,7 +543,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
|
||||
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
|
||||
|
||||
ep->dg_posted = false;
|
||||
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1);
|
||||
(void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, -1);
|
||||
}
|
||||
|
||||
(void) mca_btl_ugni_ep_connect_progress (ep);
|
||||
|
@ -181,7 +181,7 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec
|
||||
}
|
||||
} while (device->dev_smsg_local_cq.active_operations);
|
||||
|
||||
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
|
||||
(void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
|
||||
}
|
||||
|
||||
mca_btl_ugni_device_lock (device);
|
||||
@ -278,7 +278,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
|
||||
|
||||
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
|
||||
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
|
||||
(void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
|
||||
|
||||
/* send all pending messages */
|
||||
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
|
||||
@ -312,7 +312,7 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
|
||||
ep->remote_attr, sizeof (*ep->remote_attr),
|
||||
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
|
||||
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1);
|
||||
(void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
|
||||
}
|
||||
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
|
@ -192,7 +192,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
|
||||
|
||||
opal_atomic_mb ();
|
||||
|
||||
ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1);
|
||||
ref_cnt = OPAL_THREAD_ADD_FETCH32(&frag->ref_cnt, -1);
|
||||
if (ref_cnt) {
|
||||
assert (ref_cnt > 0);
|
||||
return false;
|
||||
|
@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
mca_btl_ugni_base_frag_t frag;
|
||||
mca_btl_base_segment_t seg;
|
||||
bool disconnect = false;
|
||||
int32_t _tmp_value = 0;
|
||||
uintptr_t data_ptr;
|
||||
gni_return_t rc;
|
||||
uint32_t len;
|
||||
int count = 0;
|
||||
|
||||
if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) {
|
||||
if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) {
|
||||
/* already progressing (we can't support reentry here) */
|
||||
return 0;
|
||||
}
|
||||
|
@ -261,14 +261,14 @@ static inline bool mca_btl_vader_check_fboxes (void)
|
||||
|
||||
static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
|
||||
{
|
||||
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) {
|
||||
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) {
|
||||
/* protect access to mca_btl_vader_component.segment_offset */
|
||||
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
|
||||
|
||||
if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size &&
|
||||
mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) {
|
||||
/* verify the remote side will accept another fbox */
|
||||
if (0 <= opal_atomic_add_32 (&ep->fifo->fbox_available, -1)) {
|
||||
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
|
||||
void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
|
||||
mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size;
|
||||
|
||||
@ -280,7 +280,7 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc
|
||||
hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
|
||||
++mca_btl_vader_component.fbox_count;
|
||||
} else {
|
||||
opal_atomic_add_32 (&ep->fifo->fbox_available, 1);
|
||||
opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
|
||||
}
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -30,8 +30,9 @@
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_frag.h"
|
||||
|
||||
#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z))
|
||||
|
||||
#if SIZEOF_VOID_P == 8
|
||||
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z))
|
||||
#define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y))
|
||||
|
||||
#define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll
|
||||
@ -40,7 +41,6 @@
|
||||
|
||||
typedef int64_t fifo_value_t;
|
||||
#else
|
||||
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z))
|
||||
#define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y))
|
||||
|
||||
#define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl
|
||||
@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m
|
||||
if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
|
||||
opal_atomic_rmb();
|
||||
|
||||
if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) {
|
||||
if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) {
|
||||
while (VADER_FIFO_FREE == hdr->next) {
|
||||
opal_atomic_rmb ();
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
|
||||
vader_ctx->reg[0] = reg;
|
||||
|
||||
if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) {
|
||||
(void)opal_atomic_add (&reg->ref_count, 1);
|
||||
opal_atomic_add (&reg->ref_count, 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -93,7 +93,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
|
||||
/* start the new segment from the lower of the two bases */
|
||||
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
|
||||
|
||||
if (OPAL_LIKELY(0 == opal_atomic_add_32 (&reg->ref_count, -1))) {
|
||||
if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (&reg->ref_count, -1))) {
|
||||
/* this pointer is not in use */
|
||||
(void) xpmem_detach (reg->rcache_context);
|
||||
OBJ_RELEASE(reg);
|
||||
@ -143,7 +143,7 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_
|
||||
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
|
||||
int32_t ref_count;
|
||||
|
||||
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
|
||||
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
|
||||
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
|
||||
/* protect rcache access */
|
||||
mca_rcache_base_vma_delete (vma_module, reg);
|
||||
|
@ -131,7 +131,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp,
|
||||
}
|
||||
|
||||
/* increment the number of processes that are attached to the segment. */
|
||||
(void)opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1);
|
||||
(void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1);
|
||||
|
||||
/* commit the changes before we return */
|
||||
opal_atomic_wmb();
|
||||
|
@ -131,7 +131,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
|
||||
if (huge_page->path) {
|
||||
int32_t count;
|
||||
|
||||
count = opal_atomic_add_32 (&huge_page->count, 1);
|
||||
count = opal_atomic_add_fetch_32 (&huge_page->count, 1);
|
||||
|
||||
rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
|
||||
getpid (), count);
|
||||
|
@ -232,7 +232,7 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_
|
||||
|
||||
/* This segment fits fully within an existing segment. */
|
||||
rcache_grdma->stat_cache_hit++;
|
||||
int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1);
|
||||
int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1);
|
||||
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
|
||||
(void)ref_cnt;
|
||||
@ -296,7 +296,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
|
||||
/* get updated access flags */
|
||||
access_flags = find_args.access_flags;
|
||||
|
||||
OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
|
||||
OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
|
||||
}
|
||||
|
||||
item = opal_free_list_get_mt (&rcache_grdma->reg_list);
|
||||
@ -380,7 +380,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
|
||||
(opal_list_item_t*)(*reg));
|
||||
}
|
||||
rcache_grdma->stat_cache_found++;
|
||||
opal_atomic_add_32 (&(*reg)->ref_count, 1);
|
||||
opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1);
|
||||
} else {
|
||||
rcache_grdma->stat_cache_notfound++;
|
||||
}
|
||||
@ -398,7 +398,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
|
||||
int rc;
|
||||
|
||||
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
|
||||
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||
"returning registration %p, remaining references %d", (void *) reg, ref_count));
|
||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче
Block a user