1
1

Merge pull request #4552 from hjelmn/asm_cleanup2

Add atomic fetch-and-op and compare-exchange functions
Этот коммит содержится в:
bosilca 2017-11-30 22:29:38 -05:00 коммит произвёл GitHub
родитель 0fcc996c41 7893248c5a
Коммит 5cb72aa568
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
110 изменённых файлов: 1534 добавлений и 1466 удалений

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science * Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -378,7 +378,7 @@ int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data,
* a read only memory). * a read only memory).
*/ */
if( NULL != pArgs ) { if( NULL != pArgs ) {
OPAL_THREAD_ADD32(&pArgs->ref_count, 1); OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, 1);
dest_data->args = pArgs; dest_data->args = pArgs;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -396,7 +396,7 @@ int32_t ompi_datatype_release_args( ompi_datatype_t* pData )
ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args;
assert( 0 < pArgs->ref_count ); assert( 0 < pArgs->ref_count );
OPAL_THREAD_ADD32(&pArgs->ref_count, -1); OPAL_THREAD_ADD_FETCH32(&pArgs->ref_count, -1);
if( 0 == pArgs->ref_count ) { if( 0 == pArgs->ref_count ) {
/* There are some duplicated datatypes around that have a pointer to this /* There are some duplicated datatypes around that have a pointer to this
* args. We will release them only when the last datatype disappears. * args. We will release them only when the last datatype disappears.
@ -487,7 +487,8 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
void* recursive_buffer; void* recursive_buffer;
if (NULL == packed_description) { if (NULL == packed_description) {
if (opal_atomic_bool_cmpset (&datatype->packed_description, NULL, (void *) 1)) { void *_tmp_ptr = NULL;
if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) {
if( ompi_datatype_is_predefined(datatype) ) { if( ompi_datatype_is_predefined(datatype) ) {
packed_description = malloc(2 * sizeof(int)); packed_description = malloc(2 * sizeof(int));
} else if( NULL == args ) { } else if( NULL == args ) {

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -356,7 +356,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
ompi_proc_t *real_proc = ompi_proc_t *real_proc =
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));
if (opal_atomic_bool_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) {
OBJ_RETAIN(real_proc); OBJ_RETAIN(real_proc);
} }

Просмотреть файл

@ -314,7 +314,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module)
/* if we ever were used for a collective op, do the progress cleanup. */ /* if we ever were used for a collective op, do the progress cleanup. */
if (true == module->comm_registered) { if (true == module->comm_registered) {
int32_t tmp = int32_t tmp =
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1); OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, -1);
if (0 == tmp) { if (0 == tmp) {
opal_progress_unregister(ompi_coll_libnbc_progress); opal_progress_unregister(ompi_coll_libnbc_progress);
} }

Просмотреть файл

@ -618,7 +618,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t
/* register progress */ /* register progress */
if (need_register) { if (need_register) {
int32_t tmp = int32_t tmp =
OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1); OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1);
if (tmp == 1) { if (tmp == 1) {
opal_progress_register(ompi_coll_libnbc_progress); opal_progress_register(ompi_coll_libnbc_progress);
} }

Просмотреть файл

@ -120,7 +120,7 @@ static int
mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{ {
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 1 == opal_atomic_add_32(&monitoring_module->is_initialized, 1) ) { if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm); MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm);
monitoring_module->data = mca_common_monitoring_coll_new(comm); monitoring_module->data = mca_common_monitoring_coll_new(comm);
OPAL_MONITORING_PRINT_INFO("coll_module_enabled"); OPAL_MONITORING_PRINT_INFO("coll_module_enabled");
@ -132,7 +132,7 @@ static int
mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{ {
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 0 == opal_atomic_sub_32(&monitoring_module->is_initialized, 1) ) { if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm); MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm);
mca_common_monitoring_coll_release(monitoring_module->data); mca_common_monitoring_coll_release(monitoring_module->data);
monitoring_module->data = NULL; monitoring_module->data = NULL;

Просмотреть файл

@ -68,7 +68,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h; zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h; data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&module->coll_count, 1); internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
/* /*
** DATATYPE and SIZES ** DATATYPE and SIZES

Просмотреть файл

@ -44,7 +44,7 @@ barrier_hypercube_top(struct ompi_communicator_t *comm,
request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER; request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER;
count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, ret = PtlCTAlloc(mca_coll_portals4_component.ni_h,
&request->u.barrier.rtr_ct_h); &request->u.barrier.rtr_ct_h);

Просмотреть файл

@ -176,7 +176,7 @@ bcast_kary_tree_top(void *buff, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h; zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h; data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
/* /*
@ -513,7 +513,7 @@ bcast_pipeline_top(void *buff, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h; zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h; data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); internal_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
/* /*
** DATATYPE and SIZES ** DATATYPE and SIZES

Просмотреть файл

@ -582,7 +582,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
/* Setup Common Parameters */ /* Setup Common Parameters */
/**********************************/ /**********************************/
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank ); COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank );
bmtree = portals4_module->cached_in_order_bmtree; bmtree = portals4_module->cached_in_order_bmtree;
@ -879,7 +879,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank); i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank);
request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); request->u.gather.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = setup_gather_buffers_linear(comm, request, portals4_module); ret = setup_gather_buffers_linear(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

Просмотреть файл

@ -69,7 +69,7 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
zero_md_h = mca_coll_portals4_component.zero_md_h; zero_md_h = mca_coll_portals4_component.zero_md_h;
data_md_h = mca_coll_portals4_component.data_md_h; data_md_h = mca_coll_portals4_component.data_md_h;
internal_count = opal_atomic_add_size_t(&module->coll_count, 1); internal_count = opal_atomic_add_fetch_size_t(&module->coll_count, 1);
/* /*
** DATATYPE and SIZES ** DATATYPE and SIZES

Просмотреть файл

@ -399,7 +399,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);
request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); request->u.scatter.coll_count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);
ret = setup_scatter_buffers_linear(comm, request, portals4_module); ret = setup_scatter_buffers_linear(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

Просмотреть файл

@ -358,7 +358,7 @@ extern uint32_t mca_coll_sm_one;
* Macro to release an in-use flag from this process * Macro to release an in-use flag from this process
*/ */
#define FLAG_RELEASE(flag) \ #define FLAG_RELEASE(flag) \
(void)opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1) opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1)
/** /**
* Macro to copy a single segment in from a user buffer to a shared * Macro to copy a single segment in from a user buffer to a shared

Просмотреть файл

@ -101,7 +101,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
if (0 != rank) { if (0 != rank) {
/* Get parent *in* buffer */ /* Get parent *in* buffer */
parent = &data->mcb_barrier_control_parent[buffer_set]; parent = &data->mcb_barrier_control_parent[buffer_set];
(void)opal_atomic_add(parent, 1); opal_atomic_add (parent, 1);
SPIN_CONDITION(0 != *me_out, exit_label2); SPIN_CONDITION(0 != *me_out, exit_label2);
*me_out = 0; *me_out = 0;

Просмотреть файл

@ -463,7 +463,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
OBJ_RETAIN(sm_module->previous_reduce_module); OBJ_RETAIN(sm_module->previous_reduce_module);
/* Indicate that we have successfully attached and setup */ /* Indicate that we have successfully attached and setup */
(void)opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1); opal_atomic_add (&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
/* Wait for everyone in this communicator to attach and setup */ /* Wait for everyone in this communicator to attach and setup */
opal_output_verbose(10, ompi_coll_base_framework.framework_output, opal_output_verbose(10, ompi_coll_base_framework.framework_output,

Просмотреть файл

@ -209,7 +209,7 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar,
int mca_common_monitoring_init( void ) int mca_common_monitoring_init( void )
{ {
if( !mca_common_monitoring_enabled ) return OMPI_ERROR; if( !mca_common_monitoring_enabled ) return OMPI_ERROR;
if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ if( 1 < opal_atomic_add_fetch_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */
char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; char hostname[OPAL_MAXHOSTNAMELEN] = "NA";
/* Initialize constant */ /* Initialize constant */
@ -229,7 +229,7 @@ int mca_common_monitoring_init( void )
void mca_common_monitoring_finalize( void ) void mca_common_monitoring_finalize( void )
{ {
if( ! mca_common_monitoring_enabled || /* Don't release if not last */ if( ! mca_common_monitoring_enabled || /* Don't release if not last */
0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return; 0 < opal_atomic_sub_fetch_32(&mca_common_monitoring_hold, 1) ) return;
OPAL_MONITORING_PRINT_INFO("common_component_finish"); OPAL_MONITORING_PRINT_INFO("common_component_finish");
/* Dump monitoring information */ /* Dump monitoring information */
@ -503,21 +503,21 @@ void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag)
/* Keep track of the data_size distribution */ /* Keep track of the data_size distribution */
if( 0 == data_size ) { if( 0 == data_size ) {
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1); opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram], 1);
} else { } else {
int log2_size = log10(data_size)/log10_2; int log2_size = log10(data_size)/log10_2;
if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */ if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */
log2_size = max_size_histogram - 2; log2_size = max_size_histogram - 2;
opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); opal_atomic_add_fetch_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1);
} }
/* Distinguishes positive and negative tags if requested */ /* Distinguishes positive and negative tags if requested */
if( (tag < 0) && (mca_common_monitoring_filter()) ) { if( (tag < 0) && (mca_common_monitoring_filter()) ) {
opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size); opal_atomic_add_fetch_size_t(&filtered_pml_data[world_rank], data_size);
opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1); opal_atomic_add_fetch_size_t(&filtered_pml_count[world_rank], 1);
} else { /* if filtered monitoring is not activated data is aggregated indifferently */ } else { /* if filtered monitoring is not activated data is aggregated indifferently */
opal_atomic_add_size_t(&pml_data[world_rank], data_size); opal_atomic_add_fetch_size_t(&pml_data[world_rank], data_size);
opal_atomic_add_size_t(&pml_count[world_rank], 1); opal_atomic_add_fetch_size_t(&pml_count[world_rank], 1);
} }
} }
@ -564,11 +564,11 @@ void mca_common_monitoring_record_osc(int world_rank, size_t data_size,
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
if( SEND == dir ) { if( SEND == dir ) {
opal_atomic_add_size_t(&osc_data_s[world_rank], data_size); opal_atomic_add_fetch_size_t(&osc_data_s[world_rank], data_size);
opal_atomic_add_size_t(&osc_count_s[world_rank], 1); opal_atomic_add_fetch_size_t(&osc_count_s[world_rank], 1);
} else { } else {
opal_atomic_add_size_t(&osc_data_r[world_rank], data_size); opal_atomic_add_fetch_size_t(&osc_data_r[world_rank], data_size);
opal_atomic_add_size_t(&osc_count_r[world_rank], 1); opal_atomic_add_fetch_size_t(&osc_count_r[world_rank], 1);
} }
} }
@ -650,8 +650,8 @@ void mca_common_monitoring_record_coll(int world_rank, size_t data_size)
{ {
if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */
opal_atomic_add_size_t(&coll_data[world_rank], data_size); opal_atomic_add_fetch_size_t(&coll_data[world_rank], data_size);
opal_atomic_add_size_t(&coll_count[world_rank], 1); opal_atomic_add_fetch_size_t(&coll_count[world_rank], 1);
} }
static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar, static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar,

Просмотреть файл

@ -236,8 +236,8 @@ void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data
return; return;
} }
#endif /* OPAL_ENABLE_DEBUG */ #endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->o2a_size, size); opal_atomic_add_fetch_size_t(&data->o2a_size, size);
opal_atomic_add_size_t(&data->o2a_count, 1); opal_atomic_add_fetch_size_t(&data->o2a_count, 1);
} }
int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar, int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar,
@ -277,8 +277,8 @@ void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data
return; return;
} }
#endif /* OPAL_ENABLE_DEBUG */ #endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->a2o_size, size); opal_atomic_add_fetch_size_t(&data->a2o_size, size);
opal_atomic_add_size_t(&data->a2o_count, 1); opal_atomic_add_fetch_size_t(&data->a2o_count, 1);
} }
int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar, int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar,
@ -318,8 +318,8 @@ void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data
return; return;
} }
#endif /* OPAL_ENABLE_DEBUG */ #endif /* OPAL_ENABLE_DEBUG */
opal_atomic_add_size_t(&data->a2a_size, size); opal_atomic_add_fetch_size_t(&data->a2a_size, size);
opal_atomic_add_size_t(&data->a2a_count, 1); opal_atomic_add_fetch_size_t(&data->a2a_count, 1);
} }
int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar, int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar,

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -296,9 +296,10 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me,
int int
ompi_mtl_portals4_flowctl_trigger(void) ompi_mtl_portals4_flowctl_trigger(void)
{ {
int32_t _tmp_value = 0;
int ret; int ret;
if (true == OPAL_ATOMIC_BOOL_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) { if (true == OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ompi_mtl_portals4.flowctl.flowctl_active, &_tmp_value, 1)) {
/* send trigger to root */ /* send trigger to root */
ret = PtlPut(ompi_mtl_portals4.zero_md_h, ret = PtlPut(ompi_mtl_portals4.zero_md_h,
0, 0,
@ -346,7 +347,7 @@ start_recover(void)
int64_t epoch_counter; int64_t epoch_counter;
ompi_mtl_portals4.flowctl.flowctl_active = true; ompi_mtl_portals4.flowctl.flowctl_active = true;
epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); epoch_counter = opal_atomic_add_fetch_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Entering flowctl_start_recover %ld", "Entering flowctl_start_recover %ld",

Просмотреть файл

@ -53,14 +53,14 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
int32_t frag_count; int32_t frag_count;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL #if OMPI_MTL_PORTALS4_FLOW_CONTROL
while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { while (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
ompi_mtl_portals4_progress(); ompi_mtl_portals4_progress();
} }
#endif #endif
frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count); ret = OPAL_THREAD_ADD_FETCH32(&(request->pending_reply), frag_count);
for (i = 0 ; i < frag_count ; i++) { for (i = 0 ; i < frag_count ; i++) {
opal_free_list_item_t *tmp; opal_free_list_item_t *tmp;
@ -385,14 +385,14 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag, opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
&rndv_get_frag->super); &rndv_get_frag->super);
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_reply), -1);
if (ret > 0) { if (ret > 0) {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
assert(ptl_request->pending_reply == 0); assert(ptl_request->pending_reply == 0);
#if OMPI_MTL_PORTALS4_FLOW_CONTROL #if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif #endif
/* make sure the data is in the right place. Use _ucount for /* make sure the data is in the right place. Use _ucount for
@ -468,7 +468,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->super.type = portals4_req_recv; ptl_request->super.type = portals4_req_recv;
ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->hdr_data = 0; ptl_request->hdr_data = 0;
#endif #endif
ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->buffer_ptr = (free_after) ? start : NULL;
@ -549,7 +549,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
} }
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->hdr_data = 0; ptl_request->hdr_data = 0;
#endif #endif
ptl_request->super.type = portals4_req_recv; ptl_request->super.type = portals4_req_recv;

Просмотреть файл

@ -45,7 +45,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
(ompi_mtl_portals4_isend_request_t*) ptl_base_request; (ompi_mtl_portals4_isend_request_t*) ptl_base_request;
if (PTL_EVENT_GET == ev->type) { if (PTL_EVENT_GET == ev->type) {
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); ret = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
if (ret > 0) { if (ret > 0) {
/* wait for other gets */ /* wait for other gets */
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret)); OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret));
@ -94,7 +94,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super); &pending->super.super);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
ompi_mtl_portals4_flowctl_trigger(); ompi_mtl_portals4_flowctl_trigger();
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -124,7 +124,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
if ((eager == ompi_mtl_portals4.protocol) || if ((eager == ompi_mtl_portals4.protocol) ||
(ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) { (ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) {
val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); val = OPAL_THREAD_ADD_FETCH32(&(ptl_request->pending_get), -1);
} }
if (0 == val) { if (0 == val) {
add = 2; /* We haven't to wait for any get, so we have to add an extra count to cause the message to complete */ add = 2; /* We haven't to wait for any get, so we have to add an extra count to cause the message to complete */
@ -161,7 +161,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
ptl_request->me_h = PTL_INVALID_HANDLE; ptl_request->me_h = PTL_INVALID_HANDLE;
add++; add++;
} }
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add); val = OPAL_THREAD_ADD_FETCH32((int32_t*)&ptl_request->event_count, add);
assert(val <= 3); assert(val <= 3);
if (val == 3) { if (val == 3) {
@ -174,7 +174,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
*complete = true; *complete = true;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL #if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl, opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl,
&ptl_request->pending->super); &ptl_request->pending->super);
@ -422,15 +422,15 @@ ompi_mtl_portals4_pending_list_progress()
while ((!ompi_mtl_portals4.flowctl.flowctl_active) && while ((!ompi_mtl_portals4.flowctl.flowctl_active) &&
(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { (0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1); val = OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1);
if (val < 0) { if (val < 0) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return; return;
} }
item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends); item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends);
if (OPAL_UNLIKELY(NULL == item)) { if (OPAL_UNLIKELY(NULL == item)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return; return;
} }
@ -456,7 +456,7 @@ ompi_mtl_portals4_pending_list_progress()
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super); &pending->super.super);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
} }
} }
} }
@ -492,7 +492,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) return ret; if (OMPI_SUCCESS != ret) return ret;
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->opcount = OPAL_THREAD_ADD_FETCH64((int64_t*)&ompi_mtl_portals4.opcount, 1);
ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->buffer_ptr = (free_after) ? start : NULL;
ptl_request->length = length; ptl_request->length = length;
ptl_request->event_count = 0; ptl_request->event_count = 0;
@ -520,15 +520,15 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
pending->ptl_proc = ptl_proc; pending->ptl_proc = ptl_proc;
pending->ptl_request = ptl_request; pending->ptl_request = ptl_request;
if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { if (OPAL_UNLIKELY(OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super); &pending->super.super);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super); &pending->super.super);
ompi_mtl_portals4_pending_list_progress(); ompi_mtl_portals4_pending_list_progress();
@ -536,7 +536,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
} }
if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) { if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD_FETCH32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super); &pending->super.super);
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -61,7 +61,7 @@
static inline void* \ static inline void* \
ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \ ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \
{ \ { \
if( 1 == opal_atomic_add_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ if( 1 == opal_atomic_add_fetch_32(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \
/* Saves the original module functions in \ /* Saves the original module functions in \
* ompi_osc_monitoring_module_## template ##_template \ * ompi_osc_monitoring_module_## template ##_template \
*/ \ */ \

Просмотреть файл

@ -99,7 +99,7 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
PTL_SUM, PTL_SUM,
PTL_INT32_T); PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret; if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1); OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
} }
ret = ompi_osc_portals4_complete_all(module); ret = ompi_osc_portals4_complete_all(module);
@ -144,7 +144,7 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
PTL_SUM, PTL_SUM,
PTL_INT32_T); PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret; if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1); OPAL_THREAD_ADD_FETCH64(&module->opcount, 1);
} }
} else { } else {
module->post_group = NULL; module->post_group = NULL;

Просмотреть файл

@ -206,7 +206,7 @@ segmentedPut(int64_t *opcount,
ptl_size_t bytes_put = 0; ptl_size_t bytes_put = 0;
do { do {
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(put_length, segment_length); ptl_size_t frag_length = MIN(put_length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -222,7 +222,7 @@ segmentedPut(int64_t *opcount,
user_ptr, user_ptr,
hdr_data); hdr_data);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlPut failed with return value %d", "%s:%d PtlPut failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
@ -251,7 +251,7 @@ segmentedGet(int64_t *opcount,
ptl_size_t bytes_gotten = 0; ptl_size_t bytes_gotten = 0;
do { do {
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(get_length, segment_length); ptl_size_t frag_length = MIN(get_length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -266,7 +266,7 @@ segmentedGet(int64_t *opcount,
target_offset + bytes_gotten, target_offset + bytes_gotten,
user_ptr); user_ptr);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlGet failed with return value %d", "%s:%d PtlGet failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
@ -297,7 +297,7 @@ segmentedAtomic(int64_t *opcount,
ptl_size_t sent = 0; ptl_size_t sent = 0;
do { do {
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length); ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -315,7 +315,7 @@ segmentedAtomic(int64_t *opcount,
ptl_op, ptl_op,
ptl_dt); ptl_dt);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlAtomic failed with return value %d", "%s:%d PtlAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
@ -348,7 +348,7 @@ segmentedFetchAtomic(int64_t *opcount,
ptl_size_t sent = 0; ptl_size_t sent = 0;
do { do {
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length); ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -367,7 +367,7 @@ segmentedFetchAtomic(int64_t *opcount,
ptl_op, ptl_op,
ptl_dt); ptl_dt);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlFetchAtomic failed with return value %d", "%s:%d PtlFetchAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
@ -399,7 +399,7 @@ segmentedSwap(int64_t *opcount,
ptl_size_t sent = 0; ptl_size_t sent = 0;
do { do {
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
ptl_size_t frag_length = MIN(length, segment_length); ptl_size_t frag_length = MIN(length, segment_length);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
@ -419,7 +419,7 @@ segmentedSwap(int64_t *opcount,
PTL_SWAP, PTL_SWAP,
ptl_dt); ptl_dt);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlSwap failed with return value %d", "%s:%d PtlSwap failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
@ -547,7 +547,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
return ret; return ret;
} }
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
@ -564,7 +564,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module,
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d PtlGet() failed: ret = %d", "%s,%d PtlGet() failed: ret = %d",
__FUNCTION__, __LINE__, ret)); __FUNCTION__, __LINE__, ret));
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -716,7 +716,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
return ret; return ret;
} }
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
@ -735,7 +735,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module,
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d PtlPut() failed: ret = %d", "%s,%d PtlPut() failed: ret = %d",
__FUNCTION__, __LINE__, ret)); __FUNCTION__, __LINE__, ret));
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -1252,7 +1252,7 @@ put_to_noncontig(int64_t *opcount,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu", "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1270,7 +1270,7 @@ put_to_noncontig(int64_t *opcount,
user_ptr, user_ptr,
0); 0);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
return ret; return ret;
} }
@ -1361,7 +1361,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu", "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1379,7 +1379,7 @@ atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
user_ptr, user_ptr,
0); 0);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -1479,7 +1479,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max); atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu", "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1501,7 +1501,7 @@ atomic_to_noncontig(ompi_osc_portals4_module_t *module,
ptl_op, ptl_op,
ptl_dt); ptl_dt);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -1586,7 +1586,7 @@ get_from_noncontig(int64_t *opcount,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(opcount, 1); opal_atomic_add_fetch_64(opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu", "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1602,7 +1602,7 @@ get_from_noncontig(int64_t *opcount,
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
user_ptr); user_ptr);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(opcount, -1); opal_atomic_add_fetch_64(opcount, -1);
return ret; return ret;
} }
@ -1687,7 +1687,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing rdma on contiguous region. local: %p, remote: %p, len: %lu", "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
@ -1703,7 +1703,7 @@ atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
user_ptr); user_ptr);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -1817,7 +1817,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu", "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
@ -1844,7 +1844,7 @@ swap_from_noncontig(ompi_osc_portals4_module_t *module,
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlSwap failed with return value %d", "%s:%d PtlSwap failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -1969,7 +1969,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
/* determine how much to transfer in this operation */ /* determine how much to transfer in this operation */
rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
opal_atomic_add_64(&module->opcount, 1); opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu", "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
@ -1995,7 +1995,7 @@ fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
opal_output_verbose(1, ompi_osc_base_framework.framework_output, opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d PtlFetchAtomic failed with return value %d", "%s:%d PtlFetchAtomic failed with return value %d",
__FUNCTION__, __LINE__, ret); __FUNCTION__, __LINE__, ret);
opal_atomic_add_64(&module->opcount, -1); opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
@ -2411,7 +2411,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
do { do {
size_t msg_length = MIN(module->atomic_max, length - sent); size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__)); "%s,%d Atomic", __FUNCTION__, __LINE__));
@ -2428,7 +2428,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
ptl_op, ptl_op,
ptl_dt); ptl_dt);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
(void)opal_atomic_add_64(&module->opcount, -1); (void)opal_atomic_add_fetch_64(&module->opcount, -1);
OMPI_OSC_PORTALS4_REQUEST_RETURN(request); OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret; return ret;
} }
@ -3149,7 +3149,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
do { do {
size_t msg_length = MIN(module->atomic_max, length - sent); size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__)); "%s,%d Atomic", __FUNCTION__, __LINE__));
@ -3166,7 +3166,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
ptl_op, ptl_op,
ptl_dt); ptl_dt);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
(void)opal_atomic_add_64(&module->opcount, -1); (void)opal_atomic_add_fetch_64(&module->opcount, -1);
return ret; return ret;
} }
sent += msg_length; sent += msg_length;
@ -3541,7 +3541,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
result_md_offset = (ptl_size_t) result_addr; result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr; origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__)); "%s,%d Swap", __FUNCTION__, __LINE__));
@ -3613,7 +3613,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
result_md_offset = (ptl_size_t) result_addr; result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr; origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__)); "%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->md_h, ret = PtlSwap(module->md_h,
@ -3635,7 +3635,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
md_offset = (ptl_size_t) result_addr; md_offset = (ptl_size_t) result_addr;
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__)); "%s,%d Get", __FUNCTION__, __LINE__));
ret = PtlGet(module->md_h, ret = PtlGet(module->md_h,
@ -3648,7 +3648,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
NULL); NULL);
} else { } else {
ptl_size_t result_md_offset, origin_md_offset; ptl_size_t result_md_offset, origin_md_offset;
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = ompi_osc_portals4_get_op(op, &ptl_op); ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {

Просмотреть файл

@ -230,8 +230,8 @@ process:
} }
req = (ompi_osc_portals4_request_t*) ev.user_ptr; req = (ompi_osc_portals4_request_t*) ev.user_ptr;
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); opal_atomic_add_fetch_size_t(&req->super.req_status._ucount, ev.mlength);
ops = opal_atomic_add_32(&req->ops_committed, 1); ops = opal_atomic_add_fetch_32(&req->ops_committed, 1);
if (ops == req->ops_expected) { if (ops == req->ops_expected) {
ompi_request_complete(&req->super, true); ompi_request_complete(&req->super, true);
} }

Просмотреть файл

@ -43,7 +43,7 @@ lk_cas64(ompi_osc_portals4_module_t *module,
int ret; int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlSwap(module->md_h, ret = PtlSwap(module->md_h,
(ptl_size_t) result_val, (ptl_size_t) result_val,
@ -76,7 +76,7 @@ lk_write64(ompi_osc_portals4_module_t *module,
int ret; int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlPut(module->md_h, ret = PtlPut(module->md_h,
(ptl_size_t) &write_val, (ptl_size_t) &write_val,
@ -106,7 +106,7 @@ lk_add64(ompi_osc_portals4_module_t *module,
int ret; int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
(void)opal_atomic_add_64(&module->opcount, 1); (void)opal_atomic_add_fetch_64(&module->opcount, 1);
ret = PtlFetchAtomic(module->md_h, ret = PtlFetchAtomic(module->md_h,
(ptl_size_t) result_val, (ptl_size_t) result_val,

Просмотреть файл

@ -8,7 +8,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
@ -145,15 +145,11 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer
static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value)
{ {
int32_t peer_flags, new_flags; if (value) {
do { OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag);
peer_flags = peer->flags; } else {
if (value) { OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag);
new_flags = peer_flags | flag; }
} else {
new_flags = peer_flags & ~flag;
}
} while (!OPAL_ATOMIC_BOOL_CMPSET_32 (&peer->flags, peer_flags, new_flags));
} }
static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value) static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)
@ -518,7 +514,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking active incoming complete. module %p, count = %d", "mark_incoming_completion marking active incoming complete. module %p, count = %d",
(void *) module, (int) module->active_incoming_frag_count + 1)); (void *) module, (int) module->active_incoming_frag_count + 1));
new_value = OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1); new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1);
if (new_value >= 0) { if (new_value >= 0) {
OPAL_THREAD_LOCK(&module->lock); OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast(&module->cond); opal_condition_broadcast(&module->cond);
@ -530,7 +526,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking passive incoming complete. module %p, source = %d, count = %d", "mark_incoming_completion marking passive incoming complete. module %p, source = %d, count = %d",
(void *) module, source, (int) peer->passive_incoming_frag_count + 1)); (void *) module, source, (int) peer->passive_incoming_frag_count + 1));
new_value = OPAL_THREAD_ADD32((int32_t *) &peer->passive_incoming_frag_count, 1); new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &peer->passive_incoming_frag_count, 1);
if (0 == new_value) { if (0 == new_value) {
OPAL_THREAD_LOCK(&module->lock); OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast(&module->cond); opal_condition_broadcast(&module->cond);
@ -554,7 +550,7 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in
*/ */
static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module) static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
{ {
int32_t new_value = OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, 1); int32_t new_value = OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, 1);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_outgoing_completion: outgoing_frag_count = %d", new_value)); "mark_outgoing_completion: outgoing_frag_count = %d", new_value));
if (new_value >= 0) { if (new_value >= 0) {
@ -578,12 +574,12 @@ static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
*/ */
static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count) static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count)
{ {
OPAL_THREAD_ADD32((int32_t *) &module->outgoing_frag_count, -count); OPAL_THREAD_ADD_FETCH32((int32_t *) &module->outgoing_frag_count, -count);
if (MPI_PROC_NULL != target) { if (MPI_PROC_NULL != target) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target, "ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target,
count, module->epoch_outgoing_frag_count[target] + count)); count, module->epoch_outgoing_frag_count[target] + count));
OPAL_THREAD_ADD32((int32_t *) (module->epoch_outgoing_frag_count + target), count); OPAL_THREAD_ADD_FETCH32((int32_t *) (module->epoch_outgoing_frag_count + target), count);
} }
} }
@ -721,7 +717,7 @@ static inline int get_tag(ompi_osc_pt2pt_module_t *module)
/* the LSB of the tag is used be the receiver to determine if the /* the LSB of the tag is used be the receiver to determine if the
message is a passive or active target (ie, where to mark message is a passive or active target (ie, where to mark
completion). */ completion). */
int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4); int32_t tmp = OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &module->tag_counter, 4);
return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch);
} }

Просмотреть файл

@ -183,7 +183,7 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
incoming_reqs)); incoming_reqs));
/* set our complete condition for incoming requests */ /* set our complete condition for incoming requests */
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -incoming_reqs); OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs);
/* wait for completion */ /* wait for completion */
while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) { while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) {
@ -272,7 +272,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"found unexpected post from %d", "found unexpected post from %d",
peer->rank)); peer->rank));
OPAL_THREAD_ADD32 (&sync->sync_expected, -1); OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
ompi_osc_pt2pt_peer_set_unex (peer, false); ompi_osc_pt2pt_peer_set_unex (peer, false);
} }
} }
@ -574,12 +574,12 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i
frag_count, module->active_incoming_frag_count, module->num_complete_msgs)); frag_count, module->active_incoming_frag_count, module->num_complete_msgs));
/* the current fragment is not part of the frag_count so we need to add it here */ /* the current fragment is not part of the frag_count so we need to add it here */
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -frag_count); OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count);
/* make sure the signal count is written before changing the complete message count */ /* make sure the signal count is written before changing the complete message count */
opal_atomic_wmb (); opal_atomic_wmb ();
if (0 == OPAL_THREAD_ADD32(&module->num_complete_msgs, 1)) { if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) {
OPAL_THREAD_LOCK(&module->lock); OPAL_THREAD_LOCK(&module->lock);
opal_condition_broadcast (&module->cond); opal_condition_broadcast (&module->cond);
OPAL_THREAD_UNLOCK(&module->lock); OPAL_THREAD_UNLOCK(&module->lock);

Просмотреть файл

@ -62,7 +62,7 @@ static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request)
/* update the cbdata for ompi_osc_pt2pt_comm_complete */ /* update the cbdata for ompi_osc_pt2pt_comm_complete */
request->req_complete_cb_data = pt2pt_request->module; request->req_complete_cb_data = pt2pt_request->module;
if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) { if (0 == OPAL_THREAD_ADD_FETCH32(&pt2pt_request->outstanding_requests, -1)) {
ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR); ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR);
} }

Просмотреть файл

@ -667,7 +667,7 @@ static int accumulate_cb (ompi_request_t *request)
rank = acc_data->peer; rank = acc_data->peer;
} }
if (0 == OPAL_THREAD_ADD32(&acc_data->request_count, -1)) { if (0 == OPAL_THREAD_ADD_FETCH32(&acc_data->request_count, -1)) {
/* no more requests needed before the buffer can be accumulated */ /* no more requests needed before the buffer can be accumulated */
if (acc_data->source) { if (acc_data->source) {
@ -716,9 +716,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os
/* NTH: ensure we don't leave wait/process_flush/etc until this /* NTH: ensure we don't leave wait/process_flush/etc until this
* accumulate operation is complete. */ * accumulate operation is complete. */
if (active_target) { if (active_target) {
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, -1); OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -1);
} else { } else {
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
} }
pending_acc->active_target = active_target; pending_acc->active_target = active_target;
@ -1353,7 +1353,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
"process_flush header = {.frag_count = %d}", flush_header->frag_count)); "process_flush header = {.frag_count = %d}", flush_header->frag_count));
/* increase signal count by incoming frags */ /* increase signal count by incoming frags */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count); OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) flush_header->frag_count);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: process_flush: received message from %d. passive_incoming_frag_count = %d", "%d: process_flush: received message from %d. passive_incoming_frag_count = %d",
@ -1372,7 +1372,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source,
} }
/* signal incomming will increment this counter */ /* signal incomming will increment this counter */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
return sizeof (*flush_header); return sizeof (*flush_header);
} }
@ -1387,7 +1387,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
"process_unlock header = {.frag_count = %d}", unlock_header->frag_count)); "process_unlock header = {.frag_count = %d}", unlock_header->frag_count));
/* increase signal count by incoming frags */ /* increase signal count by incoming frags */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count); OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -(int32_t) unlock_header->frag_count);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"osc pt2pt: processing unlock request from %d. frag count = %d, processed_count = %d", "osc pt2pt: processing unlock request from %d. frag count = %d, processed_count = %d",
@ -1406,7 +1406,7 @@ static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source,
} }
/* signal incoming will increment this counter */ /* signal incoming will increment this counter */
OPAL_THREAD_ADD32(&peer->passive_incoming_frag_count, -1); OPAL_THREAD_ADD_FETCH32(&peer->passive_incoming_frag_count, -1);
return sizeof (*unlock_header); return sizeof (*unlock_header);
} }

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -105,8 +105,8 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, om
"osc pt2pt: flushing active fragment to target %d. pending: %d", "osc pt2pt: flushing active fragment to target %d. pending: %d",
active_frag->target, active_frag->pending)); active_frag->target, active_frag->pending));
if (opal_atomic_bool_cmpset (&peer->active_frag, active_frag, NULL)) { if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &active_frag, NULL)) {
if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) {
/* communication going on while synchronizing; this is an rma usage bug */ /* communication going on while synchronizing; this is an rma usage bug */
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }

Просмотреть файл

@ -51,7 +51,7 @@ static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_frag_t* buffer) ompi_osc_pt2pt_frag_t* buffer)
{ {
opal_atomic_wmb (); opal_atomic_wmb ();
if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) {
opal_atomic_mb (); opal_atomic_mb ();
return ompi_osc_pt2pt_frag_start(module, buffer); return ompi_osc_pt2pt_frag_start(module, buffer);
} }
@ -67,7 +67,7 @@ static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (omp
/* to ensure ordering flush the buffer on the peer */ /* to ensure ordering flush the buffer on the peer */
curr = peer->active_frag; curr = peer->active_frag;
if (NULL != curr && opal_atomic_bool_cmpset (&peer->active_frag, curr, NULL)) { if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, &curr, NULL)) {
/* If there's something pending, the pending finish will /* If there's something pending, the pending finish will
start the buffer. Otherwise, we need to start it now. */ start the buffer. Otherwise, we need to start it now. */
int ret = ompi_osc_pt2pt_frag_finish (module, curr); int ret = ompi_osc_pt2pt_frag_finish (module, curr);
@ -142,11 +142,11 @@ static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, i
curr->pending_long_sends = long_send; curr->pending_long_sends = long_send;
peer->active_frag = curr; peer->active_frag = curr;
} else { } else {
OPAL_THREAD_ADD32(&curr->header->num_ops, 1); OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1);
curr->pending_long_sends += long_send; curr->pending_long_sends += long_send;
} }
OPAL_THREAD_ADD32(&curr->pending, 1); OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
} else { } else {
curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len);
if (OPAL_UNLIKELY(NULL == curr)) { if (OPAL_UNLIKELY(NULL == curr)) {

Просмотреть файл

@ -64,7 +64,7 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock); acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);
if (!acquired) { if (!acquired) {
@ -91,7 +91,7 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank); ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
int lock_type = lock->sync.lock.type; int lock_type = lock->sync.lock.type;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -99,9 +99,9 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
"ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status)); "ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status));
if (MPI_LOCK_EXCLUSIVE == lock_type) { if (MPI_LOCK_EXCLUSIVE == lock_type) {
OPAL_THREAD_ADD32(&module->lock_status, 1); OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
ompi_osc_pt2pt_activate_next_lock (module); ompi_osc_pt2pt_activate_next_lock (module);
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
ompi_osc_pt2pt_activate_next_lock (module); ompi_osc_pt2pt_activate_next_lock (module);
} }
@ -128,7 +128,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -145,7 +145,7 @@ int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, omp
ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req)); ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req));
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_THREAD_ADD32(&lock->sync_expected, -1); OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1);
} else { } else {
ompi_osc_pt2pt_peer_set_locked (peer, true); ompi_osc_pt2pt_peer_set_locked (peer, true);
} }
@ -163,7 +163,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_header_unlock_t unlock_req; ompi_osc_pt2pt_header_unlock_t unlock_req;
int ret; int ret;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -207,7 +207,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1); int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1);
int ret; int ret;
(void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);
assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
@ -744,14 +744,13 @@ static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, in
break; break;
} }
if (opal_atomic_bool_cmpset_32 (&module->lock_status, lock_status, lock_status + 1)) { if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) {
break; break;
} }
lock_status = module->lock_status;
} while (1); } while (1);
} else { } else {
queue = !opal_atomic_bool_cmpset_32 (&module->lock_status, 0, -1); int32_t _tmp_value = 0;
queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1);
} }
if (queue) { if (queue) {
@ -909,9 +908,9 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
} }
if (-1 == module->lock_status) { if (-1 == module->lock_status) {
OPAL_THREAD_ADD32(&module->lock_status, 1); OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
ompi_osc_pt2pt_activate_next_lock (module); ompi_osc_pt2pt_activate_next_lock (module);
} else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
ompi_osc_pt2pt_activate_next_lock (module); ompi_osc_pt2pt_activate_next_lock (module);
} }

Просмотреть файл

@ -166,7 +166,7 @@ static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *syn
static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync) static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync)
{ {
int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1); int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
if (0 == new_value) { if (0 == new_value) {
OPAL_THREAD_LOCK(&sync->lock); OPAL_THREAD_LOCK(&sync->lock);
if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) { if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) {

Просмотреть файл

@ -516,7 +516,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
subreq->internal = true; subreq->internal = true;
subreq->parent_request = request; subreq->parent_request = request;
if (request) { if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
} }
if (result_datatype) { if (result_datatype) {
@ -557,7 +557,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
if (request) { if (request) {
/* release our reference so the request can complete */ /* release our reference so the request can complete */
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
} }
if (source_datatype) { if (source_datatype) {

Просмотреть файл

@ -8,7 +8,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
@ -285,7 +285,9 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result); ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result);
assert (OMPI_SUCCESS == ret); assert (OMPI_SUCCESS == ret);
} else { } else {
result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); ompi_osc_rdma_lock_t _tmp_value = 0;
result = !ompi_osc_rdma_lock_compare_exchange ((osc_rdma_counter_t *) target, &_tmp_value, 1 + (osc_rdma_counter_t) my_rank);
} }
if (OPAL_LIKELY(0 == result)) { if (OPAL_LIKELY(0 == result)) {

Просмотреть файл

@ -217,7 +217,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
subreq->parent_request = request; subreq->parent_request = request;
if (request) { if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
} }
} else if (!alloc_reqs) { } else if (!alloc_reqs) {
subreq = request; subreq = request;
@ -232,7 +232,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) { if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) {
if (request) { if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
} }
if (alloc_reqs) { if (alloc_reqs) {
@ -266,7 +266,7 @@ static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *loc
ompi_osc_rdma_request_complete (request, OMPI_SUCCESS); ompi_osc_rdma_request_complete (request, OMPI_SUCCESS);
} }
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
} }
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)"); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
@ -551,7 +551,7 @@ static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p
/* increment the outstanding request counter in the request object */ /* increment the outstanding request counter in the request object */
if (request) { if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
cbcontext = (void *) ((intptr_t) request | 1); cbcontext = (void *) ((intptr_t) request | 1);
request->sync = sync; request->sync = sync;
} else { } else {
@ -643,12 +643,12 @@ static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_
subreq->internal = true; subreq->internal = true;
subreq->type = OMPI_OSC_RDMA_TYPE_RDMA; subreq->type = OMPI_OSC_RDMA_TYPE_RDMA;
subreq->parent_request = request; subreq->parent_request = request;
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1);
ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq); ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OMPI_OSC_RDMA_REQUEST_RETURN(subreq); OMPI_OSC_RDMA_REQUEST_RETURN(subreq);
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
} }
return ret; return ret;

Просмотреть файл

@ -35,7 +35,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_
} }
if (request) { if (request) {
(void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, -1);
} }
ompi_osc_rdma_sync_rdma_dec (sync); ompi_osc_rdma_sync_rdma_dec (sync);

Просмотреть файл

@ -37,7 +37,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t);
static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag) static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag)
{ {
if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) { if (0 == OPAL_THREAD_ADD_FETCH32(&frag->pending, -1)) {
opal_atomic_rmb (); opal_atomic_rmb ();
ompi_osc_rdma_deregister (frag->module, frag->handle); ompi_osc_rdma_deregister (frag->module, frag->handle);
@ -113,7 +113,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size
curr->top += request_len; curr->top += request_len;
curr->remain_len -= request_len; curr->remain_len -= request_len;
OPAL_THREAD_ADD32(&curr->pending, 1); OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
OPAL_THREAD_UNLOCK(&module->lock); OPAL_THREAD_UNLOCK(&module->lock);

Просмотреть файл

@ -17,7 +17,8 @@
static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock) static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock)
{ {
return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); ompi_osc_rdma_lock_t _tmp_value = 0;
return !ompi_osc_rdma_lock_compare_exchange (lock, &_tmp_value, OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
} }
static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock) static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock)

Просмотреть файл

@ -201,14 +201,13 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
int32_t flags; int32_t flags;
opal_atomic_mb (); opal_atomic_mb ();
flags = peer->flags;
do { do {
flags = peer->flags;
if (flags & flag) { if (flags & flag) {
return false; return false;
} }
} while (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&peer->flags, &flags, flags | flag));
} while (!OPAL_THREAD_BOOL_CMPSET_32 (&peer->flags, flags, flags | flag));
return true; return true;
} }
@ -221,7 +220,7 @@ static inline bool ompi_osc_rdma_peer_test_set_flag (ompi_osc_rdma_peer_t *peer,
*/ */
static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag) static inline void ompi_osc_rdma_peer_clear_flag (ompi_osc_rdma_peer_t *peer, int flag)
{ {
OPAL_ATOMIC_AND32(&peer->flags, ~flag); OPAL_ATOMIC_AND_FETCH32(&peer->flags, ~flag);
opal_atomic_mb (); opal_atomic_mb ();
} }

Просмотреть файл

@ -48,7 +48,7 @@ static int request_complete (struct ompi_request_t *request)
{ {
ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request; ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request;
if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) { if (parent_request && 0 == OPAL_THREAD_ADD_FETCH32 (&parent_request->outstanding_requests, -1)) {
ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS); ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS);
} }

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -25,7 +25,7 @@ typedef int64_t osc_rdma_base_t;
typedef int64_t osc_rdma_size_t; typedef int64_t osc_rdma_size_t;
typedef int64_t osc_rdma_counter_t; typedef int64_t osc_rdma_counter_t;
#define ompi_osc_rdma_counter_add opal_atomic_add_64 #define ompi_osc_rdma_counter_add opal_atomic_add_fetch_64
#else #else
@ -33,7 +33,7 @@ typedef int32_t osc_rdma_base_t;
typedef int32_t osc_rdma_size_t; typedef int32_t osc_rdma_size_t;
typedef int32_t osc_rdma_counter_t; typedef int32_t osc_rdma_counter_t;
#define ompi_osc_rdma_counter_add opal_atomic_add_32 #define ompi_osc_rdma_counter_add opal_atomic_add_fetch_32
#endif #endif
@ -48,18 +48,18 @@ static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value
int64_t new; int64_t new;
opal_atomic_mb (); opal_atomic_mb ();
new = opal_atomic_add_64 (p, value) - value; new = opal_atomic_add_fetch_64 (p, value) - value;
opal_atomic_mb (); opal_atomic_mb ();
return new; return new;
} }
static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value) static inline int ompi_osc_rdma_lock_compare_exchange (volatile int64_t *p, int64_t *comp, int64_t value)
{ {
int ret; int ret;
opal_atomic_mb (); opal_atomic_mb ();
ret = opal_atomic_bool_cmpset_64 (p, comp, value); ret = opal_atomic_compare_exchange_strong_64 (p, comp, value);
opal_atomic_mb (); opal_atomic_mb ();
return ret; return ret;
@ -76,19 +76,19 @@ static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value
int32_t new; int32_t new;
opal_atomic_mb (); opal_atomic_mb ();
/* opal_atomic_add_32 differs from normal atomics in that it returns the new value */ /* opal_atomic_add_fetch_32 differs from normal atomics in that it returns the new value */
new = opal_atomic_add_32 (p, value) - value; new = opal_atomic_add_fetch_32 (p, value) - value;
opal_atomic_mb (); opal_atomic_mb ();
return new; return new;
} }
static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value) static inline int ompi_osc_rdma_lock_compare_exchange (volatile int32_t *p, int32_t *comp, int32_t value)
{ {
int ret; int ret;
opal_atomic_mb (); opal_atomic_mb ();
ret = opal_atomic_bool_cmpset_32 (p, comp, value); ret = opal_atomic_compare_exchange_strong_32 (p, comp, value);
opal_atomic_mb (); opal_atomic_mb ();
return ret; return ret;

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2014-2017 The University of Tennessee and The University * Copyright (c) 2014-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
@ -130,10 +130,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
ompi_osc_sm_module_t *module = ompi_osc_sm_module_t *module =
(ompi_osc_sm_module_t*) win->w_osc_module; (ompi_osc_sm_module_t*) win->w_osc_module;
int my_rank = ompi_comm_rank (module->comm); int my_rank = ompi_comm_rank (module->comm);
void *_tmp_ptr = NULL;
OBJ_RETAIN(group); OBJ_RETAIN(group);
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, NULL, group)) { if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) {
OBJ_RELEASE(group); OBJ_RELEASE(group);
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -150,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group,
for (int i = 0 ; i < size ; ++i) { for (int i = 0 ; i < size ; ++i) {
int rank_byte = ranks[i] >> OSC_SM_POST_BITS; int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
osc_sm_post_type_t old, rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);
/* wait for rank to post */ /* wait for rank to post */
while (!(module->posts[my_rank][rank_byte] & rank_bit)) { while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
@ -160,9 +161,11 @@ ompi_osc_sm_start(struct ompi_group_t *group,
opal_atomic_rmb (); opal_atomic_rmb ();
do { #if OPAL_HAVE_ATOMIC_MATH_64
old = module->posts[my_rank][rank_byte]; (void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
} while (!opal_atomic_bool_cmpset ((volatile osc_sm_post_type_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit)); #else
(void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
#endif
} }
free (ranks); free (ranks);
@ -185,7 +188,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
opal_atomic_mb(); opal_atomic_mb();
group = module->start_group; group = module->start_group;
if (NULL == group || !OPAL_ATOMIC_BOOL_CMPSET_PTR(&module->start_group, group, NULL)) { if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -198,7 +201,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
gsize = ompi_group_size(group); gsize = ompi_group_size(group);
for (int i = 0 ; i < gsize ; ++i) { for (int i = 0 ; i < gsize ; ++i) {
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1); (void) opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1);
} }
free (ranks); free (ranks);
@ -244,7 +247,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
gsize = ompi_group_size(module->post_group); gsize = ompi_group_size(module->post_group);
for (int i = 0 ; i < gsize ; ++i) { for (int i = 0 ; i < gsize ; ++i) {
(void) opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit); opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
} }
opal_atomic_wmb (); opal_atomic_wmb ();

Просмотреть файл

@ -26,9 +26,9 @@ lk_fetch_add32(ompi_osc_sm_module_t *module,
size_t offset, size_t offset,
uint32_t delta) uint32_t delta)
{ {
/* opal_atomic_add_32 is an add then fetch so delta needs to be subtracted out to get the /* opal_atomic_add_fetch_32 is an add then fetch so delta needs to be subtracted out to get the
* old value */ * old value */
return opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), return opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
delta) - delta; delta) - delta;
} }
@ -39,7 +39,7 @@ lk_add32(ompi_osc_sm_module_t *module,
size_t offset, size_t offset,
uint32_t delta) uint32_t delta)
{ {
opal_atomic_add_32((int32_t*) ((char*) &module->node_states[target].lock + offset), opal_atomic_add_fetch_32((int32_t*) ((char*) &module->node_states[target].lock + offset),
delta); delta);
} }

Просмотреть файл

@ -81,7 +81,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
{ {
size_t tmp; size_t tmp;
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init, 1) > 1) if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init, 1) > 1)
return OMPI_SUCCESS; return OMPI_SUCCESS;
/* initialize static objects */ /* initialize static objects */
@ -109,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe)
*/ */
int mca_pml_base_bsend_fini(void) int mca_pml_base_bsend_fini(void)
{ {
if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0) if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0)
return OMPI_SUCCESS; return OMPI_SUCCESS;
if(NULL != mca_pml_bsend_allocator) if(NULL != mca_pml_bsend_allocator)

Просмотреть файл

@ -261,7 +261,7 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
*/ */
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \ #define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
if (sendreq->req_state == -1) { \ if (sendreq->req_state == -1) { \
OPAL_THREAD_ADD32(&sendreq->req_state, 1); \ OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \
} }
/* Now check the error state. This request can be in error if the /* Now check the error state. This request can be in error if the

Просмотреть файл

@ -328,7 +328,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* protocol has req_state == 0 and as such should not be * protocol has req_state == 0 and as such should not be
* decremented. * decremented.
*/ */
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
} }
if(send_request_pml_complete_check(sendreq) == false) if(send_request_pml_complete_check(sendreq) == false)

Просмотреть файл

@ -206,7 +206,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
(void *) des->des_remote, (void *) des->des_remote,
des->des_remote_count, 0); des->des_remote_count, 0);
} }
OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1); OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
#if PML_BFO #if PML_BFO
btl->btl_free(btl, des); btl->btl_free(btl, des);
@ -217,7 +217,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */ #endif /* PML_BFO */
/* check completion status */ /* check completion status */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) { recvreq->req_rdma_offset < recvreq->req_send_offset) {
/* schedule additional rdma operations */ /* schedule additional rdma operations */
@ -388,7 +388,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */ #endif /* PML_BFO */
/* is receive request complete */ /* is receive request complete */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
recv_request_pml_complete_check(recvreq); recv_request_pml_complete_check(recvreq);
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); MCA_PML_BFO_RDMA_FRAG_RETURN(frag);
@ -506,7 +506,7 @@ void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */ /* check completion status */
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) { recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -668,7 +668,7 @@ void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
} }
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */ /* check completion status */
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) { recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -903,7 +903,7 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
#endif /* PML_BFO */ #endif /* PML_BFO */
/* update request state */ /* update request state */
recvreq->req_rdma_offset += size; recvreq->req_rdma_offset += size;
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1);
recvreq->req_rdma[rdma_idx].length -= size; recvreq->req_rdma[rdma_idx].length -= size;
bytes_remaining -= size; bytes_remaining -= size;
} else { } else {

Просмотреть файл

@ -70,12 +70,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t);
static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq) static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
{ {
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
} }
static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq) static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
{ {
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
} }
/** /**

Просмотреть файл

@ -207,10 +207,10 @@ mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
&(sendreq->req_send.req_base), PERUSE_SEND ); &(sendreq->req_send.req_base), PERUSE_SEND );
} }
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
/* advance the request */ /* advance the request */
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
@ -287,7 +287,7 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
(void *) des->des_local, (void *) des->des_local,
des->des_local_count, 0); des->des_local_count, 0);
if (OPAL_LIKELY(0 < req_bytes_delivered)) { if (OPAL_LIKELY(0 < req_bytes_delivered)) {
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
} }
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
@ -360,8 +360,8 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
des->des_local_count, des->des_local_count,
sizeof(mca_pml_bfo_frag_hdr_t)); sizeof(mca_pml_bfo_frag_hdr_t));
OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1); OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
#if PML_BFO #if PML_BFO
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
@ -1164,7 +1164,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size; range->range_btls[btl_idx].length -= size;
range->range_send_length -= size; range->range_send_length -= size;
range->range_send_offset += size; range->range_send_offset += size;
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) { if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range); range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0; prev_bytes_remaining = 0;
@ -1226,7 +1226,7 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
#endif /* PML_BFO */ #endif /* PML_BFO */
/* check for request completion */ /* check for request completion */
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
@ -1335,7 +1335,7 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
size_t i, size = 0; size_t i, size = 0;
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) { if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
} }
#if PML_BFO #if PML_BFO
MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq); MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq);

Просмотреть файл

@ -78,12 +78,12 @@ OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t);
static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq) static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq)
{ {
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
} }
static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq) static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq)
{ {
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
} }
static inline void static inline void
@ -445,7 +445,7 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
sendreq->req_pipeline_depth = 0; sendreq->req_pipeline_depth = 0;
sendreq->req_bytes_delivered = 0; sendreq->req_bytes_delivered = 0;
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32(
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
#if PML_BFO #if PML_BFO
sendreq->req_restartseq = 0; /* counts up restarts */ sendreq->req_restartseq = 0; /* counts up restarts */

Просмотреть файл

@ -151,7 +151,7 @@ int mca_pml_ob1_isend(const void *buf,
} }
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
} }
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
@ -220,7 +220,7 @@ int mca_pml_ob1_send(const void *buf,
} }
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); seqn = (uint16_t) OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
} }
/** /**

Просмотреть файл

@ -56,7 +56,7 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
static int mca_pml_ob1_progress_needed = 0; static int mca_pml_ob1_progress_needed = 0;
int mca_pml_ob1_enable_progress(int32_t count) int mca_pml_ob1_enable_progress(int32_t count)
{ {
int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count);
if( 1 < progress_count ) if( 1 < progress_count )
return 0; /* progress was already on */ return 0; /* progress was already on */
@ -119,7 +119,7 @@ int mca_pml_ob1_progress(void)
} }
if( 0 != completed_requests ) { if( 0 != completed_requests ) {
j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests); j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests);
if( 0 == j ) { if( 0 == j ) {
opal_progress_unregister(mca_pml_ob1_progress); opal_progress_unregister(mca_pml_ob1_progress);
} }

Просмотреть файл

@ -445,7 +445,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
* protocol has req_state == 0 and as such should not be * protocol has req_state == 0 and as such should not be
* decremented. * decremented.
*/ */
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
} }
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */ #if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */

Просмотреть файл

@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req; mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
mca_bml_base_btl_t *bml_btl = frag->rdma_bml; mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1); OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, -1);
assert ((uint64_t) rdma_size == frag->rdma_length); assert ((uint64_t) rdma_size == frag->rdma_length);
MCA_PML_OB1_RDMA_FRAG_RETURN(frag); MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
if (OPAL_LIKELY(0 < rdma_size)) { if (OPAL_LIKELY(0 < rdma_size)) {
/* check completion status */ /* check completion status */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, rdma_size);
if (recv_request_pml_complete_check(recvreq) == false && if (recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) { recvreq->req_rdma_offset < recvreq->req_send_offset) {
/* schedule additional rdma operations */ /* schedule additional rdma operations */
@ -373,7 +373,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_
} }
} else { } else {
/* is receive request complete */ /* is receive request complete */
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
/* TODO: re-add order */ /* TODO: re-add order */
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc, mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag, bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
@ -524,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */ /* check completion status */
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
recvreq->req_rdma_offset < recvreq->req_send_offset) { recvreq->req_rdma_offset < recvreq->req_send_offset) {
@ -601,7 +601,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
* known that the data has been copied out of the descriptor. */ * known that the data has been copied out of the descriptor. */
des->des_cbfunc(NULL, NULL, des, 0); des->des_cbfunc(NULL, NULL, des, 0);
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
/* check completion status */ /* check completion status */
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
@ -815,7 +815,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
recvreq->req_recv.req_base.req_count, recvreq->req_recv.req_base.req_count,
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received);
} }
/* check completion status */ /* check completion status */
if(recv_request_pml_complete_check(recvreq) == false && if(recv_request_pml_complete_check(recvreq) == false &&
@ -1024,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) { if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
/* update request state */ /* update request state */
recvreq->req_rdma_offset += size; recvreq->req_rdma_offset += size;
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1); OPAL_THREAD_ADD_FETCH32(&recvreq->req_pipeline_depth, 1);
recvreq->req_rdma[rdma_idx].length -= size; recvreq->req_rdma[rdma_idx].length -= size;
bytes_remaining -= size; bytes_remaining -= size;
} else { } else {

Просмотреть файл

@ -64,12 +64,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq) static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
{ {
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
} }
static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq) static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
{ {
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
} }
/** /**

Просмотреть файл

@ -205,10 +205,10 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl,
&(sendreq->req_send.req_base), PERUSE_SEND ); &(sendreq->req_send.req_base), PERUSE_SEND );
} }
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
/* advance the request */ /* advance the request */
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
@ -261,7 +261,7 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
/* count bytes of user data actually delivered and check for request completion */ /* count bytes of user data actually delivered and check for request completion */
if (OPAL_LIKELY(0 < rdma_length)) { if (OPAL_LIKELY(0 < rdma_length)) {
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
} }
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
@ -313,8 +313,8 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
des->des_segment_count, des->des_segment_count,
sizeof(mca_pml_ob1_frag_hdr_t)); sizeof(mca_pml_ob1_frag_hdr_t));
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, -1);
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
if(send_request_pml_complete_check(sendreq) == false) { if(send_request_pml_complete_check(sendreq) == false) {
mca_pml_ob1_send_request_schedule(sendreq); mca_pml_ob1_send_request_schedule(sendreq);
@ -1044,7 +1044,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size; range->range_btls[btl_idx].length -= size;
range->range_send_length -= size; range->range_send_length -= size;
range->range_send_offset += size; range->range_send_offset += size;
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) { if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range); range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0; prev_bytes_remaining = 0;
@ -1060,7 +1060,7 @@ cannot_pack:
range->range_btls[btl_idx].length -= size; range->range_btls[btl_idx].length -= size;
range->range_send_length -= size; range->range_send_length -= size;
range->range_send_offset += size; range->range_send_offset += size;
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_pipeline_depth, 1);
if(range->range_send_length == 0) { if(range->range_send_length == 0) {
range = get_next_send_range(sendreq, range); range = get_next_send_range(sendreq, range);
prev_bytes_remaining = 0; prev_bytes_remaining = 0;
@ -1126,7 +1126,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b
0, 0); 0, 0);
/* check for request completion */ /* check for request completion */
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
send_request_pml_complete_check(sendreq); send_request_pml_complete_check(sendreq);
} else { } else {
@ -1200,7 +1200,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
mca_pml_ob1_rdma_frag_t* frag; mca_pml_ob1_rdma_frag_t* frag;
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
OPAL_THREAD_ADD32(&sendreq->req_state, -1); OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
} }
sendreq->req_recv.pval = hdr->hdr_recv_req.pval; sendreq->req_recv.pval = hdr->hdr_recv_req.pval;

Просмотреть файл

@ -76,12 +76,12 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_range_t);
static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq) static inline bool lock_send_request(mca_pml_ob1_send_request_t *sendreq)
{ {
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
} }
static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq) static inline bool unlock_send_request(mca_pml_ob1_send_request_t *sendreq)
{ {
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
} }
static inline void static inline void
@ -485,7 +485,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
return OMPI_ERR_UNREACH; return OMPI_ERR_UNREACH;
} }
seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); seqn = OPAL_THREAD_ADD_FETCH32(&ob1_proc->send_sequence, 1);
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn); return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
} }

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
@ -100,6 +100,8 @@ int ompi_request_default_wait_any(size_t count,
num_requests_null_inactive = 0; num_requests_null_inactive = 0;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
void *_tmp_ptr = REQUEST_PENDING;
request = requests[i]; request = requests[i];
/* Check for null or completed persistent request. For /* Check for null or completed persistent request. For
@ -110,7 +112,7 @@ int ompi_request_default_wait_any(size_t count,
continue; continue;
} }
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync) ) {
assert(REQUEST_COMPLETE(request)); assert(REQUEST_COMPLETE(request));
completed = i; completed = i;
*index = i; *index = i;
@ -136,6 +138,8 @@ int ompi_request_default_wait_any(size_t count,
* user. * user.
*/ */
for(i = completed-1; (i+1) > 0; i--) { for(i = completed-1; (i+1) > 0; i--) {
void *tmp_ptr = &sync;
request = requests[i]; request = requests[i];
if( request->req_state == OMPI_REQUEST_INACTIVE ) { if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -146,7 +150,7 @@ int ompi_request_default_wait_any(size_t count,
* Otherwise, the request has been completed meanwhile, and it * Otherwise, the request has been completed meanwhile, and it
* has been atomically marked as REQUEST_COMPLETE. * has been atomically marked as REQUEST_COMPLETE.
*/ */
if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &tmp_ptr, REQUEST_PENDING) ) {
*index = i; *index = i;
} }
} }
@ -211,6 +215,8 @@ int ompi_request_default_wait_all( size_t count,
WAIT_SYNC_INIT(&sync, count); WAIT_SYNC_INIT(&sync, count);
rptr = requests; rptr = requests;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
void *_tmp_ptr = REQUEST_PENDING;
request = *rptr++; request = *rptr++;
if( request->req_state == OMPI_REQUEST_INACTIVE ) { if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -218,7 +224,7 @@ int ompi_request_default_wait_all( size_t count,
continue; continue;
} }
if (!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync)) {
if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) {
failed++; failed++;
} }
@ -246,6 +252,8 @@ int ompi_request_default_wait_all( size_t count,
if (MPI_STATUSES_IGNORE != statuses) { if (MPI_STATUSES_IGNORE != statuses) {
/* fill out status and free request if required */ /* fill out status and free request if required */
for( i = 0; i < count; i++, rptr++ ) { for( i = 0; i < count; i++, rptr++ ) {
void *_tmp_ptr = &sync;
request = *rptr; request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) { if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -260,7 +268,7 @@ int ompi_request_default_wait_all( size_t count,
* mark the request as pending then it is neither failed nor complete, and * mark the request as pending then it is neither failed nor complete, and
* we must stop altering it. * we must stop altering it.
*/ */
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
/* /*
* Per MPI 2.2 p 60: * Per MPI 2.2 p 60:
* Allows requests to be marked as MPI_ERR_PENDING if they are * Allows requests to be marked as MPI_ERR_PENDING if they are
@ -306,6 +314,8 @@ int ompi_request_default_wait_all( size_t count,
int rc; int rc;
/* free request if required */ /* free request if required */
for( i = 0; i < count; i++, rptr++ ) { for( i = 0; i < count; i++, rptr++ ) {
void *_tmp_ptr = &sync;
request = *rptr; request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) { if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -320,7 +330,7 @@ int ompi_request_default_wait_all( size_t count,
/* If the request is still pending due to a failed request /* If the request is still pending due to a failed request
* then skip it in this loop. * then skip it in this loop.
*/ */
if( OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { if( OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING ) ) {
/* /*
* Per MPI 2.2 p 60: * Per MPI 2.2 p 60:
* Allows requests to be marked as MPI_ERR_PENDING if they are * Allows requests to be marked as MPI_ERR_PENDING if they are
@ -398,6 +408,8 @@ int ompi_request_default_wait_some(size_t count,
num_requests_null_inactive = 0; num_requests_null_inactive = 0;
num_requests_done = 0; num_requests_done = 0;
for (size_t i = 0; i < count; i++, rptr++) { for (size_t i = 0; i < count; i++, rptr++) {
void *_tmp_ptr = REQUEST_PENDING;
request = *rptr; request = *rptr;
/* /*
* Check for null or completed persistent request. * Check for null or completed persistent request.
@ -407,7 +419,7 @@ int ompi_request_default_wait_some(size_t count,
num_requests_null_inactive++; num_requests_null_inactive++;
continue; continue;
} }
indices[i] = OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync); indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync);
if( !indices[i] ) { if( !indices[i] ) {
/* If the request is completed go ahead and mark it as such */ /* If the request is completed go ahead and mark it as such */
assert( REQUEST_COMPLETE(request) ); assert( REQUEST_COMPLETE(request) );
@ -434,6 +446,8 @@ int ompi_request_default_wait_some(size_t count,
rptr = requests; rptr = requests;
num_requests_done = 0; num_requests_done = 0;
for (size_t i = 0; i < count; i++, rptr++) { for (size_t i = 0; i < count; i++, rptr++) {
void *_tmp_ptr = &sync;
request = *rptr; request = *rptr;
if( request->req_state == OMPI_REQUEST_INACTIVE ) { if( request->req_state == OMPI_REQUEST_INACTIVE ) {
@ -454,7 +468,7 @@ int ompi_request_default_wait_some(size_t count,
*/ */
if( !indices[i] ){ if( !indices[i] ){
indices[num_requests_done++] = i; indices[num_requests_done++] = i;
} else if( !OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { } else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) {
indices[num_requests_done++] = i; indices[num_requests_done++] = i;
} }
} }

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -396,10 +396,12 @@ static inline int ompi_request_free(ompi_request_t** request)
static inline void ompi_request_wait_completion(ompi_request_t *req) static inline void ompi_request_wait_completion(ompi_request_t *req)
{ {
if (opal_using_threads () && !REQUEST_COMPLETE(req)) { if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
void *_tmp_ptr = REQUEST_PENDING;
ompi_wait_sync_t sync; ompi_wait_sync_t sync;
WAIT_SYNC_INIT(&sync, 1); WAIT_SYNC_INIT(&sync, 1);
if (OPAL_ATOMIC_BOOL_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) { if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
SYNC_WAIT(&sync); SYNC_WAIT(&sync);
} else { } else {
/* completed before we had a chance to swap in the sync object */ /* completed before we had a chance to swap in the sync object */
@ -439,7 +441,9 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa
if (0 == rc) { if (0 == rc) {
if( OPAL_LIKELY(with_signal) ) { if( OPAL_LIKELY(with_signal) ) {
if(!OPAL_ATOMIC_BOOL_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) { void *_tmp_ptr = REQUEST_PENDING;
if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) {
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
REQUEST_COMPLETED); REQUEST_COMPLETED);
/* In the case where another thread concurrently changed the request to REQUEST_PENDING */ /* In the case where another thread concurrently changed the request to REQUEST_PENDING */

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -76,7 +76,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost; return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost;
} }
#if OPAL_HAVE_ATOMIC_CMPSET_128 #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the FIFO. We will return the last head of the list /* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the * to allow the upper level to detect if this element is the first one in the
@ -85,14 +85,12 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_list_item_t *item) opal_list_item_t *item)
{ {
opal_counted_pointer_t tail; opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value};
item->opal_list_next = &fifo->opal_fifo_ghost; item->opal_list_next = &fifo->opal_fifo_ghost;
do { do {
tail.value = fifo->opal_fifo_tail.value; if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) {
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, item)) {
break; break;
} }
} while (1); } while (1);
@ -102,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
if (&fifo->opal_fifo_ghost == tail.data.item) { if (&fifo->opal_fifo_ghost == tail.data.item) {
/* update the head */ /* update the head */
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item);
} else { } else {
/* update previous item */ /* update previous item */
tail.data.item->opal_list_next = item; tail.data.item->opal_list_next = item;
@ -116,29 +114,28 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/ */
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{ {
opal_list_item_t *item, *next; opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
opal_counted_pointer_t head, tail; opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail;
do { do {
head.value = fifo->opal_fifo_head.value;
tail.value = fifo->opal_fifo_tail.value; tail.value = fifo->opal_fifo_tail.value;
opal_atomic_rmb (); opal_atomic_rmb ();
item = (opal_list_item_t *) head.data.item; item = (opal_list_item_t *) head.data.item;
next = (opal_list_item_t *) item->opal_list_next; next = (opal_list_item_t *) item->opal_list_next;
if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) { if (ghost == tail.data.item && ghost == item) {
return NULL; return NULL;
} }
/* the head or next pointer are in an inconsistent state. keep looping. */ /* the head or next pointer are in an inconsistent state. keep looping. */
if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item && if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
&fifo->opal_fifo_ghost == next) { head.value = fifo->opal_fifo_head.value;
continue; continue;
} }
/* try popping the head */ /* try popping the head */
if (opal_update_counted_pointer (&fifo->opal_fifo_head, head, next)) { if (opal_update_counted_pointer (&fifo->opal_fifo_head, &head, next)) {
break; break;
} }
} while (1); } while (1);
@ -146,14 +143,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
opal_atomic_wmb (); opal_atomic_wmb ();
/* check for tail and head consistency */ /* check for tail and head consistency */
if (&fifo->opal_fifo_ghost == next) { if (ghost == next) {
/* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */ /* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */
if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) { if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, ghost)) {
/* tail was changed by a push operation. wait for the item's next pointer to be set then /* tail was changed by a push operation. wait for the item's next pointer to be set then
* update the head */ * update the head */
/* wait for next pointer to be updated by push */ /* wait for next pointer to be updated by push */
while (&fifo->opal_fifo_ghost == item->opal_list_next) { while (ghost == item->opal_list_next) {
opal_atomic_rmb (); opal_atomic_rmb ();
} }
@ -166,7 +163,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
head.value = fifo->opal_fifo_head.value; head.value = fifo->opal_fifo_head.value;
next = (opal_list_item_t *) item->opal_list_next; next = (opal_list_item_t *) item->opal_list_next;
assert (&fifo->opal_fifo_ghost == head.data.item); assert (ghost == head.data.item);
fifo->opal_fifo_head.data.item = next; fifo->opal_fifo_head.data.item = next;
opal_atomic_wmb (); opal_atomic_wmb ();
@ -215,14 +212,14 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/ */
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{ {
opal_list_item_t *item, *next; opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
#if OPAL_HAVE_ATOMIC_LLSC_PTR #if OPAL_HAVE_ATOMIC_LLSC_PTR
/* use load-linked store-conditional to avoid ABA issues */ /* use load-linked store-conditional to avoid ABA issues */
do { do {
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
if (&fifo->opal_fifo_ghost == item) { if (ghost == item) {
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { if (ghost == fifo->opal_fifo_tail.data.item) {
return NULL; return NULL;
} }
@ -239,7 +236,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
#else #else
/* protect against ABA issues by "locking" the head */ /* protect against ABA issues by "locking" the head */
do { do {
if (opal_atomic_bool_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
break; break;
} }
@ -249,7 +246,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
opal_atomic_wmb(); opal_atomic_wmb();
item = opal_fifo_head (fifo); item = opal_fifo_head (fifo);
if (&fifo->opal_fifo_ghost == item) { if (ghost == item) {
fifo->opal_fifo_head.data.counter = 0; fifo->opal_fifo_head.data.counter = 0;
return NULL; return NULL;
} }
@ -258,9 +255,11 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
fifo->opal_fifo_head.data.item = next; fifo->opal_fifo_head.data.item = next;
#endif #endif
if (&fifo->opal_fifo_ghost == next) { if (ghost == next) {
if (!opal_atomic_bool_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { void *tmp = item;
while (&fifo->opal_fifo_ghost == item->opal_list_next) {
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) {
while (ghost == item->opal_list_next) {
opal_atomic_rmb (); opal_atomic_rmb ();
} }

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reseved. * reseved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -36,8 +36,8 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* NTH: temporarily suppress warnings about this not being defined */ /* NTH: temporarily suppress warnings about this not being defined */
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128) #if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128)
#define OPAL_HAVE_ATOMIC_CMPSET_128 0 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
#endif #endif
/** /**
@ -50,7 +50,7 @@ union opal_counted_pointer_t {
/** list item pointer */ /** list item pointer */
volatile opal_list_item_t * volatile item; volatile opal_list_item_t * volatile item;
} data; } data;
#if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T
/** used for atomics when there is a cmpset that can operate on /** used for atomics when there is a cmpset that can operate on
* two 64-bit values */ * two 64-bit values */
opal_int128_t value; opal_int128_t value;
@ -59,19 +59,19 @@ union opal_counted_pointer_t {
typedef union opal_counted_pointer_t opal_counted_pointer_t; typedef union opal_counted_pointer_t opal_counted_pointer_t;
#if OPAL_HAVE_ATOMIC_CMPSET_128 #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the FIFO. We will return the last head of the list /* Add one element to the FIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the * to allow the upper level to detect if this element is the first one in the
* list (if the list was empty before this operation). * list (if the list was empty before this operation).
*/ */
static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t old, static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *old,
opal_list_item_t *item) opal_list_item_t *item)
{ {
opal_counted_pointer_t new_p; opal_counted_pointer_t new_p;
new_p.data.item = item; new_p.data.item = item;
new_p.data.counter = old.data.counter + 1; new_p.data.counter = old->data.counter + 1;
return opal_atomic_bool_cmpset_128 (&addr->value, old.value, new_p.value); return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value);
} }
#endif #endif
@ -110,7 +110,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
} }
#if OPAL_HAVE_ATOMIC_CMPSET_128 #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
/* Add one element to the LIFO. We will return the last head of the list /* Add one element to the LIFO. We will return the last head of the list
* to allow the upper level to detect if this element is the first one in the * to allow the upper level to detect if this element is the first one in the
@ -119,14 +119,14 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
opal_list_item_t *item) opal_list_item_t *item)
{ {
do { opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
do {
item->opal_list_next = next; item->opal_list_next = next;
opal_atomic_wmb (); opal_atomic_wmb ();
/* to protect against ABA issues it is sufficient to only update the counter in pop */ /* to protect against ABA issues it is sufficient to only update the counter in pop */
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
return next; return next;
} }
/* DO some kind of pause to release the bus */ /* DO some kind of pause to release the bus */
@ -141,17 +141,17 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
opal_counted_pointer_t old_head; opal_counted_pointer_t old_head;
opal_list_item_t *item; opal_list_item_t *item;
old_head.data.counter = lifo->opal_lifo_head.data.counter;
opal_atomic_rmb ();
old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
do { do {
item = (opal_list_item_t *) old_head.data.item;
old_head.data.counter = lifo->opal_lifo_head.data.counter;
opal_atomic_rmb ();
old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item;
if (item == &lifo->opal_lifo_ghost) { if (item == &lifo->opal_lifo_ghost) {
return NULL; return NULL;
} }
if (opal_update_counted_pointer (&lifo->opal_lifo_head, old_head, if (opal_update_counted_pointer (&lifo->opal_lifo_head, &old_head,
(opal_list_item_t *) item->opal_list_next)) { (opal_list_item_t *) item->opal_list_next)) {
opal_atomic_wmb (); opal_atomic_wmb ();
item->opal_list_next = NULL; item->opal_list_next = NULL;
@ -169,13 +169,15 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
opal_list_item_t *item) opal_list_item_t *item)
{ {
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
/* item free acts as a mini lock to avoid ABA problems */ /* item free acts as a mini lock to avoid ABA problems */
item->item_free = 1; item->item_free = 1;
do { do {
opal_list_item_t *next = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
item->opal_list_next = next; item->opal_list_next = next;
opal_atomic_wmb(); opal_atomic_wmb();
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, next, item)) { if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &next, item)) {
opal_atomic_wmb (); opal_atomic_wmb ();
/* now safe to pop this item */ /* now safe to pop this item */
item->item_free = 0; item->item_free = 0;
@ -236,8 +238,11 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
*/ */
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{ {
opal_list_item_t *item; opal_list_item_t *item, *head, *ghost = &lifo->opal_lifo_ghost;
while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) {
item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
while (item != ghost) {
/* ensure it is safe to pop the head */ /* ensure it is safe to pop the head */
if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) { if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) {
continue; continue;
@ -245,14 +250,16 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
opal_atomic_wmb (); opal_atomic_wmb ();
head = item;
/* try to swap out the head pointer */ /* try to swap out the head pointer */
if (opal_atomic_bool_cmpset_ptr (&lifo->opal_lifo_head.data.item, item, if (opal_atomic_compare_exchange_strong_ptr (&lifo->opal_lifo_head.data.item, &head,
(void *) item->opal_list_next)) { (void *) item->opal_list_next)) {
break; break;
} }
/* NTH: don't need another atomic here */ /* NTH: don't need another atomic here */
item->item_free = 0; item->item_free = 0;
item = head;
/* Do some kind of pause to release the bus */ /* Do some kind of pause to release the bus */
} }

Просмотреть файл

@ -144,7 +144,7 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx)
/* Spot check: ensure this item is only on the list that we /* Spot check: ensure this item is only on the list that we
just insertted it into */ just insertted it into */
(void)opal_atomic_add( &(item->opal_list_item_refcount), 1 ); opal_atomic_add ( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount); assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list; item->opal_list_item_belong_to = list;
#endif #endif

Просмотреть файл

@ -509,7 +509,7 @@ static inline opal_list_item_t *opal_list_remove_item
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
/* Spot check: ensure that this item is still only on one list */ /* Spot check: ensure that this item is still only on one list */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount); assert(0 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = NULL; item->opal_list_item_belong_to = NULL;
#endif #endif
@ -575,7 +575,7 @@ static inline void _opal_list_append(opal_list_t *list, opal_list_item_t *item
/* Spot check: ensure this item is only on the list that we just /* Spot check: ensure this item is only on the list that we just
appended it to */ appended it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount); assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list; item->opal_list_item_belong_to = list;
#endif #endif
@ -625,7 +625,7 @@ static inline void opal_list_prepend(opal_list_t *list,
/* Spot check: ensure this item is only on the list that we just /* Spot check: ensure this item is only on the list that we just
prepended it to */ prepended it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount); assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list; item->opal_list_item_belong_to = list;
#endif #endif
@ -686,7 +686,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list)
/* Spot check: ensure that the item we're returning is now on no /* Spot check: ensure that the item we're returning is now on no
lists */ lists */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount); assert(0 == item->opal_list_item_refcount);
#endif #endif
@ -746,7 +746,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list)
/* Spot check: ensure that the item we're returning is now on no /* Spot check: ensure that the item we're returning is now on no
lists */ lists */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), -1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), -1 );
assert(0 == item->opal_list_item_refcount); assert(0 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = NULL; item->opal_list_item_belong_to = NULL;
#endif #endif
@ -789,7 +789,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos
/* Spot check: double check that this item is only on the list /* Spot check: double check that this item is only on the list
that we just added it to */ that we just added it to */
OPAL_THREAD_ADD32( &(item->opal_list_item_refcount), 1 ); OPAL_THREAD_ADD_FETCH32( &(item->opal_list_item_refcount), 1 );
assert(1 == item->opal_list_item_refcount); assert(1 == item->opal_list_item_refcount);
item->opal_list_item_belong_to = list; item->opal_list_item_belong_to = list;
#endif #endif

Просмотреть файл

@ -510,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__; static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
static inline int opal_obj_update(opal_object_t *object, int inc) static inline int opal_obj_update(opal_object_t *object, int inc)
{ {
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc); return OPAL_THREAD_ADD_FETCH32(&object->obj_reference_count, inc);
} }
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -210,7 +210,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item,
/* Spot check: ensure this item is only on the list that we just /* Spot check: ensure this item is only on the list that we just
appended it to */ appended it to */
OPAL_THREAD_ADD32( &(new_item->opal_tree_item_refcount), 1 ); OPAL_THREAD_ADD_FETCH32( &(new_item->opal_tree_item_refcount), 1 );
assert(1 == new_item->opal_tree_item_refcount); assert(1 == new_item->opal_tree_item_refcount);
new_item->opal_tree_item_belong_to = new_item->opal_tree_container; new_item->opal_tree_item_belong_to = new_item->opal_tree_container;
#endif #endif

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010 ARM ltd. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -104,12 +107,12 @@ void opal_atomic_isync(void)
#if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6)) #if (OPAL_GCC_INLINE_ASSEMBLY && (OPAL_ASM_ARM_VERSION >= 6))
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
int32_t ret, tmp; int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
"1: ldrex %0, [%2] \n" "1: ldrex %0, [%2] \n"
@ -120,11 +123,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
" bne 1b \n" " bne 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
/* these two functions aren't inlined in the non-gcc case because then /* these two functions aren't inlined in the non-gcc case because then
@ -132,51 +137,50 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
atomic_?mb can be inlined). Instead, we "inline" them by hand in atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead the assembly, meaning there is one function call overhead instead
of two */ of two */
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
bool rc; bool rc;
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
opal_atomic_rmb(); opal_atomic_rmb();
return rc; return rc;
} }
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
opal_atomic_wmb(); opal_atomic_wmb();
return opal_atomic_bool_cmpset_32(addr, oldval, newval); return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
} }
#if (OPAL_ASM_SUPPORT_64BIT == 1) #if (OPAL_ASM_SUPPORT_64BIT == 1)
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t ret; int64_t prev;
int tmp; int tmp;
bool ret;
__asm__ __volatile__ (
"1: ldrexd %0, %H0, [%2] \n"
" cmp %0, %3 \n"
" it eq \n"
" cmpeq %H0, %H3 \n"
" bne 2f \n"
" strexd %1, %4, %H4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
__asm__ __volatile__ ( : "=&r" (prev), "=&r" (tmp)
"1: ldrexd %0, %H0, [%2] \n" : "r" (addr), "r" (*oldval), "r" (newval)
" cmp %0, %3 \n" : "cc", "memory");
" it eq \n"
" cmpeq %H0, %H3 \n"
" bne 2f \n"
" strexd %1, %4, %H4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp) ret = (prev == *oldval);
: "r" (addr), "r" (oldval), "r" (newval) *oldval = prev;
: "cc", "memory"); return ret;
return (ret == oldval);
} }
/* these two functions aren't inlined in the non-gcc case because then /* these two functions aren't inlined in the non-gcc case because then
@ -184,91 +188,65 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
atomic_?mb can be inlined). Instead, we "inline" them by hand in atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead the assembly, meaning there is one function call overhead instead
of two */ of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
bool rc; bool rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
opal_atomic_rmb(); opal_atomic_rmb();
return rc; return rc;
} }
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
opal_atomic_wmb(); opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval); return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
} }
#endif #endif
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc) static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int inc)
{ {
int32_t t; int32_t t, old;
int tmp; int tmp;
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%2] \n" "1: ldrex %1, [%3] \n"
" add %0, %0, %3 \n" " add %0, %1, %4 \n"
" strex %1, %0, [%2] \n" " strex %2, %0, [%3] \n"
" cmp %1, #0 \n" " cmp %2, #0 \n"
" bne 1b \n" " bne 1b \n"
: "=&r" (t), "=&r" (tmp) : "=&r" (t), "=&r" (old), "=&r" (tmp)
: "r" (v), "r" (inc) : "r" (v), "r" (inc)
: "cc", "memory"); : "cc", "memory");
return t; return old;
} }
#define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int dec)
{ {
int32_t t; int32_t t, old;
int tmp; int tmp;
__asm__ __volatile__( __asm__ __volatile__(
"1: ldrex %0, [%2] \n" "1: ldrex %1, [%3] \n"
" sub %0, %0, %3 \n" " sub %0, %1, %4 \n"
" strex %1, %0, [%2] \n" " strex %2, %0, [%3] \n"
" cmp %1, #0 \n" " cmp %2, #0 \n"
" bne 1b \n" " bne 1b \n"
: "=&r" (t), "=&r" (tmp) : "=&r" (t), "=&r" (old), "=&r" (tmp)
: "r" (v), "r" (dec) : "r" (v), "r" (dec)
: "cc", "memory"); : "cc", "memory");
return t; return t;
}
#else /* OPAL_ASM_ARM_VERSION <=5 or no GCC inline assembly */
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0)))
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
return !(__kuser_cmpxchg(oldval, newval, addr));
}
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
/* kernel function includes all necessary memory barriers */
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
}
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
/* kernel function includes all necessary memory barriers */
return opal_atomic_bool_cmpset_32(addr, oldval, newval);
} }
#endif #endif

Просмотреть файл

@ -29,10 +29,10 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
@ -82,10 +82,10 @@ static inline void opal_atomic_isync (void)
* *
*********************************************************************/ *********************************************************************/
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
int32_t ret, tmp; int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n" " cmp %w0, %w3 \n"
@ -93,11 +93,13 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
" stxr %w1, %w4, [%2] \n" " stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
@ -119,10 +121,10 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
atomic_?mb can be inlined). Instead, we "inline" them by hand in atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead the assembly, meaning there is one function call overhead instead
of two */ of two */
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
int32_t ret, tmp; int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n" " cmp %w0, %w3 \n"
@ -130,18 +132,20 @@ static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr,
" stxr %w1, %w4, [%2] \n" " stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
int32_t ret, tmp; int32_t prev, tmp;
bool ret;
__asm__ __volatile__ ("1: ldxr %w0, [%2] \n" __asm__ __volatile__ ("1: ldxr %w0, [%2] \n"
" cmp %w0, %w3 \n" " cmp %w0, %w3 \n"
@ -149,11 +153,13 @@ static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr,
" stlxr %w1, %w4, [%2] \n" " stlxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
@ -179,11 +185,11 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
return ret == 0; return ret == 0;
} }
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t ret; int64_t prev;
int tmp; int tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n" __asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n" " cmp %0, %3 \n"
@ -191,11 +197,13 @@ static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr,
" stxr %w1, %4, [%2] \n" " stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == oldval);
*oldval = prev;
return ret;
} }
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
@ -218,11 +226,11 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
atomic_?mb can be inlined). Instead, we "inline" them by hand in atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead the assembly, meaning there is one function call overhead instead
of two */ of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t ret; int64_t prev;
int tmp; int tmp;
bool ret;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n" __asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n" " cmp %0, %3 \n"
@ -230,19 +238,21 @@ static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
" stxr %w1, %4, [%2] \n" " stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == oldval);
*oldval = prev;
return ret;
} }
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t ret; int64_t prev;
int tmp; int tmp;
bool ret;
__asm__ __volatile__ ("1: ldxr %0, [%2] \n" __asm__ __volatile__ ("1: ldxr %0, [%2] \n"
" cmp %0, %3 \n" " cmp %0, %3 \n"
@ -250,11 +260,13 @@ static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
" stlxr %w1, %4, [%2] \n" " stlxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n" " cbnz %w1, 1b \n"
"2: \n" "2: \n"
: "=&r" (ret), "=&r" (tmp) : "=&r" (prev), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval) : "r" (addr), "r" (*oldval), "r" (newval)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == oldval);
*oldval = prev;
return ret;
} }
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
@ -281,20 +293,20 @@ static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
} }
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
{ \ { \
type newval; \ type newval, old; \
int32_t tmp; \ int32_t tmp; \
\ \
__asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \ __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \
" " inst " %" reg "0, %" reg "0, %" reg "3 \n" \ " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \
" stxr %w1, %" reg "0, [%2] \n" \ " stxr %w2, %" reg "0, [%3] \n" \
" cbnz %w1, 1b \n" \ " cbnz %w2, 1b \n" \
: "=&r" (newval), "=&r" (tmp) \ : "=&r" (newval), "=&r" (old), "=&r" (tmp) \
: "r" (addr), "r" (value) \ : "r" (addr), "r" (value) \
: "cc", "memory"); \ : "cc", "memory"); \
\ \
return newval; \ return old; \
} }
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w")

Просмотреть файл

@ -40,11 +40,11 @@
* *
* - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
* - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomically" * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/compare-exchange can be done "atomically"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomically" * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/compare-exchange can be done "atomically"
* *
* Note that for the Atomic math, atomic add/sub may be implemented as * Note that for the Atomic math, atomic add/sub may be implemented as
* C code using opal_atomic_bool_cmpset. The appearance of atomic * C code using opal_atomic_compare_exchange. The appearance of atomic
* operation will be upheld in these cases. * operation will be upheld in these cases.
*/ */
@ -107,8 +107,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
*********************************************************************/ *********************************************************************/
#if !OPAL_GCC_INLINE_ASSEMBLY #if !OPAL_GCC_INLINE_ASSEMBLY
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 0 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 0 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 0
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 0
@ -123,8 +123,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
#define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0
#else #else
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1
#define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 1
#define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 1
@ -187,14 +187,14 @@ enum {
/* compare and set operations can't really be emulated from software, /* compare and set operations can't really be emulated from software,
so if these defines aren't already set, they should be set to 0 so if these defines aren't already set, they should be set to 0
now */ now */
#ifndef OPAL_HAVE_ATOMIC_CMPSET_32 #ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#define OPAL_HAVE_ATOMIC_CMPSET_32 0 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0
#endif #endif
#ifndef OPAL_HAVE_ATOMIC_CMPSET_64 #ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#define OPAL_HAVE_ATOMIC_CMPSET_64 0 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
#endif #endif
#ifndef OPAL_HAVE_ATOMIC_CMPSET_128 #ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
#define OPAL_HAVE_ATOMIC_CMPSET_128 0 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0
#endif #endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_32 #ifndef OPAL_HAVE_ATOMIC_LLSC_32
#define OPAL_HAVE_ATOMIC_LLSC_32 0 #define OPAL_HAVE_ATOMIC_LLSC_32 0
@ -270,7 +270,7 @@ void opal_atomic_wmb(void);
/********************************************************************** /**********************************************************************
* *
* Atomic spinlocks - always inlined, if have atomic cmpset * Atomic spinlocks - always inlined, if have atomic compare-and-swap
* *
*********************************************************************/ *********************************************************************/
@ -280,7 +280,7 @@ void opal_atomic_wmb(void);
#define OPAL_HAVE_ATOMIC_SPINLOCKS 0 #define OPAL_HAVE_ATOMIC_SPINLOCKS 0
#endif #endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
/** /**
* Initialize a lock to value * Initialize a lock to value
@ -330,7 +330,7 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
#if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 #if OPAL_HAVE_ATOMIC_SPINLOCKS == 0
#undef OPAL_HAVE_ATOMIC_SPINLOCKS #undef OPAL_HAVE_ATOMIC_SPINLOCKS
#define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) #define OPAL_HAVE_ATOMIC_SPINLOCKS (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
#define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 #define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1
#endif #endif
@ -347,48 +347,48 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock);
#endif #endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, int32_t oldval, bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval); int32_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval); int32_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval,
int32_t newval); int32_t newval);
#endif #endif
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN) #if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN)
#define OPAL_HAVE_ATOMIC_CMPSET_64 0 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0
#endif #endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64 #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, int64_t oldval, bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval); int64_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval); int64_t newval);
#if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 #if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64
static inline static inline
#endif #endif
bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval,
int64_t newval); int64_t newval);
#endif #endif
@ -397,45 +397,25 @@ bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
#define OPAL_HAVE_ATOMIC_MATH_32 0 #define OPAL_HAVE_ATOMIC_MATH_32 0
#endif #endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_CMPSET_32 #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides static inline int32_t opal_atomic_add_fetch_32(volatile int32_t *addr, int delta);
a static inline version of it (in assembly). If we have to fall static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int delta);
back on cmpset 32, that too will be inline. */ static inline int32_t opal_atomic_and_fetch_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32) static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value);
static inline static inline int32_t opal_atomic_or_fetch_32(volatile int32_t *addr, int32_t value);
#endif static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value);
int32_t opal_atomic_add_32(volatile int32_t *addr, int delta); static inline int32_t opal_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value);
static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_AND_32 || (!defined(OPAL_HAVE_ATOMIC_AND_32) && OPAL_HAVE_ATOMIC_CMPSET_32) static inline int32_t opal_atomic_sub_fetch_32(volatile int32_t *addr, int delta);
static inline static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int delta);
#endif
int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_OR_32 || (!defined(OPAL_HAVE_ATOMIC_OR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value);
#if OPAL_HAVE_INLINE_ATOMIC_XOR_32 || (!defined(OPAL_HAVE_ATOMIC_XOR_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value);
/* OPAL_HAVE_INLINE_ATOMIC_*_32 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back to cmpset 32, that too will be inline. */
#if OPAL_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(OPAL_HAVE_ATOMIC_ADD_32) && OPAL_HAVE_ATOMIC_CMPSET_32)
static inline
#endif
int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */ #endif /* OPAL_HAVE_ATOMIC_MATH_32 */
#if ! OPAL_HAVE_ATOMIC_MATH_32 #if ! OPAL_HAVE_ATOMIC_MATH_32
/* fix up the value of opal_have_atomic_math_32 to allow for C versions */ /* fix up the value of opal_have_atomic_math_32 to allow for C versions */
#undef OPAL_HAVE_ATOMIC_MATH_32 #undef OPAL_HAVE_ATOMIC_MATH_32
#define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_CMPSET_32 #define OPAL_HAVE_ATOMIC_MATH_32 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#endif #endif
#ifndef OPAL_HAVE_ATOMIC_MATH_64 #ifndef OPAL_HAVE_ATOMIC_MATH_64
@ -443,45 +423,24 @@ int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta);
#define OPAL_HAVE_ATOMIC_MATH_64 0 #define OPAL_HAVE_ATOMIC_MATH_64 0
#endif #endif
#if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_CMPSET_64 #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MATH_64 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides static inline int64_t opal_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta);
a static inline version of it (in assembly). If we have to fall static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta);
back to cmpset 64, that too will be inline */ static inline int64_t opal_atomic_and_fetch_64(volatile int64_t *addr, int64_t value);
#if OPAL_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64) static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value);
static inline static inline int64_t opal_atomic_or_fetch_64(volatile int64_t *addr, int64_t value);
#endif static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value);
int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta); static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value);
static inline int64_t opal_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta);
#if OPAL_HAVE_INLINE_ATOMIC_AND_64 || (!defined(OPAL_HAVE_ATOMIC_AND_64) && OPAL_HAVE_ATOMIC_CMPSET_64) static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta);
static inline
#endif
int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value);
#if OPAL_HAVE_INLINE_ATOMIC_OR_64 || (!defined(OPAL_HAVE_ATOMIC_OR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value);
#if OPAL_HAVE_INLINE_ATOMIC_XOR_64 || (!defined(OPAL_HAVE_ATOMIC_XOR_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value);
/* OPAL_HAVE_INLINE_ATOMIC_*_64 will be 1 if <arch>/atomic.h provides
a static inline version of it (in assembly). If we have to fall
back to cmpset 64, that too will be inline */
#if OPAL_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(OPAL_HAVE_ATOMIC_ADD_64) && OPAL_HAVE_ATOMIC_CMPSET_64)
static inline
#endif
int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */ #endif /* OPAL_HAVE_ATOMIC_MATH_32 */
#if ! OPAL_HAVE_ATOMIC_MATH_64 #if ! OPAL_HAVE_ATOMIC_MATH_64
/* fix up the value of opal_have_atomic_math_64 to allow for C versions */ /* fix up the value of opal_have_atomic_math_64 to allow for C versions */
#undef OPAL_HAVE_ATOMIC_MATH_64 #undef OPAL_HAVE_ATOMIC_MATH_64
#define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_CMPSET_64 #define OPAL_HAVE_ATOMIC_MATH_64 OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#endif #endif
/* provide a size_t add/subtract. When in debug mode, make it an /* provide a size_t add/subtract. When in debug mode, make it an
@ -491,114 +450,141 @@ int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta);
*/ */
#if defined(DOXYGEN) || OPAL_ENABLE_DEBUG #if defined(DOXYGEN) || OPAL_ENABLE_DEBUG
static inline size_t static inline size_t
opal_atomic_add_size_t(volatile size_t *addr, size_t delta) opal_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta)
{ {
#if SIZEOF_SIZE_T == 4 #if SIZEOF_SIZE_T == 4
return (size_t) opal_atomic_add_32((int32_t*) addr, delta); return (size_t) opal_atomic_add_fetch_32((int32_t*) addr, delta);
#elif SIZEOF_SIZE_T == 8 #elif SIZEOF_SIZE_T == 8
return (size_t) opal_atomic_add_64((int64_t*) addr, delta); return (size_t) opal_atomic_add_fetch_64((int64_t*) addr, delta);
#else #else
#error "Unknown size_t size" #error "Unknown size_t size"
#endif #endif
} }
static inline size_t static inline size_t
opal_atomic_sub_size_t(volatile size_t *addr, size_t delta) opal_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta)
{ {
#if SIZEOF_SIZE_T == 4 #if SIZEOF_SIZE_T == 4
return (size_t) opal_atomic_sub_32((int32_t*) addr, delta); return (size_t) opal_atomic_fetch_add_32((int32_t*) addr, delta);
#elif SIZEOF_SIZE_T == 8 #elif SIZEOF_SIZE_T == 8
return (size_t) opal_atomic_sub_64((int64_t*) addr, delta); return (size_t) opal_atomic_fetch_add_64((int64_t*) addr, delta);
#else #else
#error "Unknown size_t size" #error "Unknown size_t size"
#endif #endif
} }
/**
 * Atomically subtract from a size_t by forwarding to the fixed-width
 * sub_fetch routine that matches SIZEOF_SIZE_T.
 *
 * @param addr   Address of the size_t to update.
 * @param delta  Amount to subtract.
 *
 * @return The result of the underlying opal_atomic_sub_fetch_32/64 call,
 *         cast to size_t (by its name, presumably the value after the
 *         subtraction -- confirm against the fixed-width implementation).
 */
static inline size_t
opal_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
    /* Keep the volatile qualifier in the cast: the callee takes a
       volatile pointer, so there is no reason to strip it here. */
    return (size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta);
#elif SIZEOF_SIZE_T == 8
    return (size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
/**
 * Atomically subtract from a size_t by forwarding to the fixed-width
 * fetch_sub routine that matches SIZEOF_SIZE_T.
 *
 * @param addr   Address of the size_t to update.
 * @param delta  Amount to subtract.
 *
 * @return The result of the underlying opal_atomic_fetch_sub_32/64 call,
 *         cast to size_t (by its name, presumably the value before the
 *         subtraction -- confirm against the fixed-width implementation).
 */
static inline size_t
opal_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta)
{
#if SIZEOF_SIZE_T == 4
    /* Keep the volatile qualifier in the cast: the callee takes a
       volatile pointer, so there is no reason to strip it here. */
    return (size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta);
#elif SIZEOF_SIZE_T == 8
    return (size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta);
#else
#error "Unknown size_t size"
#endif
}
#else #else
#if SIZEOF_SIZE_T == 4 #if SIZEOF_SIZE_T == 4
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_32((int32_t*) addr, delta)) #define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_32((volatile int32_t *) addr, delta))
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_32((int32_t*) addr, delta)) #define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_32((volatile int32_t *) addr, delta))
#elif SIZEOF_SIZE_T ==8 #define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_32((volatile int32_t *) addr, delta))
#define opal_atomic_add_size_t(addr, delta) ((size_t) opal_atomic_add_64((int64_t*) addr, delta)) #define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_32((volatile int32_t *) addr, delta))
#define opal_atomic_sub_size_t(addr, delta) ((size_t) opal_atomic_sub_64((int64_t*) addr, delta)) #elif SIZEOF_SIZE_T == 8
#define opal_atomic_add_fetch_size_t(addr, delta) ((size_t) opal_atomic_add_fetch_64((volatile int64_t *) addr, delta))
#define opal_atomic_fetch_add_size_t(addr, delta) ((size_t) opal_atomic_fetch_add_64((volatile int64_t *) addr, delta))
#define opal_atomic_sub_fetch_size_t(addr, delta) ((size_t) opal_atomic_sub_fetch_64((volatile int64_t *) addr, delta))
#define opal_atomic_fetch_sub_size_t(addr, delta) ((size_t) opal_atomic_fetch_sub_64((volatile int64_t *) addr, delta))
#else #else
#error "Unknown size_t size" #error "Unknown size_t size"
#endif #endif
#endif #endif
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
/* these are always done with inline functions, so always mark as /* these are always done with inline functions, so always mark as
static inline */ static inline */
static inline bool opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length);
static inline bool opal_atomic_bool_cmpset_acq_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline bool opal_atomic_bool_cmpset_rel_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline bool opal_atomic_bool_cmpset_ptr(volatile void* addr, static inline bool opal_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval,
void* oldval, int64_t newval, size_t length);
void* newval); static inline bool opal_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval,
static inline bool opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, int64_t newval, size_t length);
void* oldval, static inline bool opal_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval,
void* newval); int64_t newval, size_t length);
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
void* oldval,
void* newval); static inline bool opal_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval,
void *newval);
static inline bool opal_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval,
void *newval);
static inline bool opal_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval,
void *newval);
/** /**
* Atomic compare and set of pointer with relaxed semantics. This * Atomic compare and set of generic type with relaxed semantics. This
* macro detect at compile time the type of the first argument and * macro detect at compile time the type of the first argument and
* choose the correct function to be called. * choose the correct function to be called.
* *
* \note This macro should only be used for integer types. * \note This macro should only be used for integer types.
* *
* @param addr Address of <TYPE>. * @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>. * @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>. * @param newval New value to set if comparison is true <TYPE>.
* *
* See opal_atomic_bool_cmpset_* for pseudo-code. * See opal_atomic_compare_exchange_* for pseudo-code.
*/ */
#define opal_atomic_bool_cmpset( ADDR, OLDVAL, NEWVAL ) \ #define opal_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \ opal_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(intptr_t)(NEWVAL), sizeof(*(ADDR)) ) (intptr_t)(NEWVAL), sizeof(*(ADDR)) )
/** /**
* Atomic compare and set of pointer with acquire semantics. This * Atomic compare and set of generic type with acquire semantics. This
* macro detect at compile time the type of the first argument * macro detect at compile time the type of the first argument and
* and choose the correct function to be called. * choose the correct function to be called.
* *
* \note This macro should only be used for integer types. * \note This macro should only be used for integer types.
* *
* @param addr Address of <TYPE>. * @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>. * @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>. * @param newval New value to set if comparison is true <TYPE>.
* *
* See opal_atomic_bool_cmpset_acq_* for pseudo-code. * See opal_atomic_compare_exchange_acq_* for pseudo-code.
*/ */
#define opal_atomic_bool_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ #define opal_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ opal_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) ) (intptr_t)(NEWVAL), sizeof(*(ADDR)) )
/** /**
* Atomic compare and set of pointer with release semantics. This * Atomic compare and set of generic type with release semantics. This
* macro detect at compile time the type of the first argument * macro detect at compile time the type of the first argument and
* and choose the correct function to b * choose the correct function to be called.
* *
* \note This macro should only be used for integer types. * \note This macro should only be used for integer types.
* *
* @param addr Address of <TYPE>. * @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>. * @param oldval Comparison value address of <TYPE>.
* @param newval New value to set if comparison is true <TYPE>. * @param newval New value to set if comparison is true <TYPE>.
* *
* See opal_atomic_bool_cmpsetrel_* for pseudo-code. * See opal_atomic_compare_exchange_rel_* for pseudo-code.
*/ */
#define opal_atomic_bool_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ #define opal_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \
opal_atomic_bool_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ opal_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) ) (intptr_t)(NEWVAL), sizeof(*(ADDR)) )
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
#if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64)
@ -606,15 +592,11 @@ static inline void opal_atomic_add_xx(volatile void* addr,
int32_t value, size_t length); int32_t value, size_t length);
static inline void opal_atomic_sub_xx(volatile void* addr, static inline void opal_atomic_sub_xx(volatile void* addr,
int32_t value, size_t length); int32_t value, size_t length);
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr, void* delta );
static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr, void* delta );
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr, void* delta );
static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ); static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr, void* delta );
static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta );
#else
#error Atomic arithmetic on pointers not supported
#endif
/** /**
* Atomically increment the content depending on the type. This * Atomically increment the content depending on the type. This

Просмотреть файл

@ -34,20 +34,30 @@
* *
* Some architectures do not provide support for the 64 bits * Some architectures do not provide support for the 64 bits
* atomic operations. Until we find a better solution let's just * atomic operations. Until we find a better solution let's just
* undefine all those functions if there is no 64 bit cmpset * undefine all those functions if there is no 64 bit compare-exchange
* *
*********************************************************************/ *********************************************************************/
#if OPAL_HAVE_ATOMIC_CMPSET_32 #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
/* Generate opal_atomic_fetch_<name>_<bits>() on top of compare-exchange.
 * The generated function reads *addr, offers "<value read> <operation> value"
 * as the replacement, and repeats until
 * opal_atomic_compare_exchange_strong_<bits>() reports success.  It then
 * returns the contents of its local comparison variable. */
#define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name)       \
    static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
    {                                                                   \
        type observed;                                                  \
        for (;;) {                                                      \
            observed = *addr;                                           \
            if (opal_atomic_compare_exchange_strong_ ## bits (addr, &observed, observed operation value)) { \
                return observed;                                        \
            }                                                           \
        }                                                               \
    }
#if !defined(OPAL_HAVE_ATOMIC_SWAP_32) #if !defined(OPAL_HAVE_ATOMIC_SWAP_32)
#define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
int32_t newval) int32_t newval)
{ {
int32_t old; int32_t old = *addr;
do { do {
old = *addr; } while (!opal_atomic_compare_exchange_strong_32 (addr, &old, newval));
} while (!opal_atomic_bool_cmpset_32(addr, old, newval));
return old; return old;
} }
@ -55,161 +65,91 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr,
#if !defined(OPAL_HAVE_ATOMIC_ADD_32) #if !defined(OPAL_HAVE_ATOMIC_ADD_32)
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
static inline int32_t
opal_atomic_add_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval + delta));
return (oldval + delta);
}
#endif /* OPAL_HAVE_ATOMIC_ADD_32 */ #endif /* OPAL_HAVE_ATOMIC_ADD_32 */
#if !defined(OPAL_HAVE_ATOMIC_AND_32) #if !defined(OPAL_HAVE_ATOMIC_AND_32)
#define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1
static inline int32_t
opal_atomic_and_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval & value));
return (oldval & value);
}
#endif /* OPAL_HAVE_ATOMIC_AND_32 */ #endif /* OPAL_HAVE_ATOMIC_AND_32 */
#if !defined(OPAL_HAVE_ATOMIC_OR_32) #if !defined(OPAL_HAVE_ATOMIC_OR_32)
#define OPAL_HAVE_ATOMIC_OR_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1
static inline int32_t
opal_atomic_or_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval | value));
return (oldval | value);
}
#endif /* OPAL_HAVE_ATOMIC_OR_32 */ #endif /* OPAL_HAVE_ATOMIC_OR_32 */
#if !defined(OPAL_HAVE_ATOMIC_XOR_32) #if !defined(OPAL_HAVE_ATOMIC_XOR_32)
#define OPAL_HAVE_ATOMIC_XOR_32 1 #define OPAL_HAVE_ATOMIC_XOR_32 1
static inline int32_t
opal_atomic_xor_32(volatile int32_t *addr, int32_t value)
{
int32_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval ^ value));
return (oldval ^ value);
}
#endif /* OPAL_HAVE_ATOMIC_XOR_32 */ #endif /* OPAL_HAVE_ATOMIC_XOR_32 */
#if !defined(OPAL_HAVE_ATOMIC_SUB_32) #if !defined(OPAL_HAVE_ATOMIC_SUB_32)
#define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1
static inline int32_t
opal_atomic_sub_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_32(addr, oldval, oldval - delta));
return (oldval - delta);
}
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */ #endif /* OPAL_HAVE_ATOMIC_SUB_32 */
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ #endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64 #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#if !defined(OPAL_HAVE_ATOMIC_SWAP_64) #if !defined(OPAL_HAVE_ATOMIC_SWAP_64)
#define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, static inline int64_t opal_atomic_swap_64(volatile int64_t *addr,
int64_t newval) int64_t newval)
{ {
int64_t old; int64_t old = *addr;
do { do {
old = *addr; } while (!opal_atomic_compare_exchange_strong_64 (addr, &old, newval));
} while (!opal_atomic_bool_cmpset_64(addr, old, newval));
return old; return old;
} }
#endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ #endif /* OPAL_HAVE_ATOMIC_SWAP_32 */
#if !defined(OPAL_HAVE_ATOMIC_ADD_64) #if !defined(OPAL_HAVE_ATOMIC_ADD_64)
#define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1
static inline int64_t
opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval + delta));
return (oldval + delta);
}
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */ #endif /* OPAL_HAVE_ATOMIC_ADD_64 */
#if !defined(OPAL_HAVE_ATOMIC_AND_64) #if !defined(OPAL_HAVE_ATOMIC_AND_64)
#define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1
static inline int64_t
opal_atomic_and_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval & value));
return (oldval & value);
}
#endif /* OPAL_HAVE_ATOMIC_AND_64 */ #endif /* OPAL_HAVE_ATOMIC_AND_64 */
#if !defined(OPAL_HAVE_ATOMIC_OR_64) #if !defined(OPAL_HAVE_ATOMIC_OR_64)
#define OPAL_HAVE_ATOMIC_OR_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1
static inline int64_t
opal_atomic_or_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval | value));
return (oldval | value);
}
#endif /* OPAL_HAVE_ATOMIC_OR_64 */ #endif /* OPAL_HAVE_ATOMIC_OR_64 */
#if !defined(OPAL_HAVE_ATOMIC_XOR_64) #if !defined(OPAL_HAVE_ATOMIC_XOR_64)
#define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_XOR_64 1
static inline int64_t
opal_atomic_xor_64(volatile int64_t *addr, int64_t value)
{
int64_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval ^ value));
return (oldval ^ value);
}
#endif /* OPAL_HAVE_ATOMIC_XOR_64 */ #endif /* OPAL_HAVE_ATOMIC_XOR_64 */
#if !defined(OPAL_HAVE_ATOMIC_SUB_64) #if !defined(OPAL_HAVE_ATOMIC_SUB_64)
#define OPAL_HAVE_ATOMIC_SUB_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1
static inline int64_t
opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do { OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub)
oldval = *addr;
} while (!opal_atomic_bool_cmpset_64(addr, oldval, oldval - delta));
return (oldval - delta);
}
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */ #endif /* OPAL_HAVE_ATOMIC_SUB_64 */
#else #else
@ -222,130 +162,70 @@ opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
#define OPAL_HAVE_ATOMIC_SUB_64 0 #define OPAL_HAVE_ATOMIC_SUB_64 0
#endif #endif
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ #endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */
#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64)
#if (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) #if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \
static inline bool static inline bool \
opal_atomic_bool_cmpset_xx(volatile void* addr, int64_t oldval, opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
int64_t newval, size_t length) int64_t newval, const size_t length) \
{ { \
switch( length ) { switch (length) { \
#if OPAL_HAVE_ATOMIC_CMPSET_32 case 4: \
case 4: return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
return opal_atomic_bool_cmpset_32( (volatile int32_t*)addr, (int32_t *) oldval, (int32_t) newval); \
(int32_t)oldval, (int32_t)newval ); case 8: \
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, \
(int64_t *) oldval, (int64_t) newval); \
#if OPAL_HAVE_ATOMIC_CMPSET_64 } \
case 8: abort(); \
return opal_atomic_bool_cmpset_64( (volatile int64_t*)addr, }
(int64_t)oldval, (int64_t)newval ); #elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */ #define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \
} static inline bool \
abort(); opal_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \
/* This should never happen, so deliberately abort (hopefully int64_t newval, const size_t length) \
leaving a corefile for analysis) */ { \
} switch (length) { \
case 4: \
return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \
static inline bool (int32_t *) oldval, (int32_t) newval); \
opal_atomic_bool_cmpset_acq_xx(volatile void* addr, int64_t oldval, abort(); \
int64_t newval, size_t length) }
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_CMPSET_32
case 4:
return opal_atomic_bool_cmpset_acq_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
case 8:
return opal_atomic_bool_cmpset_acq_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
}
/* This should never happen, so deliberately abort (hopefully
leaving a corefile for analysis) */
abort();
}
static inline bool
opal_atomic_bool_cmpset_rel_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length)
{
switch( length ) {
#if OPAL_HAVE_ATOMIC_CMPSET_32
case 4:
return opal_atomic_bool_cmpset_rel_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */
#if OPAL_HAVE_ATOMIC_CMPSET_64
case 8:
return opal_atomic_bool_cmpset_rel_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OPAL_HAVE_ATOMIC_CMPSET_64 */
}
/* This should never happen, so deliberately abort (hopefully
leaving a corefile for analysis) */
abort();
}
static inline bool
opal_atomic_bool_cmpset_ptr(volatile void* addr,
void* oldval,
void* newval)
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
return opal_atomic_bool_cmpset_32((int32_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
return opal_atomic_bool_cmpset_64((int64_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#else #else
abort(); #error "Platform does not have required atomic compare-and-swap functionality"
#endif #endif
}
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_)
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_)
OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_)
static inline bool #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32
opal_atomic_bool_cmpset_acq_ptr(volatile void* addr, #define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \
void* oldval, static inline bool \
void* newval) opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \
{ { \
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 return opal_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \
return opal_atomic_bool_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, }
(unsigned long) newval); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 #define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \
return opal_atomic_bool_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, static inline bool \
(unsigned long) newval); opal_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \
{ \
return opal_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \
}
#else #else
abort(); #error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics"
#endif #endif
}
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_)
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_)
OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr, #endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */
void* oldval,
void* newval)
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32
return opal_atomic_bool_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64
return opal_atomic_bool_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval,
(unsigned long) newval);
#else
abort();
#endif
}
#endif /* (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) */
#if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) #if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64)
@ -383,20 +263,19 @@ static inline bool opal_atomic_bool_cmpset_rel_ptr(volatile void* addr,
#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 #if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64
static inline void static inline void
opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length) opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
{ {
switch( length ) { switch( length ) {
#if OPAL_HAVE_ATOMIC_ADD_32 #if OPAL_HAVE_ATOMIC_ADD_32
case 4: case 4:
opal_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); (void) opal_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value );
break; break;
#endif /* OPAL_HAVE_ATOMIC_CMPSET_32 */ #endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */
#if OPAL_HAVE_ATOMIC_ADD_64 #if OPAL_HAVE_ATOMIC_ADD_64
case 8: case 8:
opal_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); (void) opal_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value );
break; break;
#endif /* OPAL_HAVE_ATOMIC_ADD_64 */ #endif /* OPAL_HAVE_ATOMIC_ADD_64 */
default: default:
@ -413,13 +292,13 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
switch( length ) { switch( length ) {
#if OPAL_HAVE_ATOMIC_SUB_32 #if OPAL_HAVE_ATOMIC_SUB_32
case 4: case 4:
opal_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); (void) opal_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value );
break; break;
#endif /* OPAL_HAVE_ATOMIC_SUB_32 */ #endif /* OPAL_HAVE_ATOMIC_SUB_32 */
#if OPAL_HAVE_ATOMIC_SUB_64 #if OPAL_HAVE_ATOMIC_SUB_64
case 8: case 8:
opal_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); (void) opal_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value );
break; break;
#endif /* OPAL_HAVE_ATOMIC_SUB_64 */ #endif /* OPAL_HAVE_ATOMIC_SUB_64 */
default: default:
@ -429,47 +308,77 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
} }
} }
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 #define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \
static inline int32_t opal_atomic_add_ptr( volatile void* addr, static inline type opal_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value) \
void* delta ) { \
{ return opal_atomic_fetch_ ## op ## _ ## suffix (addr, value) operation value; \
return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta); }
}
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32)
static inline int64_t opal_atomic_add_ptr( volatile void* addr, OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32)
void* delta ) OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32)
{ OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32)
return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta); OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32)
}
#else #if OPAL_HAVE_ATOMIC_MATH_64
static inline int32_t opal_atomic_add_ptr( volatile void* addr, OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64)
void* delta ) OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64)
{ OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64)
abort(); OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64)
return 0; OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64)
}
#endif #endif
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 static inline intptr_t opal_atomic_fetch_add_ptr( volatile void* addr,
static inline int32_t opal_atomic_sub_ptr( volatile void* addr,
void* delta ) void* delta )
{ {
return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta); #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
} return opal_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
static inline int64_t opal_atomic_sub_ptr( volatile void* addr, return opal_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta);
void* delta )
{
return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta);
}
#else #else
static inline int32_t opal_atomic_sub_ptr( volatile void* addr, abort ();
return 0;
#endif
}
static inline intptr_t opal_atomic_add_fetch_ptr( volatile void* addr,
void* delta ) void* delta )
{ {
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32
return opal_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64
return opal_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta);
#else
abort ();
return 0;
#endif
}
static inline intptr_t opal_atomic_fetch_sub_ptr( volatile void* addr,
void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
return opal_atomic_fetch_sub_32((int32_t*) addr, (unsigned long) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32
return opal_atomic_fetch_sub_64((int64_t*) addr, (unsigned long) delta);
#else
abort(); abort();
return 0; return 0;
}
#endif #endif
}
/**
 * Atomically subtract a pointer-sized delta from the pointer-sized value
 * stored at @addr, dispatching to the 32- or 64-bit sub_fetch primitive
 * that matches SIZEOF_VOID_P.
 *
 * @param addr   Address of the pointer-sized value to update.
 * @param delta  Amount to subtract, carried in a pointer-typed argument.
 *
 * @return Whatever the width-specific opal_atomic_sub_fetch_{32,64} returns
 *         (by its name, the value after the subtraction -- confirm against
 *         the primitive's definition).  If no atomic subtract of the
 *         pointer's width is available the function calls abort().
 */
static inline intptr_t opal_atomic_sub_fetch_ptr( volatile void* addr,
                                                  void* delta )
{
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32
    /* go through intptr_t so the pointer-to-integer conversion is always
       pointer-width, and keep the volatile qualifier on the target */
    return opal_atomic_sub_fetch_32((volatile int32_t*) addr, (int32_t) (intptr_t) delta);
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_64
    /* the 64-bit branch depends on the 64-bit subtract, so it must be
       guarded by OPAL_HAVE_ATOMIC_SUB_64 rather than ..._SUB_32 */
    return opal_atomic_sub_fetch_64((volatile int64_t*) addr, (int64_t) (intptr_t) delta);
#else
    abort();
    return 0;
#endif
}
#endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */ #endif /* OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 */
@ -493,21 +402,20 @@ opal_atomic_lock_init( opal_atomic_lock_t* lock, int32_t value )
static inline int static inline int
opal_atomic_trylock(opal_atomic_lock_t *lock) opal_atomic_trylock(opal_atomic_lock_t *lock)
{ {
bool ret = opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED;
OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED); bool ret = opal_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED);
return (ret == 0) ? 1 : 0; return (ret == false) ? 1 : 0;
} }
static inline void static inline void
opal_atomic_lock(opal_atomic_lock_t *lock) opal_atomic_lock(opal_atomic_lock_t *lock)
{ {
while( !opal_atomic_bool_cmpset_acq_32( &(lock->u.lock), while (opal_atomic_trylock (lock)) {
OPAL_ATOMIC_LOCK_UNLOCKED, OPAL_ATOMIC_LOCK_LOCKED) ) { while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) {
while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { /* spin */ ;
/* spin */ ; }
} }
}
} }

Просмотреть файл

@ -33,7 +33,7 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1
#define OPAL_HAVE_ATOMIC_OR_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1
@ -41,7 +41,7 @@
#define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1
#define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1
#define OPAL_HAVE_ATOMIC_OR_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1
@ -81,26 +81,20 @@ static inline void opal_atomic_wmb(void)
#pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L)
#endif #endif
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} }
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
} }
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} }
static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval)
@ -110,51 +104,45 @@ static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newva
return oldval; return oldval;
} }
static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
{ {
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
} }
static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
{ {
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
} }
static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
{ {
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
} }
static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
{ {
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
} }
static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
{ {
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
} }
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} }
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
__ATOMIC_RELEASE, __ATOMIC_RELAXED);
} }
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} }
static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval)
@ -164,52 +152,55 @@ static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newva
return oldval; return oldval;
} }
static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
{ {
return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED);
} }
static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
{ {
return __atomic_and_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED);
} }
static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
{ {
return __atomic_or_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED);
} }
static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
{ {
return __atomic_xor_fetch (addr, value, __ATOMIC_RELAXED); return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED);
} }
static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
{ {
return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED);
} }
#if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128
#define OPAL_HAVE_ATOMIC_CMPSET_128 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
opal_int128_t oldval, opal_int128_t newval) opal_int128_t *oldval, opal_int128_t newval)
{ {
return __atomic_compare_exchange_n (addr, &oldval, newval, false, return __atomic_compare_exchange_n (addr, oldval, newval, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED); __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
} }
#elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 #elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
#define OPAL_HAVE_ATOMIC_CMPSET_128 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
/* __atomic version is not lock-free so use legacy __sync version */ /* __atomic version is not lock-free so use legacy __sync version */
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
opal_int128_t oldval, opal_int128_t newval) opal_int128_t *oldval, opal_int128_t newval)
{ {
return __sync_bool_compare_and_swap (addr, oldval, newval); opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
bool ret = prev == *oldval;
*oldval = prev;
return ret;
} }
#endif #endif

Просмотреть файл

@ -40,7 +40,7 @@
*********************************************************************/ *********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
@ -84,15 +84,13 @@ static inline void opal_atomic_isync(void)
*********************************************************************/ *********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval,
int32_t newval)
{ {
unsigned char ret; unsigned char ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
SMPLOCK "cmpxchgl %3,%2 \n\t" SMPLOCK "cmpxchgl %3,%2 \n\t"
"sete %0 \n\t" "sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*addr) : "=qm" (ret), "+a" (*oldval), "+m" (*addr)
: "q"(newval) : "q"(newval)
: "memory", "cc"); : "memory", "cc");
@ -101,8 +99,8 @@ static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 #define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 #define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
@ -132,7 +130,7 @@ static inline int32_t opal_atomic_swap_32( volatile int32_t *addr,
* *
* Atomically adds @i to @v. * Atomically adds @i to @v.
*/ */
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
{ {
int ret = i; int ret = i;
__asm__ __volatile__( __asm__ __volatile__(
@ -141,7 +139,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret+i); return ret;
} }
@ -152,7 +150,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
* *
* Atomically subtracts @i from @v. * Atomically subtracts @i from @v.
*/ */
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
{ {
int ret = -i; int ret = -i;
__asm__ __volatile__( __asm__ __volatile__(
@ -161,7 +159,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret-i); return ret;
} }
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -40,7 +40,7 @@
*********************************************************************/ *********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1
@ -53,7 +53,7 @@
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1
@ -144,24 +144,25 @@ void opal_atomic_isync(void)
#define OPAL_ASM_VALUE64(x) x #define OPAL_ASM_VALUE64(x) x
#endif #endif
static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
static inline bool opal_atomic_bool_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{ {
int32_t ret; int32_t prev;
bool ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
"1: lwarx %0, 0, %2 \n\t" "1: lwarx %0, 0, %2 \n\t"
" cmpw 0, %0, %3 \n\t" " cmpw 0, %0, %3 \n\t"
" bne- 2f \n\t" " bne- 2f \n\t"
" stwcx. %4, 0, %2 \n\t" " stwcx. %4, 0, %2 \n\t"
" bne- 1b \n\t" " bne- 1b \n\t"
"2:" "2:"
: "=&r" (ret), "=m" (*addr) : "=&r" (prev), "=m" (*addr)
: "r" OPAL_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr) : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
@ -195,23 +196,21 @@ static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
atomic_?mb can be inlined). Instead, we "inline" them by hand in atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead the assembly, meaning there is one function call overhead instead
of two */ of two */
static inline bool opal_atomic_bool_cmpset_acq_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
bool rc; bool rc;
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
opal_atomic_rmb(); opal_atomic_rmb();
return rc; return rc;
} }
static inline bool opal_atomic_bool_cmpset_rel_32(volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
opal_atomic_wmb(); opal_atomic_wmb();
return opal_atomic_bool_cmpset_32(addr, oldval, newval); return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
} }
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
@ -236,20 +235,20 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \
static inline int64_t opal_atomic_ ## type ## _64(volatile int64_t* v, int64_t val) \ static inline int64_t opal_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \
{ \ { \
int64_t t; \ int64_t t, old; \
\ \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: ldarx %0, 0, %3 \n\t" \ "1: ldarx %1, 0, %4 \n\t" \
" " #instr " %0, %2, %0 \n\t" \ " " #instr " %0, %3, %1 \n\t" \
" stdcx. %0, 0, %3 \n\t" \ " stdcx. %0, 0, %4 \n\t" \
" bne- 1b \n\t" \ " bne- 1b \n\t" \
: "=&r" (t), "=m" (*v) \ : "=&r" (t), "=&r" (old), "=m" (*v) \
: "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \
: "cc"); \ : "cc"); \
\ \
return t; \ return old; \
} }
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add)
@ -258,23 +257,25 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or)
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor)
OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf)
static inline bool opal_atomic_bool_cmpset_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t ret; int64_t prev;
bool ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
"1: ldarx %0, 0, %2 \n\t" "1: ldarx %0, 0, %2 \n\t"
" cmpd 0, %0, %3 \n\t" " cmpd 0, %0, %3 \n\t"
" bne- 2f \n\t" " bne- 2f \n\t"
" stdcx. %4, 0, %2 \n\t" " stdcx. %4, 0, %2 \n\t"
" bne- 1b \n\t" " bne- 1b \n\t"
"2:" "2:"
: "=&r" (ret), "=m" (*addr) : "=&r" (prev), "=m" (*addr)
: "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr)
: "cc", "memory"); : "cc", "memory");
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
@ -303,29 +304,6 @@ static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
return ret; return ret;
} }
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
bool rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
}
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{ {
@ -352,9 +330,9 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
int64_t prev;
int ret; int ret;
/* /*
@ -369,67 +347,65 @@ static inline int opal_atomic_bool_cmpset_64(volatile int64_t *addr,
* is very similar to the pure 64 bit version. * is very similar to the pure 64 bit version.
*/ */
__asm__ __volatile__ ( __asm__ __volatile__ (
"ld r4,%2 \n\t" "ld r4,%3 \n\t"
"ld r5,%3 \n\t" "ld r5,%4 \n\t"
"1: ldarx r9, 0, %1 \n\t" "1: ldarx %1, 0, %2 \n\t"
" cmpd 0, r9, r4 \n\t" " cmpd 0, %1, r4 \n\t"
" bne- 2f \n\t" " bne- 2f \n\t"
" stdcx. r5, 0, %1 \n\t" " stdcx. r5, 0, %2 \n\t"
" bne- 1b \n\t" " bne- 1b \n\t"
"2: \n\t" "2: \n\t"
"xor r5,r4,r9 \n\t" "xor r5,r4,%1 \n\t"
"subfic r9,r5,0 \n\t" "subfic r9,r5,0 \n\t"
"adde %0,r9,r5 \n\t" "adde %0,r9,r5 \n\t"
: "=&r" (ret) : "=&r" (ret), "+r" (prev)
: "r"OPAL_ASM_ADDR(addr), : "r"OPAL_ASM_ADDR(addr),
"m"(oldval), "m"(newval) "m"(*oldval), "m"(newval)
: "r4", "r5", "r9", "cc", "memory"); : "r4", "r5", "r9", "cc", "memory");
*oldval = prev;
return ret; return (bool) ret;
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_bool_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_bool_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval);
} }
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */
#endif /* OPAL_ASM_SUPPORT_64BIT */ #endif /* OPAL_ASM_SUPPORT_64BIT */
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
bool rc;
rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
opal_atomic_rmb();
return rc;
}
static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
opal_atomic_wmb();
return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
}
#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \
static inline int32_t opal_atomic_ ## type ## _32(volatile int32_t* v, int val) \ static inline int32_t opal_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \
{ \ { \
int32_t t; \ int32_t t, old; \
\ \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: lwarx %0, 0, %3 \n\t" \ "1: lwarx %1, 0, %4 \n\t" \
" " #instr " %0, %2, %0 \n\t" \ " " #instr " %0, %3, %1 \n\t" \
" stwcx. %0, 0, %3 \n\t" \ " stwcx. %0, 0, %4 \n\t" \
" bne- 1b \n\t" \ " bne- 1b \n\t" \
: "=&r" (t), "=m" (*v) \ : "=&r" (t), "=&r" (old), "=m" (*v) \
: "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \ : "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \
: "cc"); \ : "cc"); \
\ \

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -38,9 +41,9 @@
*********************************************************************/ *********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
/********************************************************************** /**********************************************************************
@ -82,50 +85,49 @@ static inline void opal_atomic_isync(void)
*********************************************************************/ *********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd) /* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
* *
* if (*(reg(rs1)) == reg(rs2) ) * if (*(reg(rs1)) == reg(rs2) )
* swap reg(rd), *(reg(rs1)) * swap reg(rd), *(reg(rs1))
* else * else
* reg(rd) = *(reg(rs1)) * reg(rd) = *(reg(rs1))
*/ */
int32_t ret = newval; int32_t prev = newval;
bool ret;
__asm__ __volatile__("casa [%1] " ASI_P ", %2, %0" __asm__ __volatile__("casa [%1] " ASI_P ", %2, %0"
: "+r" (ret) : "+r" (prev)
: "r" (addr), "r" (oldval)); : "r" (addr), "r" (*oldval));
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
bool rc; bool rc;
rc = opal_atomic_bool_cmpset_32(addr, oldval, newval); rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
opal_atomic_rmb(); opal_atomic_rmb();
return rc; return rc;
} }
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
opal_atomic_wmb(); opal_atomic_wmb();
return opal_atomic_bool_cmpset_32(addr, oldval, newval); return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval);
} }
#if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 #if OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd) /* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
* *
@ -134,18 +136,20 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
* else * else
* reg(rd) = *(reg(rs1)) * reg(rd) = *(reg(rs1))
*/ */
int64_t ret = newval; int64_t prev = newval;
bool ret;
__asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0" __asm__ __volatile__("casxa [%1] " ASI_P ", %2, %0"
: "+r" (ret) : "+r" (prev)
: "r" (addr), "r" (oldval)); : "r" (addr), "r" (*oldval));
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
#else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ #else /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
/* casa [reg(rs1)] %asi, reg(rs2), reg(rd) /* casa [reg(rs1)] %asi, reg(rs2), reg(rd)
* *
@ -155,40 +159,41 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
* reg(rd) = *(reg(rs1)) * reg(rd) = *(reg(rs1))
* *
*/ */
long long ret = newval; int64_t prev = newval;
bool ret;
__asm__ __volatile__( __asm__ __volatile__(
"ldx %0, %%g1 \n\t" /* g1 = ret */ "ldx %0, %%g1 \n\t" /* g1 = ret */
"ldx %2, %%g2 \n\t" /* g2 = oldval */ "ldx %2, %%g2 \n\t" /* g2 = oldval */
"casxa [%1] " ASI_P ", %%g2, %%g1 \n\t" "casxa [%1] " ASI_P ", %%g2, %%g1 \n\t"
"stx %%g1, %0 \n" "stx %%g1, %0 \n"
: "+m"(ret) : "+m"(prev)
: "r"(addr), "m"(oldval) : "r"(addr), "m"(*oldval)
: "%g1", "%g2" : "%g1", "%g2"
); );
return (ret == oldval); ret = (prev == *oldval);
*oldval = prev;
return ret;
} }
#endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */ #endif /* OPAL_ASSEMBLY_ARCH == OPAL_SPARCV9_64 */
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
bool rc; bool rc;
rc = opal_atomic_bool_cmpset_64(addr, oldval, newval); rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
opal_atomic_rmb(); opal_atomic_rmb();
return rc; return rc;
} }
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
opal_atomic_wmb(); opal_atomic_wmb();
return opal_atomic_bool_cmpset_64(addr, oldval, newval); return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval);
} }
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -53,119 +53,110 @@ static inline void opal_atomic_wmb(void)
* *
*********************************************************************/ *********************************************************************/
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
static inline bool opal_atomic_bool_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval) static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
{ {
return __sync_bool_compare_and_swap(addr, oldval, newval); int32_t prev = __sync_val_compare_and_swap (add, *oldval, newval);
bool ret = prev == *oldval;
*oldval = prev;
return ret;
} }
#define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
static inline bool opal_atomic_bool_cmpset_rel_32( volatile int32_t *addr, #define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
int32_t oldval, int32_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);}
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);
}
#define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1
static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta)
{ {
return __sync_add_and_fetch(addr, delta); return __sync_fetch_and_add(addr, delta);
} }
#define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1
static inline int32_t opal_atomic_and_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value)
{ {
return __sync_and_and_fetch(addr, value); return __sync_fetch_and_and(addr, value);
} }
#define OPAL_HAVE_ATOMIC_OR_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1
static inline int32_t opal_atomic_or_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value)
{ {
return __sync_or_and_fetch(addr, value); return __sync_fetch_and_or(addr, value);
} }
#define OPAL_HAVE_ATOMIC_XOR_32 1 #define OPAL_HAVE_ATOMIC_XOR_32 1
static inline int32_t opal_atomic_xor_32(volatile int32_t *addr, int32_t value) static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value)
{ {
return __sync_xor_and_fetch(addr, value); return __sync_fetch_and_xor(addr, value);
} }
#define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1
static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta)
{ {
return __sync_sub_and_fetch(addr, delta); return __sync_fetch_and_sub(addr, delta);
} }
#if OPAL_ASM_SYNC_HAVE_64BIT #if OPAL_ASM_SYNC_HAVE_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
static inline bool opal_atomic_bool_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval) static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{ {
return __sync_bool_compare_and_swap(addr, oldval, newval); int64_t prev = __sync_val_compare_and_swap (add, *oldval, newval);
bool ret = prev == *oldval;
*oldval = prev;
return ret;
} }
static inline bool opal_atomic_bool_cmpset_rel_64( volatile int64_t *addr, #define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
int64_t oldval, int64_t newval) #define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
{
return __sync_bool_compare_and_swap(addr, oldval, newval);}
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
return __sync_bool_compare_and_swap(addr, oldval, newval);
}
#define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_MATH_64 1
#define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1
static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta)
{ {
return __sync_add_and_fetch(addr, delta); return __sync_fetch_and_add(addr, delta);
} }
#define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1
static inline int64_t opal_atomic_and_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value)
{ {
return __sync_and_and_fetch(addr, value); return __sync_fetch_and_and(addr, value);
} }
#define OPAL_HAVE_ATOMIC_OR_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1
static inline int64_t opal_atomic_or_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value)
{ {
return __sync_or_and_fetch(addr, value); return __sync_fetch_and_or(addr, value);
} }
#define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_XOR_64 1
static inline int64_t opal_atomic_xor_64(volatile int64_t *addr, int64_t value) static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value)
{ {
return __sync_xor_and_fetch(addr, value); return __sync_fetch_and_xor(addr, value);
} }
#define OPAL_HAVE_ATOMIC_SUB_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1
static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta)
{ {
return __sync_sub_and_fetch(addr, delta); return __sync_fetch_and_sub(addr, delta);
} }
#endif #endif
#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 #if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr,
opal_int128_t oldval, opal_int128_t newval) opal_int128_t *oldval, opal_int128_t newval)
{ {
return __sync_bool_compare_and_swap(addr, oldval, newval); opal_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval);
bool ret = prev == *oldval;
*oldval = prev;
return ret;
} }
#define OPAL_HAVE_ATOMIC_CMPSET_128 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
#endif #endif

Просмотреть файл

@ -40,9 +40,9 @@
*********************************************************************/ *********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
/********************************************************************** /**********************************************************************
* *
@ -82,14 +82,13 @@ static inline void opal_atomic_isync(void)
*********************************************************************/ *********************************************************************/
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr, static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval)
int32_t oldval, int32_t newval)
{ {
unsigned char ret; unsigned char ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
SMPLOCK "cmpxchgl %3,%2 \n\t" SMPLOCK "cmpxchgl %3,%2 \n\t"
"sete %0 \n\t" "sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*addr) : "=qm" (ret), "+a" (*oldval), "+m" (*addr)
: "q"(newval) : "q"(newval)
: "memory", "cc"); : "memory", "cc");
@ -98,19 +97,18 @@ static inline bool opal_atomic_bool_cmpset_32( volatile int32_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_32 opal_atomic_bool_cmpset_32 #define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32
#define opal_atomic_bool_cmpset_rel_32 opal_atomic_bool_cmpset_32 #define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr, static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
int64_t oldval, int64_t newval)
{ {
unsigned char ret; unsigned char ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
SMPLOCK "cmpxchgq %3,%2 \n\t" SMPLOCK "cmpxchgq %3,%2 \n\t"
"sete %0 \n\t" "sete %0 \n\t"
: "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) : "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr))
: "q"(newval) : "q"(newval)
: "memory", "cc" : "memory", "cc"
); );
@ -120,13 +118,12 @@ static inline bool opal_atomic_bool_cmpset_64( volatile int64_t *addr,
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */
#define opal_atomic_bool_cmpset_acq_64 opal_atomic_bool_cmpset_64 #define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64
#define opal_atomic_bool_cmpset_rel_64 opal_atomic_bool_cmpset_64 #define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64
#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T #if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T
static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, static inline bool opal_atomic_compare_exchange_strong_128 (volatile opal_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval)
opal_int128_t newval)
{ {
unsigned char ret; unsigned char ret;
@ -135,15 +132,14 @@ static inline bool opal_atomic_bool_cmpset_128 (volatile opal_int128_t *addr, op
* at the address is returned in eax:edx. */ * at the address is returned in eax:edx. */
__asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t"
"sete %0 \n\t" "sete %0 \n\t"
: "=qm" (ret) : "=qm" (ret), "+a" (((int64_t *)oldval)[0]), "+d" (((int64_t *)oldval)[1])
: "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1])
"a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) : "memory", "cc", "eax", "edx");
: "memory", "cc");
return (bool) ret; return (bool) ret;
} }
#define OPAL_HAVE_ATOMIC_CMPSET_128 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */
@ -200,7 +196,7 @@ static inline int64_t opal_atomic_swap_64( volatile int64_t *addr,
* *
* Atomically adds @i to @v. * Atomically adds @i to @v.
*/ */
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) static inline int32_t opal_atomic_fetch_add_32(volatile int32_t* v, int i)
{ {
int ret = i; int ret = i;
__asm__ __volatile__( __asm__ __volatile__(
@ -209,7 +205,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret+i); return ret;
} }
#define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1
@ -221,7 +217,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
* *
* Atomically adds @i to @v. * Atomically adds @i to @v.
*/ */
static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) static inline int64_t opal_atomic_fetch_add_64(volatile int64_t* v, int64_t i)
{ {
int64_t ret = i; int64_t ret = i;
__asm__ __volatile__( __asm__ __volatile__(
@ -230,7 +226,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret+i); return ret;
} }
#define OPAL_HAVE_ATOMIC_SUB_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1
@ -242,7 +238,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i)
* *
* Atomically subtracts @i from @v. * Atomically subtracts @i from @v.
*/ */
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t* v, int i)
{ {
int ret = -i; int ret = -i;
__asm__ __volatile__( __asm__ __volatile__(
@ -251,7 +247,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret-i); return ret;
} }
#define OPAL_HAVE_ATOMIC_SUB_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1
@ -263,7 +259,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i)
* *
* Atomically subtracts @i from @v. * Atomically subtracts @i from @v.
*/ */
static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t* v, int64_t i)
{ {
int64_t ret = -i; int64_t ret = -i;
__asm__ __volatile__( __asm__ __volatile__(
@ -272,7 +268,7 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i)
: :
:"memory", "cc" :"memory", "cc"
); );
return (ret-i); return ret;
} }
#endif /* OPAL_GCC_INLINE_ASSEMBLY */ #endif /* OPAL_GCC_INLINE_ASSEMBLY */

Просмотреть файл

@ -1119,7 +1119,7 @@ int mca_btl_openib_add_procs(
} }
if (nprocs_new) { if (nprocs_new) {
opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new); opal_atomic_add_fetch_32 (&openib_btl->num_peers, nprocs_new);
/* adjust cq sizes given the new procs */ /* adjust cq sizes given the new procs */
rc = openib_btl_size_queues (openib_btl); rc = openib_btl_size_queues (openib_btl);
@ -1229,7 +1229,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
/* this is a new process to this openib btl /* this is a new process to this openib btl
* account this procs if need */ * account this procs if need */
opal_atomic_add_32 (&openib_btl->num_peers, 1); opal_atomic_add_fetch_32 (&openib_btl->num_peers, 1);
rc = openib_btl_size_queues(openib_btl); rc = openib_btl_size_queues(openib_btl);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs")); BTL_ERROR(("error creating cqs"));

Просмотреть файл

@ -237,7 +237,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
/* Set the flag to fatal */ /* Set the flag to fatal */
device->got_fatal_event = true; device->got_fatal_event = true;
/* It is not critical to protect the counter */ /* It is not critical to protect the counter */
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
/* fall through */ /* fall through */
case IBV_EVENT_CQ_ERR: case IBV_EVENT_CQ_ERR:
case IBV_EVENT_QP_FATAL: case IBV_EVENT_QP_FATAL:
@ -280,7 +280,7 @@ static void btl_openib_async_device (int fd, short flags, void *arg)
openib_event_to_str((enum ibv_event_type)event_type)); openib_event_to_str((enum ibv_event_type)event_type));
/* Set the flag to indicate port error */ /* Set the flag to indicate port error */
device->got_port_event = true; device->got_port_event = true;
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); OPAL_THREAD_ADD_FETCH32(&mca_btl_openib_component.error_counter, 1);
break; break;
case IBV_EVENT_COMM_EST: case IBV_EVENT_COMM_EST:
case IBV_EVENT_PORT_ACTIVE: case IBV_EVENT_PORT_ACTIVE:
@ -470,7 +470,7 @@ void mca_btl_openib_async_fini (void)
void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device) void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device)
{ {
if (mca_btl_openib_component.async_evbase) { if (mca_btl_openib_component.async_evbase) {
if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) { if (1 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, 1)) {
mca_btl_openib_async_init (); mca_btl_openib_async_init ();
} }
opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event, opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event,
@ -484,7 +484,7 @@ void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device)
{ {
if (mca_btl_openib_component.async_evbase) { if (mca_btl_openib_component.async_evbase) {
opal_event_del (&device->async_event); opal_event_del (&device->async_event);
if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) { if (0 == OPAL_THREAD_ADD_FETCH32 (&btl_openib_async_device_count, -1)) {
mca_btl_openib_async_fini (); mca_btl_openib_async_fini ();
} }
} }

Просмотреть файл

@ -3203,7 +3203,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
credits = hdr->credits; credits = hdr->credits;
if(hdr->cm_seen) if(hdr->cm_seen)
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
/* Now return fragment. Don't touch hdr after this point! */ /* Now return fragment. Don't touch hdr after this point! */
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) { if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
@ -3215,7 +3215,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail); tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail);
if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf)) if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf))
break; break;
OPAL_THREAD_ADD32(&erl->credits, 1); OPAL_THREAD_ADD_FETCH32(&erl->credits, 1);
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail); MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail);
} }
OPAL_THREAD_UNLOCK(&erl->lock); OPAL_THREAD_UNLOCK(&erl->lock);
@ -3233,14 +3233,14 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
MCA_BTL_IB_FRAG_RETURN(frag); MCA_BTL_IB_FRAG_RETURN(frag);
if (BTL_OPENIB_QP_TYPE_PP(rqp)) { if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
if (OPAL_UNLIKELY(is_credit_msg)) { if (OPAL_UNLIKELY(is_credit_msg)) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1); OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
} else { } else {
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
} }
mca_btl_openib_endpoint_post_rr(ep, cqp); mca_btl_openib_endpoint_post_rr(ep, cqp);
} else { } else {
mca_btl_openib_module_t *btl = ep->endpoint_btl; mca_btl_openib_module_t *btl = ep->endpoint_btl;
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
mca_btl_openib_post_srr(btl, rqp); mca_btl_openib_post_srr(btl, rqp);
} }
} }
@ -3251,10 +3251,10 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
/* If we got any credits (RDMA or send), then try to progress all /* If we got any credits (RDMA or send), then try to progress all
the no_credits_pending_frags lists */ the no_credits_pending_frags lists */
if (rcredits > 0) { if (rcredits > 0) {
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
} }
if (credits > 0) { if (credits > 0) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
} }
if (rcredits + credits > 0) { if (rcredits + credits > 0) {
int rc; int rc;
@ -3303,7 +3303,7 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
credits = hdr->credits; credits = hdr->credits;
if(hdr->cm_seen) if(hdr->cm_seen)
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen); OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
/* We should not be here with eager, control, or credit messages */ /* We should not be here with eager, control, or credit messages */
assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA); assert(openib_frag_type(frag) != MCA_BTL_OPENIB_FRAG_EAGER_RDMA);
@ -3314,11 +3314,11 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
/* Otherwise, FRAG_RETURN it and repost if necessary */ /* Otherwise, FRAG_RETURN it and repost if necessary */
MCA_BTL_IB_FRAG_RETURN(frag); MCA_BTL_IB_FRAG_RETURN(frag);
if (BTL_OPENIB_QP_TYPE_PP(rqp)) { if (BTL_OPENIB_QP_TYPE_PP(rqp)) {
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1); OPAL_THREAD_ADD_FETCH32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
mca_btl_openib_endpoint_post_rr(ep, cqp); mca_btl_openib_endpoint_post_rr(ep, cqp);
} else { } else {
mca_btl_openib_module_t *btl = ep->endpoint_btl; mca_btl_openib_module_t *btl = ep->endpoint_btl;
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1); OPAL_THREAD_ADD_FETCH32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
mca_btl_openib_post_srr(btl, rqp); mca_btl_openib_post_srr(btl, rqp);
} }
@ -3327,10 +3327,10 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl,
/* If we got any credits (RDMA or send), then try to progress all /* If we got any credits (RDMA or send), then try to progress all
the no_credits_pending_frags lists */ the no_credits_pending_frags lists */
if (rcredits > 0) { if (rcredits > 0) {
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits); OPAL_THREAD_ADD_FETCH32(&ep->eager_rdma_remote.tokens, rcredits);
} }
if (credits > 0) { if (credits > 0) {
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits); OPAL_THREAD_ADD_FETCH32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
} }
if (rcredits + credits > 0) { if (rcredits + credits > 0) {
int rc; int rc;
@ -3523,7 +3523,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
case IBV_WC_FETCH_ADD: case IBV_WC_FETCH_ADD:
OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE")); OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE"));
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->get_tokens, 1);
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des); mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
@ -3575,7 +3575,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des)); n = qp_frag_to_wqe(endpoint, qp, to_com_frag(des));
if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) { if(IBV_WC_SEND == wc->opcode && !BTL_OPENIB_QP_TYPE_PP(qp)) {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n); OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1+n);
/* new SRQ credit available. Try to progress pending frags*/ /* new SRQ credit available. Try to progress pending frags*/
progress_pending_frags_srq(openib_btl, qp); progress_pending_frags_srq(openib_btl, qp);
@ -3601,7 +3601,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
wc->byte_len < mca_btl_openib_component.eager_limit && wc->byte_len < mca_btl_openib_component.eager_limit &&
openib_btl->eager_rdma_channels < openib_btl->eager_rdma_channels <
mca_btl_openib_component.max_eager_rdma && mca_btl_openib_component.max_eager_rdma &&
OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) == OPAL_THREAD_ADD_FETCH32(&endpoint->eager_recv_count, 1) ==
mca_btl_openib_component.eager_rdma_threshold) { mca_btl_openib_component.eager_rdma_threshold) {
mca_btl_openib_endpoint_connect_eager_rdma(endpoint); mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
} }
@ -3934,7 +3934,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp)
if(OPAL_LIKELY(0 == rc)) { if(OPAL_LIKELY(0 == rc)) {
struct ibv_srq_attr srq_attr; struct ibv_srq_attr srq_attr;
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post); OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.rd_posted, num_post);
if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) { if(true == openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag) {
srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num; srq_attr.max_wr = openib_btl->qps[qp].u.srq_qp.rd_curr_num;

Просмотреть файл

@ -96,7 +96,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \ #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \
do { \ do { \
(SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \ (SEQ) = OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1; \
(OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \ (OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \
} while(0) } while(0)
@ -108,7 +108,7 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ #define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
do { \ do { \
(OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ (OLD_HEAD) = (OPAL_THREAD_ADD_FETCH32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
} while(0) } while(0)
#endif #endif

Просмотреть файл

@ -212,7 +212,7 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */ qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP); rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
if (0 == rc) { if (0 == rc) {
opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr); opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr);
} }
} else { } else {
ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe; ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
@ -373,11 +373,12 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
/* Release memory resources */ /* Release memory resources */
do { do {
void *_tmp_ptr = NULL;
/* Make sure that mca_btl_openib_endpoint_connect_eager_rdma () /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma ()
* was not in "connect" or "bad" flow (failed to allocate memory) * was not in "connect" or "bad" flow (failed to allocate memory)
* and changed the pointer back to NULL * and changed the pointer back to NULL
*/ */
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) { if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) {
if (NULL != endpoint->eager_rdma_local.reg) { if (NULL != endpoint->eager_rdma_local.reg) {
endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache, endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
&endpoint->eager_rdma_local.reg->base); &endpoint->eager_rdma_local.reg->base);
@ -766,9 +767,9 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) { if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
do_rdma = true; do_rdma = true;
} else { } else {
if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) > if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
(mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) { (mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
return; return;
} }
@ -781,7 +782,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(cm_return > 255) { if(cm_return > 255) {
frag->hdr->cm_seen = 255; frag->hdr->cm_seen = 255;
cm_return -= 255; cm_return -= 255;
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
} else { } else {
frag->hdr->cm_seen = cm_return; frag->hdr->cm_seen = cm_return;
} }
@ -802,14 +803,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr); BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
} }
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits,
frag->hdr->credits); frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits,
credits_hdr->rdma_credits); credits_hdr->rdma_credits);
if(do_rdma) if(do_rdma)
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
else else
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_ERROR(("error posting send request errno %d says %s", rc, BTL_ERROR(("error posting send request errno %d says %s", rc,
strerror(errno))); strerror(errno)));
@ -823,7 +824,7 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
int status) int status)
{ {
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1); OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1);
assert(device->non_eager_rdma_endpoints >= 0); assert(device->non_eager_rdma_endpoints >= 0);
MCA_BTL_IB_FRAG_RETURN(descriptor); MCA_BTL_IB_FRAG_RETURN(descriptor);
} }
@ -894,12 +895,14 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_recv_frag_t *headers_buf; mca_btl_openib_recv_frag_t *headers_buf;
int i, rc; int i, rc;
uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS; uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
void *_tmp_ptr = NULL;
/* Set local rdma pointer to 1 temporarily so other threads will not try /* Set local rdma pointer to 1 temporarily so other threads will not try
* to enter the function */ * to enter the function */
if(!opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr,
(void*)1)) (void *) 1)) {
return; return;
}
headers_buf = (mca_btl_openib_recv_frag_t*) headers_buf = (mca_btl_openib_recv_frag_t*)
malloc(sizeof(mca_btl_openib_recv_frag_t) * malloc(sizeof(mca_btl_openib_recv_frag_t) *
@ -975,22 +978,23 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1; endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;
/* set local rdma pointer to real value */ /* set local rdma pointer to real value */
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, endpoint->eager_rdma_local.base.pval = buf;
(void*)1, buf);
endpoint->eager_rdma_local.alloc_base = alloc_base; endpoint->eager_rdma_local.alloc_base = alloc_base;
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
mca_btl_openib_endpoint_t **p; mca_btl_openib_endpoint_t **p;
void *_tmp_ptr;
OBJ_RETAIN(endpoint); OBJ_RETAIN(endpoint);
assert(((opal_object_t*)endpoint)->obj_reference_count == 2); assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
do { do {
_tmp_ptr = NULL;
p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count]; p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
} while(!opal_atomic_bool_cmpset_ptr(p, NULL, endpoint)); } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint));
OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1); OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1);
/* from this point progress function starts to poll new buffer */ /* from this point progress function starts to poll new buffer */
OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1); OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1);
return; return;
} }
@ -1001,8 +1005,7 @@ free_headers_buf:
free(headers_buf); free(headers_buf);
unlock_rdma_local: unlock_rdma_local:
/* set local rdma pointer back to zero. Will retry later */ /* set local rdma pointer back to zero. Will retry later */
(void)opal_atomic_bool_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, endpoint->eager_rdma_local.base.pval = NULL;
endpoint->eager_rdma_local.base.pval, NULL);
endpoint->eager_rdma_local.frags = NULL; endpoint->eager_rdma_local.frags = NULL;
} }

Просмотреть файл

@ -277,19 +277,19 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp) static inline int32_t qp_get_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
{ {
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, -1); return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, -1);
} }
static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp) static inline int32_t qp_put_wqe(mca_btl_openib_endpoint_t *ep, const int qp)
{ {
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, 1); return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, 1);
} }
static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) static inline int32_t qp_inc_inflight_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
{ {
frag->n_wqes_inflight = 0; frag->n_wqes_inflight = 0;
return OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe_inflight, 1); return OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe_inflight, 1);
} }
static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag) static inline void qp_inflight_wqe_to_frag(mca_btl_openib_endpoint_t *ep, const int qp, mca_btl_openib_com_frag_t *frag)
@ -303,7 +303,7 @@ static inline int qp_frag_to_wqe(mca_btl_openib_endpoint_t *ep, const int qp, mc
{ {
int n; int n;
n = frag->n_wqes_inflight; n = frag->n_wqes_inflight;
OPAL_THREAD_ADD32(&ep->qps[qp].qp->sd_wqe, n); OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].qp->sd_wqe, n);
frag->n_wqes_inflight = 0; frag->n_wqes_inflight = 0;
return n; return n;
@ -420,15 +420,15 @@ static inline int mca_btl_openib_endpoint_post_rr_nolock(
if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) { if((rc = post_recvs(ep, qp, num_post)) != OPAL_SUCCESS) {
return rc; return rc;
} }
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_posted, num_post); OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_posted, num_post);
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.rd_credits, num_post); OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.rd_credits, num_post);
/* post buffers for credit management on credit management qp */ /* post buffers for credit management on credit management qp */
if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) { if((rc = post_recvs(ep, cqp, cm_received)) != OPAL_SUCCESS) {
return rc; return rc;
} }
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_return, cm_received); OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_return, cm_received);
OPAL_THREAD_ADD32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received); OPAL_THREAD_ADD_FETCH32(&ep->qps[qp].u.pp_qp.cm_received, -cm_received);
assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num && assert(ep->qps[qp].u.pp_qp.rd_credits <= rd_num &&
ep->qps[qp].u.pp_qp.rd_credits >= 0); ep->qps[qp].u.pp_qp.rd_credits >= 0);
@ -446,14 +446,16 @@ static inline int mca_btl_openib_endpoint_post_rr(
return ret; return ret;
} }
#define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \ static inline __opal_attribute_always_inline__ bool btl_openib_credits_send_trylock (mca_btl_openib_endpoint_t *ep, int qp)
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1) {
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \ int32_t _tmp_value = 0;
OPAL_ATOMIC_BOOL_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0) return OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32(&ep->qps[qp].rd_credit_send_lock, &_tmp_value, 1);
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \ }
do { \
TO = FROM; \ #define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
} while(0 == OPAL_ATOMIC_BOOL_CMPSET_32(&FROM, TO, 0)) OPAL_ATOMIC_SWAP_32 (&(E)->qps[(Q)].rd_credit_send_lock, 0)
#define BTL_OPENIB_GET_CREDITS(FROM, TO) \
TO = OPAL_ATOMIC_SWAP_32(&FROM, 0)
static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep) static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep)
@ -486,7 +488,7 @@ static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp)
return; return;
try_send: try_send:
if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp)) if(btl_openib_credits_send_trylock(ep, qp))
mca_btl_openib_endpoint_send_credits(ep, qp); mca_btl_openib_endpoint_send_credits(ep, qp);
} }
@ -530,8 +532,8 @@ ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp, int do_signal)
static inline int static inline int
acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint) acquire_eager_rdma_send_credit(mca_btl_openib_endpoint_t *endpoint)
{ {
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { if(OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
@ -636,8 +638,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
prio = !prio; prio = !prio;
if (BTL_OPENIB_QP_TYPE_PP(qp)) { if (BTL_OPENIB_QP_TYPE_PP(qp)) {
if (OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) { if (OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
if (queue_frag) { if (queue_frag) {
opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio], opal_list_append(&endpoint->qps[qp].no_credits_pending_frags[prio],
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -646,8 +648,8 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
} else { } else {
if(OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) { if(OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, -1) < 0) {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
if (queue_frag) { if (queue_frag) {
OPAL_THREAD_LOCK(&openib_btl->ib_lock); OPAL_THREAD_LOCK(&openib_btl->ib_lock);
opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio], opal_list_append(&openib_btl->qps[qp].u.srq_qp.pending_frags[prio],
@ -682,7 +684,7 @@ static inline int mca_btl_openib_endpoint_credit_acquire (struct mca_btl_base_en
if(cm_return > 255) { if(cm_return > 255) {
hdr->cm_seen = 255; hdr->cm_seen = 255;
cm_return -= 255; cm_return -= 255;
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
} else { } else {
hdr->cm_seen = cm_return; hdr->cm_seen = cm_return;
} }
@ -697,18 +699,18 @@ static inline void mca_btl_openib_endpoint_credit_release (struct mca_btl_base_e
mca_btl_openib_header_t *hdr = frag->hdr; mca_btl_openib_header_t *hdr = frag->hdr;
if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) { if (BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits)); OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits, BTL_OPENIB_CREDITS(hdr->credits));
} }
if (do_rdma) { if (do_rdma) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
} else { } else {
if(BTL_OPENIB_QP_TYPE_PP(qp)) { if(BTL_OPENIB_QP_TYPE_PP(qp)) {
OPAL_THREAD_ADD32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits); OPAL_THREAD_ADD_FETCH32 (&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
} else if BTL_OPENIB_QP_TYPE_SRQ(qp){ } else if BTL_OPENIB_QP_TYPE_SRQ(qp){
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl; mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); OPAL_THREAD_ADD_FETCH32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
} }
} }
} }

Просмотреть файл

@ -148,9 +148,9 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
} }
/* check for a get token */ /* check for a get token */
if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) { if (OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,-1) < 0) {
qp_put_wqe(ep, qp); qp_put_wqe(ep, qp);
OPAL_THREAD_ADD32(&ep->get_tokens,1); OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
@ -159,7 +159,7 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) { if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) {
qp_put_wqe(ep, qp); qp_put_wqe(ep, qp);
OPAL_THREAD_ADD32(&ep->get_tokens,1); OPAL_THREAD_ADD_FETCH32(&ep->get_tokens,1);
return OPAL_ERROR; return OPAL_ERROR;
} }

Просмотреть файл

@ -423,7 +423,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
curr_proc, curr_proc,
&btl_peer_data[i]); &btl_peer_data[i]);
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1);
/* and here we can reach */ /* and here we can reach */
opal_bitmap_set_bit(reachable, i); opal_bitmap_set_bit(reachable, i);
@ -476,7 +476,7 @@ mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl,
portals4 entry in proc_endpoints instead of the peer_data */ portals4 entry in proc_endpoints instead of the peer_data */
for (i = 0 ; i < nprocs ; ++i) { for (i = 0 ; i < nprocs ; ++i) {
free(btl_peer_data[i]); free(btl_peer_data[i]);
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, -1);
} }
if (0 == portals4_btl->portals_num_procs) if (0 == portals4_btl->portals_num_procs)
@ -537,7 +537,7 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
if (frag->me_h != PTL_INVALID_HANDLE) { if (frag->me_h != PTL_INVALID_HANDLE) {
frag->me_h = PTL_INVALID_HANDLE; frag->me_h = PTL_INVALID_HANDLE;
} }
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); "mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(portals4_btl, frag);
@ -622,7 +622,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
return NULL; return NULL;
} }
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1);
handle->remote_offset = 0; handle->remote_offset = 0;
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
@ -662,7 +662,7 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
opal_output_verbose(1, opal_btl_base_framework.framework_output, opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: PtlMEAppend failed: %d\n", "%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
return NULL; return NULL;
} }
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,

Просмотреть файл

@ -609,7 +609,7 @@ mca_btl_portals4_component_progress(void)
mca_btl_portals4_free(&portals4_btl->super, &frag->base); mca_btl_portals4_free(&portals4_btl->super, &frag->base);
} }
if (0 != frag->size) { if (0 != frag->size) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n", "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
portals4_btl->portals_outstanding_ops)); portals4_btl->portals_outstanding_ops));
@ -646,7 +646,7 @@ mca_btl_portals4_component_progress(void)
} }
if (0 != frag->size) { if (0 != frag->size) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops)); "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
} }
@ -749,7 +749,7 @@ mca_btl_portals4_component_progress(void)
OPAL_SUCCESS); OPAL_SUCCESS);
OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag); OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
goto done; goto done;

Просмотреть файл

@ -53,16 +53,16 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
int ret; int ret;
/* reserve space in the event queue for rdma operations immediately */ /* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) { portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n")); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
mca_btl_portals4_component_progress(); mca_btl_portals4_component_progress();
} }
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag); OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag){ if (NULL == frag){
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
return OPAL_ERROR; return OPAL_ERROR;
} }
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,

Просмотреть файл

@ -49,9 +49,9 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type); BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
/* reserve space in the event queue for rdma operations immediately */ /* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) { portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"Call to mca_btl_portals4_component_progress (4)\n")); "Call to mca_btl_portals4_component_progress (4)\n"));
mca_btl_portals4_component_progress(); mca_btl_portals4_component_progress();

Просмотреть файл

@ -636,7 +636,7 @@ int mca_btl_smcuda_add_procs(
/* Sync with other local procs. Force the FIFO initialization to always /* Sync with other local procs. Force the FIFO initialization to always
* happens before the readers access it. * happens before the readers access it.
*/ */
(void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); (void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
while( n_local_procs > while( n_local_procs >
mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) { mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) {
opal_progress(); opal_progress();
@ -976,7 +976,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
* the return code indicates failure, the write has still "completed" from * the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue. * our point of view: it has been posted to a "pending send" queue.
*/ */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
(void)rc; /* this is safe to ignore as the message is requeued till success */ (void)rc; /* this is safe to ignore as the message is requeued till success */
@ -1026,7 +1026,7 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
* post the descriptor in the queue - post with the relative * post the descriptor in the queue - post with the relative
* address * address
*/ */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
if( OPAL_LIKELY(0 == rc) ) { if( OPAL_LIKELY(0 == rc) ) {
@ -1241,7 +1241,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b
* the return code indicates failure, the write has still "completed" from * the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue. * our point of view: it has been posted to a "pending send" queue.
*/ */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output,
"Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d", "Sending CUDA IPC REQ (try=%d): myrank=%d, mydev=%d, peerrank=%d",
endpoint->ipctries, endpoint->ipctries,

Просмотреть файл

@ -658,7 +658,7 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl,
* the return code indicates failure, the write has still "completed" from * the return code indicates failure, the write has still "completed" from
* our point of view: it has been posted to a "pending send" queue. * our point of view: it has been posted to a "pending send" queue.
*/ */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank, MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
@ -980,7 +980,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
if(NULL == si) return; /* Another thread got in before us. Thats ok. */ if(NULL == si) return; /* Another thread got in before us. Thats ok. */
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1);
MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data,
true, false, rc); true, false, rc);
@ -1093,7 +1093,7 @@ int mca_btl_smcuda_component_progress(void)
if( btl_ownership ) { if( btl_ownership ) {
MCA_BTL_SMCUDA_FRAG_RETURN(frag); MCA_BTL_SMCUDA_FRAG_RETURN(frag);
} }
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, -1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1);
if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) { if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) {
btl_smcuda_process_pending_sends(endpoint); btl_smcuda_process_pending_sends(endpoint);
} }

Просмотреть файл

@ -40,7 +40,7 @@ add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend)
si = (btl_smcuda_pending_send_item_t*)i; si = (btl_smcuda_pending_send_item_t*)i;
si->data = data; si->data = data;
OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1); OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1);
/* if data was on pending send list then prepend it to the list to /* if data was on pending send list then prepend it to the list to
* minimize reordering */ * minimize reordering */

Просмотреть файл

@ -272,7 +272,7 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags); rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,1); opal_atomic_add_fetch_32(&ugni_module->reg_count,1);
} }
return rc; return rc;
@ -286,7 +286,7 @@ ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg); rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,-1); opal_atomic_add_fetch_32(&ugni_module->reg_count,-1);
} }
return rc; return rc;

Просмотреть файл

@ -543,7 +543,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep)); BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false; ep->dg_posted = false;
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1); (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, -1);
} }
(void) mca_btl_ugni_ep_connect_progress (ep); (void) mca_btl_ugni_ep_connect_progress (ep);

Просмотреть файл

@ -181,7 +181,7 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec
} }
} while (device->dev_smsg_local_cq.active_operations); } while (device->dev_smsg_local_cq.active_operations);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1); (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
} }
mca_btl_ugni_device_lock (device); mca_btl_ugni_device_lock (device);
@ -278,7 +278,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl; ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED; ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1); (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
/* send all pending messages */ /* send all pending messages */
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list))); BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
@ -312,7 +312,7 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
ep->remote_attr, sizeof (*ep->remote_attr), ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index); MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) { if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1); (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
} }
return mca_btl_rc_ugni_to_opal (rc); return mca_btl_rc_ugni_to_opal (rc);

Просмотреть файл

@ -192,7 +192,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
opal_atomic_mb (); opal_atomic_mb ();
ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); ref_cnt = OPAL_THREAD_ADD_FETCH32(&frag->ref_cnt, -1);
if (ref_cnt) { if (ref_cnt) {
assert (ref_cnt > 0); assert (ref_cnt > 0);
return false; return false;

Просмотреть файл

@ -59,12 +59,13 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
mca_btl_ugni_base_frag_t frag; mca_btl_ugni_base_frag_t frag;
mca_btl_base_segment_t seg; mca_btl_base_segment_t seg;
bool disconnect = false; bool disconnect = false;
int32_t _tmp_value = 0;
uintptr_t data_ptr; uintptr_t data_ptr;
gni_return_t rc; gni_return_t rc;
uint32_t len; uint32_t len;
int count = 0; int count = 0;
if (!opal_atomic_bool_cmpset_32 (&ep->smsg_progressing, 0, 1)) { if (!opal_atomic_compare_exchange_strong_32 (&ep->smsg_progressing, &_tmp_value, 1)) {
/* already progressing (we can't support reentry here) */ /* already progressing (we can't support reentry here) */
return 0; return 0;
} }

Просмотреть файл

@ -261,14 +261,14 @@ static inline bool mca_btl_vader_check_fboxes (void)
static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr) static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
{ {
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) { if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) {
/* protect access to mca_btl_vader_component.segment_offset */ /* protect access to mca_btl_vader_component.segment_offset */
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size &&
mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) {
/* verify the remote side will accept another fbox */ /* verify the remote side will accept another fbox */
if (0 <= opal_atomic_add_32 (&ep->fifo->fbox_available, -1)) { if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size;
@ -280,7 +280,7 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc
hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
++mca_btl_vader_component.fbox_count; ++mca_btl_vader_component.fbox_count;
} else { } else {
opal_atomic_add_32 (&ep->fifo->fbox_available, 1); opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
} }
opal_atomic_wmb (); opal_atomic_wmb ();

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. * Copyright (c) 2010-2017 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -30,8 +30,9 @@
#include "btl_vader_endpoint.h" #include "btl_vader_endpoint.h"
#include "btl_vader_frag.h" #include "btl_vader_frag.h"
#define vader_item_compare_exchange(x, y, z) opal_atomic_compare_exchange_strong_ptr ((volatile void **) (x), (void **) (y), (void *) (z))
#if SIZEOF_VOID_P == 8 #if SIZEOF_VOID_P == 8
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y)) #define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y))
#define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll #define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll
@ -40,7 +41,6 @@
typedef int64_t fifo_value_t; typedef int64_t fifo_value_t;
#else #else
#define vader_item_cmpset(x, y, z) opal_atomic_bool_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y)) #define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y))
#define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl #define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl
@ -138,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m
if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) { if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
opal_atomic_rmb(); opal_atomic_rmb();
if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) { if (!vader_item_compare_exchange (&fifo->fifo_tail, &value, VADER_FIFO_FREE)) {
while (VADER_FIFO_FREE == hdr->next) { while (VADER_FIFO_FREE == hdr->next) {
opal_atomic_rmb (); opal_atomic_rmb ();
} }

Просмотреть файл

@ -54,7 +54,7 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
vader_ctx->reg[0] = reg; vader_ctx->reg[0] = reg;
if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) {
(void)opal_atomic_add (&reg->ref_count, 1); opal_atomic_add (&reg->ref_count, 1);
return 1; return 1;
} }
@ -93,7 +93,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
/* start the new segment from the lower of the two bases */ /* start the new segment from the lower of the two bases */
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base; base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
if (OPAL_LIKELY(0 == opal_atomic_add_32 (&reg->ref_count, -1))) { if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (&reg->ref_count, -1))) {
/* this pointer is not in use */ /* this pointer is not in use */
(void) xpmem_detach (reg->rcache_context); (void) xpmem_detach (reg->rcache_context);
OBJ_RELEASE(reg); OBJ_RELEASE(reg);
@ -143,7 +143,7 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
int32_t ref_count; int32_t ref_count;
ref_count = opal_atomic_add_32 (&reg->ref_count, -1); ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
/* protect rcache access */ /* protect rcache access */
mca_rcache_base_vma_delete (vma_module, reg); mca_rcache_base_vma_delete (vma_module, reg);

Просмотреть файл

@ -131,7 +131,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp,
} }
/* increment the number of processes that are attached to the segment. */ /* increment the number of processes that are attached to the segment. */
(void)opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1); (void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1);
/* commit the changes before we return */ /* commit the changes before we return */
opal_atomic_wmb(); opal_atomic_wmb();

Просмотреть файл

@ -131,7 +131,7 @@ void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
if (huge_page->path) { if (huge_page->path) {
int32_t count; int32_t count;
count = opal_atomic_add_32 (&huge_page->count, 1); count = opal_atomic_add_fetch_32 (&huge_page->count, 1);
rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path, rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
getpid (), count); getpid (), count);

Просмотреть файл

@ -232,7 +232,7 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_
/* This segment fits fully within an existing segment. */ /* This segment fits fully within an existing segment. */
rcache_grdma->stat_cache_hit++; rcache_grdma->stat_cache_hit++;
int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1); int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt)); "returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
(void)ref_cnt; (void)ref_cnt;
@ -296,7 +296,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
/* get updated access flags */ /* get updated access flags */
access_flags = find_args.access_flags; access_flags = find_args.access_flags;
OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1); OPAL_THREAD_ADD_FETCH32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
} }
item = opal_free_list_get_mt (&rcache_grdma->reg_list); item = opal_free_list_get_mt (&rcache_grdma->reg_list);
@ -380,7 +380,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
(opal_list_item_t*)(*reg)); (opal_list_item_t*)(*reg));
} }
rcache_grdma->stat_cache_found++; rcache_grdma->stat_cache_found++;
opal_atomic_add_32 (&(*reg)->ref_count, 1); opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1);
} else { } else {
rcache_grdma->stat_cache_notfound++; rcache_grdma->stat_cache_notfound++;
} }
@ -398,7 +398,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
int rc; int rc;
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock); opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
ref_count = opal_atomic_add_32 (&reg->ref_count, -1); ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning registration %p, remaining references %d", (void *) reg, ref_count)); "returning registration %p, remaining references %d", (void *) reg, ref_count));

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше