aac611237b
This commit expands the OPAL_THREAD macros to include 32- and 64-bit atomic swap. Additionally, the macro declarations have been updated to include both OPAL_THREAD_* and OPAL_ATOMIC_*. Before this commit the former was used with add and the latter with cmpset. Signed-off-by: Nathan Hjelm <hjelmn@me.com>
329 строки
8.1 KiB
C
329 строки
8.1 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
|
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2014 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "opal/sys/atomic.h"
|
|
#include "ompi/mca/osc/osc.h"
|
|
#include "ompi/mca/osc/base/base.h"
|
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
|
|
|
#include "osc_sm.h"
|
|
|
|
/**
 * compare_ranks:
 *
 * @param[in] ptra    Pointer to integer item
 * @param[in] ptrb    Pointer to integer item
 *
 * @returns 0 if *ptra == *ptrb
 * @returns -1 if *ptra < *ptrb
 * @returns 1 otherwise
 *
 * qsort()-compatible comparator that orders integers ascending. This
 * function is used to sort the rank list. It can be removed if groups
 * are always in order.
 */
static int compare_ranks (const void *ptra, const void *ptrb)
{
    /* read through const-qualified pointers: a comparator must never
     * modify the elements it is handed, so do not cast the const away */
    const int a = *((const int *) ptra);
    const int b = *((const int *) ptrb);

    /* explicit comparisons instead of (a - b), which could overflow for
     * operands of opposite sign */
    if (a < b) {
        return -1;
    }

    return (a > b) ? 1 : 0;
}
|
|
|
|
/**
|
|
* ompi_osc_pt2pt_get_comm_ranks:
|
|
*
|
|
* @param[in] module - OSC PT2PT module
|
|
* @param[in] sub_group - Group with ranks to translate
|
|
*
|
|
* @returns an array of translated ranks on success or NULL on failure
|
|
*
|
|
* Translate the ranks given in {sub_group} into ranks in the
|
|
* communicator used to create {module}.
|
|
*/
|
|
static int *ompi_osc_sm_group_ranks (ompi_group_t *group, ompi_group_t *sub_group)
|
|
{
|
|
int size = ompi_group_size(sub_group);
|
|
int *ranks1, *ranks2;
|
|
int ret;
|
|
|
|
ranks1 = calloc (size, sizeof(int));
|
|
ranks2 = calloc (size, sizeof(int));
|
|
if (NULL == ranks1 || NULL == ranks2) {
|
|
free (ranks1);
|
|
free (ranks2);
|
|
return NULL;
|
|
}
|
|
|
|
for (int i = 0 ; i < size ; ++i) {
|
|
ranks1[i] = i;
|
|
}
|
|
|
|
ret = ompi_group_translate_ranks (sub_group, size, ranks1, group, ranks2);
|
|
free (ranks1);
|
|
if (OMPI_SUCCESS != ret) {
|
|
free (ranks2);
|
|
return NULL;
|
|
}
|
|
|
|
qsort (ranks2, size, sizeof (int), compare_ranks);
|
|
|
|
return ranks2;
|
|
}
|
|
|
|
|
|
int
|
|
ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
|
|
/* ensure all memory operations have completed */
|
|
opal_atomic_mb();
|
|
|
|
if (module->global_state->use_barrier_for_fence) {
|
|
return module->comm->c_coll.coll_barrier(module->comm,
|
|
module->comm->c_coll.coll_barrier_module);
|
|
} else {
|
|
module->my_sense = !module->my_sense;
|
|
pthread_mutex_lock(&module->global_state->mtx);
|
|
module->global_state->count--;
|
|
if (module->global_state->count == 0) {
|
|
module->global_state->count = ompi_comm_size(module->comm);
|
|
module->global_state->sense = module->my_sense;
|
|
pthread_cond_broadcast(&module->global_state->cond);
|
|
} else {
|
|
while (module->global_state->sense != module->my_sense) {
|
|
pthread_cond_wait(&module->global_state->cond, &module->global_state->mtx);
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&module->global_state->mtx);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
}
|
|
|
|
int
|
|
ompi_osc_sm_start(struct ompi_group_t *group,
|
|
int assert,
|
|
struct ompi_win_t *win)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
int my_rank = ompi_comm_rank (module->comm);
|
|
|
|
OBJ_RETAIN(group);
|
|
|
|
if (!OPAL_ATOMIC_CMPSET_PTR(&module->start_group, NULL, group)) {
|
|
OBJ_RELEASE(group);
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
|
int size;
|
|
|
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
|
if (NULL == ranks) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
size = ompi_group_size(module->start_group);
|
|
|
|
for (int i = 0 ; i < size ; ++i) {
|
|
int rank_byte = ranks[i] >> 6;
|
|
uint64_t old, rank_bit = ((uint64_t) 1) << (ranks[i] & 0x3f);
|
|
|
|
/* wait for rank to post */
|
|
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
|
|
opal_progress();
|
|
opal_atomic_mb();
|
|
}
|
|
|
|
opal_atomic_rmb ();
|
|
|
|
do {
|
|
old = module->posts[my_rank][rank_byte];
|
|
} while (!opal_atomic_cmpset_64 ((int64_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
|
|
}
|
|
|
|
free (ranks);
|
|
}
|
|
|
|
opal_atomic_mb();
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
ompi_osc_sm_complete(struct ompi_win_t *win)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
ompi_group_t *group;
|
|
int gsize;
|
|
|
|
/* ensure all memory operations have completed */
|
|
opal_atomic_mb();
|
|
|
|
group = module->start_group;
|
|
if (NULL == group || !OPAL_ATOMIC_CMPSET_PTR(&module->start_group, group, NULL)) {
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
opal_atomic_mb();
|
|
|
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
|
if (NULL == ranks) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
gsize = ompi_group_size(group);
|
|
for (int i = 0 ; i < gsize ; ++i) {
|
|
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1);
|
|
}
|
|
|
|
free (ranks);
|
|
|
|
OBJ_RELEASE(group);
|
|
|
|
opal_atomic_mb();
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
ompi_osc_sm_post(struct ompi_group_t *group,
|
|
int assert,
|
|
struct ompi_win_t *win)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
int my_rank = ompi_comm_rank (module->comm);
|
|
int my_byte = my_rank >> 6;
|
|
uint64_t my_bit = ((uint64_t) 1) << (my_rank & 0x3f);
|
|
int gsize;
|
|
|
|
OPAL_THREAD_LOCK(&module->lock);
|
|
|
|
if (NULL != module->post_group) {
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
module->post_group = group;
|
|
|
|
OBJ_RETAIN(group);
|
|
|
|
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
|
if (NULL == ranks) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
module->my_node_state->complete_count = 0;
|
|
opal_atomic_mb();
|
|
|
|
gsize = ompi_group_size(module->post_group);
|
|
for (int i = 0 ; i < gsize ; ++i) {
|
|
(void) opal_atomic_add_64 ((int64_t *) module->posts[ranks[i]] + my_byte, my_bit);
|
|
}
|
|
|
|
opal_atomic_wmb ();
|
|
|
|
free (ranks);
|
|
|
|
opal_progress ();
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
ompi_osc_sm_wait(struct ompi_win_t *win)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
ompi_group_t *group;
|
|
|
|
OPAL_THREAD_LOCK(&module->lock);
|
|
|
|
if (NULL == module->post_group) {
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
group = module->post_group;
|
|
|
|
int size = ompi_group_size (group);
|
|
|
|
while (module->my_node_state->complete_count != size) {
|
|
opal_progress();
|
|
opal_atomic_mb();
|
|
}
|
|
|
|
OBJ_RELEASE(group);
|
|
module->post_group = NULL;
|
|
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
|
|
/* ensure all memory operations have completed */
|
|
opal_atomic_mb();
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
ompi_osc_sm_test(struct ompi_win_t *win,
|
|
int *flag)
|
|
{
|
|
ompi_osc_sm_module_t *module =
|
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
|
|
|
OPAL_THREAD_LOCK(&module->lock);
|
|
|
|
if (NULL == module->post_group) {
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
int size = ompi_group_size(module->post_group);
|
|
|
|
if (module->my_node_state->complete_count == size) {
|
|
OBJ_RELEASE(module->post_group);
|
|
module->post_group = NULL;
|
|
*flag = 1;
|
|
} else {
|
|
*flag = 0;
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&module->lock);
|
|
|
|
/* ensure all memory operations have completed */
|
|
opal_atomic_mb();
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|