Merge pull request #925 from hjelmn/osc_sm_fix
osc/sm: fix pscw synchronization
Этот коммит содержится в:
Коммит
b80cd56326
@ -114,12 +114,14 @@ static ompi_osc_pt2pt_peer_t **ompi_osc_pt2pt_get_peers (ompi_osc_pt2pt_module_t
|
|||||||
|
|
||||||
static void ompi_osc_pt2pt_release_peers (ompi_osc_pt2pt_peer_t **peers, int npeers)
|
static void ompi_osc_pt2pt_release_peers (ompi_osc_pt2pt_peer_t **peers, int npeers)
|
||||||
{
|
{
|
||||||
|
if (peers) {
|
||||||
for (int i = 0 ; i < npeers ; ++i) {
|
for (int i = 0 ; i < npeers ; ++i) {
|
||||||
OBJ_RELEASE(peers[i]);
|
OBJ_RELEASE(peers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
free (peers);
|
free (peers);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
|
int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
|
||||||
{
|
{
|
||||||
@ -228,20 +230,24 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
|
|||||||
"ompi_osc_pt2pt_start entering with group size %d...",
|
"ompi_osc_pt2pt_start entering with group size %d...",
|
||||||
sync->num_peers));
|
sync->num_peers));
|
||||||
|
|
||||||
if (0 == ompi_group_size (group)) {
|
|
||||||
/* nothing more to do. this is an empty start epoch */
|
|
||||||
OPAL_THREAD_UNLOCK(&module->lock);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
opal_atomic_wmb ();
|
|
||||||
|
|
||||||
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_PSCW;
|
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_PSCW;
|
||||||
|
|
||||||
/* prevent us from entering a passive-target, fence, or another pscw access epoch until
|
/* prevent us from entering a passive-target, fence, or another pscw access epoch until
|
||||||
* the matching complete is called */
|
* the matching complete is called */
|
||||||
sync->epoch_active = true;
|
sync->epoch_active = true;
|
||||||
|
|
||||||
|
/* save the group */
|
||||||
|
OBJ_RETAIN(group);
|
||||||
|
|
||||||
|
if (0 == ompi_group_size (group)) {
|
||||||
|
/* nothing more to do. this is an empty start epoch */
|
||||||
|
sync->eager_send_active = true;
|
||||||
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
opal_atomic_wmb ();
|
||||||
|
|
||||||
/* translate the group ranks into the communicator */
|
/* translate the group ranks into the communicator */
|
||||||
sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group);
|
sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group);
|
||||||
if (NULL == sync->peer_list.peers) {
|
if (NULL == sync->peer_list.peers) {
|
||||||
@ -249,10 +255,6 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
|
|||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* save the group */
|
|
||||||
OBJ_RETAIN(group);
|
|
||||||
ompi_group_increment_proc_count(group);
|
|
||||||
|
|
||||||
if (!(assert & MPI_MODE_NOCHECK)) {
|
if (!(assert & MPI_MODE_NOCHECK)) {
|
||||||
OPAL_THREAD_LOCK(&sync->lock);
|
OPAL_THREAD_LOCK(&sync->lock);
|
||||||
for (int i = 0 ; i < sync->num_peers ; ++i) {
|
for (int i = 0 ; i < sync->num_peers ; ++i) {
|
||||||
@ -318,12 +320,6 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
|
|||||||
group_size = sync->num_peers;
|
group_size = sync->num_peers;
|
||||||
|
|
||||||
peers = sync->peer_list.peers;
|
peers = sync->peer_list.peers;
|
||||||
if (NULL == peers) {
|
|
||||||
/* empty peer list */
|
|
||||||
OPAL_THREAD_UNLOCK(&(module->lock));
|
|
||||||
OBJ_RELEASE(group);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&module->lock);
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
|
||||||
@ -383,8 +379,10 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
|
|||||||
module->epoch_outgoing_frag_count[rank] = 0;
|
module->epoch_outgoing_frag_count[rank] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (peers) {
|
||||||
/* release our reference to peers in this group */
|
/* release our reference to peers in this group */
|
||||||
ompi_osc_pt2pt_release_peers (peers, group_size);
|
ompi_osc_pt2pt_release_peers (peers, group_size);
|
||||||
|
}
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -403,7 +401,6 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
|
|||||||
OPAL_THREAD_UNLOCK(&module->lock);
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
|
||||||
/* phase 2 cleanup group */
|
/* phase 2 cleanup group */
|
||||||
ompi_group_decrement_proc_count(group);
|
|
||||||
OBJ_RELEASE(group);
|
OBJ_RELEASE(group);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
@ -439,7 +436,6 @@ int ompi_osc_pt2pt_post (ompi_group_t *group, int assert, ompi_win_t *win)
|
|||||||
|
|
||||||
/* save the group */
|
/* save the group */
|
||||||
OBJ_RETAIN(group);
|
OBJ_RETAIN(group);
|
||||||
ompi_group_increment_proc_count(group);
|
|
||||||
|
|
||||||
module->pw_group = group;
|
module->pw_group = group;
|
||||||
|
|
||||||
@ -523,7 +519,6 @@ int ompi_osc_pt2pt_wait (ompi_win_t *win)
|
|||||||
module->pw_group = NULL;
|
module->pw_group = NULL;
|
||||||
OPAL_THREAD_UNLOCK(&module->lock);
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
|
||||||
ompi_group_decrement_proc_count(group);
|
|
||||||
OBJ_RELEASE(group);
|
OBJ_RELEASE(group);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||||
@ -561,7 +556,6 @@ int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag)
|
|||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&(module->lock));
|
OPAL_THREAD_UNLOCK(&(module->lock));
|
||||||
|
|
||||||
ompi_group_decrement_proc_count(group);
|
|
||||||
OBJ_RELEASE(group);
|
OBJ_RELEASE(group);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -173,6 +173,8 @@ static inline void ompi_osc_pt2pt_sync_reset (ompi_osc_pt2pt_sync_t *sync)
|
|||||||
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
|
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
|
||||||
sync->eager_send_active = 0;
|
sync->eager_send_active = 0;
|
||||||
sync->epoch_active = 0;
|
sync->epoch_active = 0;
|
||||||
|
sync->peer_list.peers = NULL;
|
||||||
|
sync->sync.pscw.group = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* OMPI_OSC_PT2PT_SYNC_H */
|
#endif /* OMPI_OSC_PT2PT_SYNC_H */
|
||||||
|
@ -61,6 +61,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
|||||||
static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info);
|
static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info);
|
||||||
static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used);
|
static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used);
|
||||||
|
|
||||||
|
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl);
|
||||||
|
|
||||||
static char *ompi_osc_rdma_btl_names;
|
static char *ompi_osc_rdma_btl_names;
|
||||||
|
|
||||||
ompi_osc_rdma_component_t mca_osc_rdma_component = {
|
ompi_osc_rdma_component_t mca_osc_rdma_component = {
|
||||||
@ -296,10 +298,16 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
|
|||||||
struct ompi_communicator_t *comm, struct ompi_info_t *info,
|
struct ompi_communicator_t *comm, struct ompi_info_t *info,
|
||||||
int flavor)
|
int flavor)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (MPI_WIN_FLAVOR_SHARED == flavor) {
|
if (MPI_WIN_FLAVOR_SHARED == flavor) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return mca_osc_rdma_component.priority;
|
return mca_osc_rdma_component.priority;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -619,32 +627,31 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ompi_osc_rdma_find_rdma_endpoints (ompi_osc_rdma_module_t *module)
|
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl)
|
||||||
{
|
{
|
||||||
struct mca_btl_base_module_t **possible_btls = NULL;
|
struct mca_btl_base_module_t **possible_btls = NULL;
|
||||||
int comm_size = ompi_comm_size (module->comm);
|
int comm_size = ompi_comm_size (comm);
|
||||||
int rc = OMPI_SUCCESS, max_btls = 0;
|
int rc = OMPI_SUCCESS, max_btls = 0;
|
||||||
unsigned int selected_latency = INT_MAX;
|
unsigned int selected_latency = INT_MAX;
|
||||||
mca_btl_base_selected_module_t *selected_btl;
|
struct mca_btl_base_module_t *selected_btl = NULL;
|
||||||
|
mca_btl_base_selected_module_t *item;
|
||||||
int *btl_counts = NULL;
|
int *btl_counts = NULL;
|
||||||
char **btls_to_use;
|
char **btls_to_use;
|
||||||
void *tmp;
|
void *tmp;
|
||||||
|
|
||||||
module->selected_btl = NULL;
|
|
||||||
|
|
||||||
btls_to_use = opal_argv_split (ompi_osc_rdma_btl_names, ',');
|
btls_to_use = opal_argv_split (ompi_osc_rdma_btl_names, ',');
|
||||||
if (btls_to_use) {
|
if (btls_to_use) {
|
||||||
/* rdma and atomics are only supported with BTLs at the moment */
|
/* rdma and atomics are only supported with BTLs at the moment */
|
||||||
OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
|
OPAL_LIST_FOREACH(item, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
|
||||||
for (int i = 0 ; btls_to_use[i] ; ++i) {
|
for (int i = 0 ; btls_to_use[i] ; ++i) {
|
||||||
if (0 != strcmp (btls_to_use[i], selected_btl->btl_module->btl_component->btl_version.mca_component_name)) {
|
if (0 != strcmp (btls_to_use[i], item->btl_module->btl_component->btl_version.mca_component_name)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((selected_btl->btl_module->btl_flags & (MCA_BTL_FLAGS_RDMA)) == MCA_BTL_FLAGS_RDMA &&
|
if ((item->btl_module->btl_flags & (MCA_BTL_FLAGS_RDMA)) == MCA_BTL_FLAGS_RDMA &&
|
||||||
(selected_btl->btl_module->btl_flags & (MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS))) {
|
(item->btl_module->btl_flags & (MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS))) {
|
||||||
if (!module->selected_btl || selected_btl->btl_module->btl_latency < module->selected_btl->btl_latency) {
|
if (!selected_btl || item->btl_module->btl_latency < selected_btl->btl_latency) {
|
||||||
module->selected_btl = selected_btl->btl_module;
|
selected_btl = item->btl_module;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -653,14 +660,18 @@ static int ompi_osc_rdma_find_rdma_endpoints (ompi_osc_rdma_module_t *module)
|
|||||||
opal_argv_free (btls_to_use);
|
opal_argv_free (btls_to_use);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NULL != module->selected_btl) {
|
if (btl) {
|
||||||
|
*btl = selected_btl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NULL != selected_btl) {
|
||||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
||||||
module->selected_btl->btl_component->btl_version.mca_component_name));
|
selected_btl->btl_component->btl_version.mca_component_name));
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0 ; i < comm_size ; ++i) {
|
for (int i = 0 ; i < comm_size ; ++i) {
|
||||||
ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, i);
|
ompi_proc_t *proc = ompi_comm_peer_lookup (comm, i);
|
||||||
mca_bml_base_endpoint_t *endpoint;
|
mca_bml_base_endpoint_t *endpoint;
|
||||||
int num_btls, prev_max;
|
int num_btls, prev_max;
|
||||||
|
|
||||||
@ -733,7 +744,7 @@ static int ompi_osc_rdma_find_rdma_endpoints (ompi_osc_rdma_module_t *module)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (btl_counts[i] == comm_size && possible_btls[i]->btl_latency < selected_latency) {
|
if (btl_counts[i] == comm_size && possible_btls[i]->btl_latency < selected_latency) {
|
||||||
module->selected_btl = possible_btls[i];
|
selected_btl = possible_btls[i];
|
||||||
selected_latency = possible_btls[i]->btl_latency;
|
selected_latency = possible_btls[i]->btl_latency;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -741,13 +752,17 @@ static int ompi_osc_rdma_find_rdma_endpoints (ompi_osc_rdma_module_t *module)
|
|||||||
free (possible_btls);
|
free (possible_btls);
|
||||||
free (btl_counts);
|
free (btl_counts);
|
||||||
|
|
||||||
if (NULL == module->selected_btl) {
|
if (btl) {
|
||||||
|
*btl = selected_btl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NULL == selected_btl) {
|
||||||
/* no btl = no rdma/atomics */
|
/* no btl = no rdma/atomics */
|
||||||
return OMPI_ERR_NOT_AVAILABLE;
|
return OMPI_ERR_NOT_AVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Selected BTL: %s",
|
||||||
module->selected_btl->btl_component->btl_version.mca_component_name));
|
selected_btl->btl_component->btl_version.mca_component_name));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -1017,7 +1032,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* find rdma capable endpoints */
|
/* find rdma capable endpoints */
|
||||||
ret = ompi_osc_rdma_find_rdma_endpoints (module);
|
ret = ompi_osc_rdma_query_btls (module->comm, &module->selected_btl);
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Failed finding RDMA endpoints"));
|
OPAL_OUTPUT_VERBOSE((11, ompi_osc_base_framework.framework_output, "Failed finding RDMA endpoints"));
|
||||||
ompi_osc_rdma_free (win);
|
ompi_osc_rdma_free (win);
|
||||||
|
@ -38,7 +38,6 @@ struct ompi_osc_sm_lock_t {
|
|||||||
typedef struct ompi_osc_sm_lock_t ompi_osc_sm_lock_t;
|
typedef struct ompi_osc_sm_lock_t ompi_osc_sm_lock_t;
|
||||||
|
|
||||||
struct ompi_osc_sm_node_state_t {
|
struct ompi_osc_sm_node_state_t {
|
||||||
int32_t post_count;
|
|
||||||
int32_t complete_count;
|
int32_t complete_count;
|
||||||
ompi_osc_sm_lock_t lock;
|
ompi_osc_sm_lock_t lock;
|
||||||
opal_atomic_lock_t accumulate_lock;
|
opal_atomic_lock_t accumulate_lock;
|
||||||
@ -81,6 +80,9 @@ struct ompi_osc_sm_module_t {
|
|||||||
ompi_osc_sm_global_state_t *global_state;
|
ompi_osc_sm_global_state_t *global_state;
|
||||||
ompi_osc_sm_node_state_t *my_node_state;
|
ompi_osc_sm_node_state_t *my_node_state;
|
||||||
ompi_osc_sm_node_state_t *node_states;
|
ompi_osc_sm_node_state_t *node_states;
|
||||||
|
uint64_t **posts;
|
||||||
|
|
||||||
|
opal_mutex_t lock;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_sm_module_t ompi_osc_sm_module_t;
|
typedef struct ompi_osc_sm_module_t ompi_osc_sm_module_t;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2014 The University of Tennessee and The University
|
* Copyright (c) 2014 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
@ -22,6 +22,74 @@
|
|||||||
|
|
||||||
#include "osc_sm.h"
|
#include "osc_sm.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* compare_ranks:
|
||||||
|
*
|
||||||
|
* @param[in] ptra Pointer to integer item
|
||||||
|
* @param[in] ptrb Pointer to integer item
|
||||||
|
*
|
||||||
|
* @returns 0 if *ptra == *ptrb
|
||||||
|
* @returns -1 if *ptra < *ptrb
|
||||||
|
* @returns 1 otherwise
|
||||||
|
*
|
||||||
|
* This function is used to sort the rank list. It can be removed if
|
||||||
|
* groups are always in order.
|
||||||
|
*/
|
||||||
|
static int compare_ranks (const void *ptra, const void *ptrb)
|
||||||
|
{
|
||||||
|
int a = *((int *) ptra);
|
||||||
|
int b = *((int *) ptrb);
|
||||||
|
|
||||||
|
if (a < b) {
|
||||||
|
return -1;
|
||||||
|
} else if (a > b) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ompi_osc_pt2pt_get_comm_ranks:
|
||||||
|
*
|
||||||
|
* @param[in] module - OSC PT2PT module
|
||||||
|
* @param[in] sub_group - Group with ranks to translate
|
||||||
|
*
|
||||||
|
* @returns an array of translated ranks on success or NULL on failure
|
||||||
|
*
|
||||||
|
* Translate the ranks given in {sub_group} into ranks in the
|
||||||
|
* communicator used to create {module}.
|
||||||
|
*/
|
||||||
|
static int *ompi_osc_sm_group_ranks (ompi_group_t *group, ompi_group_t *sub_group)
|
||||||
|
{
|
||||||
|
int size = ompi_group_size(sub_group);
|
||||||
|
int *ranks1, *ranks2;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ranks1 = calloc (size, sizeof(int));
|
||||||
|
ranks2 = calloc (size, sizeof(int));
|
||||||
|
if (NULL == ranks1 || NULL == ranks2) {
|
||||||
|
free (ranks1);
|
||||||
|
free (ranks2);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0 ; i < size ; ++i) {
|
||||||
|
ranks1[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ompi_group_translate_ranks (sub_group, size, ranks1, group, ranks2);
|
||||||
|
free (ranks1);
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
free (ranks2);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort (ranks2, size, sizeof (int), compare_ranks);
|
||||||
|
|
||||||
|
return ranks2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
|
ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
|
||||||
@ -54,7 +122,6 @@ ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
ompi_osc_sm_start(struct ompi_group_t *group,
|
ompi_osc_sm_start(struct ompi_group_t *group,
|
||||||
int assert,
|
int assert,
|
||||||
@ -62,20 +129,43 @@ ompi_osc_sm_start(struct ompi_group_t *group,
|
|||||||
{
|
{
|
||||||
ompi_osc_sm_module_t *module =
|
ompi_osc_sm_module_t *module =
|
||||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||||
|
int my_rank = ompi_comm_rank (module->comm);
|
||||||
|
|
||||||
|
OBJ_RETAIN(group);
|
||||||
|
|
||||||
|
if (!OPAL_ATOMIC_CMPSET(&module->start_group, NULL, group)) {
|
||||||
|
OBJ_RELEASE(group);
|
||||||
|
return OMPI_ERR_RMA_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
OBJ_RETAIN(group);
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
||||||
module->start_group = group;
|
if (NULL == ranks) {
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
size = ompi_group_size(module->start_group);
|
size = ompi_group_size(module->start_group);
|
||||||
|
|
||||||
while (module->my_node_state->post_count != size) {
|
for (int i = 0 ; i < size ; ++i) {
|
||||||
|
int rank_byte = ranks[i] >> 6;
|
||||||
|
uint64_t old, rank_bit = 1 << (ranks[i] & 0x3f);
|
||||||
|
|
||||||
|
/* wait for rank to post */
|
||||||
|
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
|
||||||
opal_progress();
|
opal_progress();
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
module->start_group = NULL;
|
opal_atomic_rmb ();
|
||||||
|
|
||||||
|
do {
|
||||||
|
old = module->posts[my_rank][rank_byte];
|
||||||
|
} while (!opal_atomic_cmpset_64 ((int64_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
|
||||||
|
}
|
||||||
|
|
||||||
|
free (ranks);
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
@ -88,30 +178,33 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
|
|||||||
{
|
{
|
||||||
ompi_osc_sm_module_t *module =
|
ompi_osc_sm_module_t *module =
|
||||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||||
int gsize, csize;
|
ompi_group_t *group;
|
||||||
|
int gsize;
|
||||||
|
|
||||||
/* ensure all memory operations have completed */
|
/* ensure all memory operations have completed */
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
|
|
||||||
if (NULL != module->start_group) {
|
group = module->start_group;
|
||||||
module->my_node_state->post_count = 0;
|
if (NULL == group || !OPAL_ATOMIC_CMPSET(&module->start_group, group, NULL)) {
|
||||||
|
return OMPI_ERR_RMA_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
|
|
||||||
gsize = ompi_group_size(module->start_group);
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
||||||
csize = ompi_comm_size(module->comm);
|
if (NULL == ranks) {
|
||||||
for (int i = 0 ; i < gsize ; ++i) {
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
for (int j = 0 ; j < csize ; ++j) {
|
|
||||||
if (ompi_group_peer_lookup(module->start_group, i) ==
|
|
||||||
ompi_comm_peer_lookup(module->comm, j)) {
|
|
||||||
(void)opal_atomic_add_32(&module->node_states[j].complete_count, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(module->start_group);
|
gsize = ompi_group_size(group);
|
||||||
module->start_group = NULL;
|
for (int i = 0 ; i < gsize ; ++i) {
|
||||||
|
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free (ranks);
|
||||||
|
|
||||||
|
OBJ_RELEASE(group);
|
||||||
|
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -124,28 +217,44 @@ ompi_osc_sm_post(struct ompi_group_t *group,
|
|||||||
{
|
{
|
||||||
ompi_osc_sm_module_t *module =
|
ompi_osc_sm_module_t *module =
|
||||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||||
int gsize, csize;
|
int my_rank = ompi_comm_rank (module->comm);
|
||||||
|
int my_byte = my_rank >> 6;
|
||||||
|
uint64_t my_bit = 1 << (my_rank & 0x3f);
|
||||||
|
int gsize;
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&module->lock);
|
||||||
|
|
||||||
|
if (NULL != module->post_group) {
|
||||||
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
return OMPI_ERR_RMA_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
|
module->post_group = group;
|
||||||
|
|
||||||
|
OBJ_RETAIN(group);
|
||||||
|
|
||||||
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
||||||
OBJ_RETAIN(group);
|
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
|
||||||
module->post_group = group;
|
if (NULL == ranks) {
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
module->my_node_state->complete_count = 0;
|
module->my_node_state->complete_count = 0;
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
|
|
||||||
gsize = ompi_group_size(module->post_group);
|
gsize = ompi_group_size(module->post_group);
|
||||||
csize = ompi_comm_size(module->comm);
|
|
||||||
for (int i = 0 ; i < gsize ; ++i) {
|
for (int i = 0 ; i < gsize ; ++i) {
|
||||||
for (int j = 0 ; j < csize ; ++j) {
|
(void) opal_atomic_add_64 ((int64_t *) module->posts[ranks[i]] + my_byte, my_bit);
|
||||||
if (ompi_group_peer_lookup(module->post_group, i) ==
|
|
||||||
ompi_comm_peer_lookup(module->comm, j)) {
|
|
||||||
(void)opal_atomic_add_32(&module->node_states[j].post_count, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_atomic_wmb ();
|
||||||
|
|
||||||
|
free (ranks);
|
||||||
|
|
||||||
|
opal_progress ();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
module->post_group = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -156,18 +265,28 @@ ompi_osc_sm_wait(struct ompi_win_t *win)
|
|||||||
{
|
{
|
||||||
ompi_osc_sm_module_t *module =
|
ompi_osc_sm_module_t *module =
|
||||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||||
|
ompi_group_t *group;
|
||||||
|
|
||||||
if (NULL != module->post_group) {
|
OPAL_THREAD_LOCK(&module->lock);
|
||||||
int size = ompi_group_size(module->post_group);
|
|
||||||
|
if (NULL == module->post_group) {
|
||||||
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
return OMPI_ERR_RMA_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
|
group = module->post_group;
|
||||||
|
|
||||||
|
int size = ompi_group_size (group);
|
||||||
|
|
||||||
while (module->my_node_state->complete_count != size) {
|
while (module->my_node_state->complete_count != size) {
|
||||||
opal_progress();
|
opal_progress();
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
}
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(module->post_group);
|
OBJ_RELEASE(group);
|
||||||
module->post_group = NULL;
|
module->post_group = NULL;
|
||||||
}
|
|
||||||
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
|
||||||
/* ensure all memory operations have completed */
|
/* ensure all memory operations have completed */
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
@ -183,7 +302,13 @@ ompi_osc_sm_test(struct ompi_win_t *win,
|
|||||||
ompi_osc_sm_module_t *module =
|
ompi_osc_sm_module_t *module =
|
||||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||||
|
|
||||||
if (NULL != module->post_group) {
|
OPAL_THREAD_LOCK(&module->lock);
|
||||||
|
|
||||||
|
if (NULL == module->post_group) {
|
||||||
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
|
return OMPI_ERR_RMA_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
int size = ompi_group_size(module->post_group);
|
int size = ompi_group_size(module->post_group);
|
||||||
|
|
||||||
if (module->my_node_state->complete_count == size) {
|
if (module->my_node_state->complete_count == size) {
|
||||||
@ -191,10 +316,8 @@ ompi_osc_sm_test(struct ompi_win_t *win,
|
|||||||
module->post_group = NULL;
|
module->post_group = NULL;
|
||||||
*flag = 1;
|
*flag = 1;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
opal_atomic_mb();
|
OPAL_THREAD_UNLOCK(&module->lock);
|
||||||
*flag = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ensure all memory operations have completed */
|
/* ensure all memory operations have completed */
|
||||||
opal_atomic_mb();
|
opal_atomic_mb();
|
||||||
|
@ -58,45 +58,45 @@ ompi_osc_sm_component_t mca_osc_sm_component = {
|
|||||||
|
|
||||||
ompi_osc_sm_module_t ompi_osc_sm_module_template = {
|
ompi_osc_sm_module_t ompi_osc_sm_module_template = {
|
||||||
{
|
{
|
||||||
ompi_osc_sm_shared_query,
|
.osc_win_shared_query = ompi_osc_sm_shared_query,
|
||||||
|
|
||||||
ompi_osc_sm_attach,
|
.osc_win_attach = ompi_osc_sm_attach,
|
||||||
ompi_osc_sm_detach,
|
.osc_win_detach = ompi_osc_sm_detach,
|
||||||
ompi_osc_sm_free,
|
.osc_free = ompi_osc_sm_free,
|
||||||
|
|
||||||
ompi_osc_sm_put,
|
.osc_put = ompi_osc_sm_put,
|
||||||
ompi_osc_sm_get,
|
.osc_get = ompi_osc_sm_get,
|
||||||
ompi_osc_sm_accumulate,
|
.osc_accumulate = ompi_osc_sm_accumulate,
|
||||||
ompi_osc_sm_compare_and_swap,
|
.osc_compare_and_swap = ompi_osc_sm_compare_and_swap,
|
||||||
ompi_osc_sm_fetch_and_op,
|
.osc_fetch_and_op = ompi_osc_sm_fetch_and_op,
|
||||||
ompi_osc_sm_get_accumulate,
|
.osc_get_accumulate = ompi_osc_sm_get_accumulate,
|
||||||
|
|
||||||
ompi_osc_sm_rput,
|
.osc_rput = ompi_osc_sm_rput,
|
||||||
ompi_osc_sm_rget,
|
.osc_rget = ompi_osc_sm_rget,
|
||||||
ompi_osc_sm_raccumulate,
|
.osc_raccumulate = ompi_osc_sm_raccumulate,
|
||||||
ompi_osc_sm_rget_accumulate,
|
.osc_rget_accumulate = ompi_osc_sm_rget_accumulate,
|
||||||
|
|
||||||
ompi_osc_sm_fence,
|
.osc_fence = ompi_osc_sm_fence,
|
||||||
|
|
||||||
ompi_osc_sm_start,
|
.osc_start = ompi_osc_sm_start,
|
||||||
ompi_osc_sm_complete,
|
.osc_complete = ompi_osc_sm_complete,
|
||||||
ompi_osc_sm_post,
|
.osc_post = ompi_osc_sm_post,
|
||||||
ompi_osc_sm_wait,
|
.osc_wait = ompi_osc_sm_wait,
|
||||||
ompi_osc_sm_test,
|
.osc_test = ompi_osc_sm_test,
|
||||||
|
|
||||||
ompi_osc_sm_lock,
|
.osc_lock = ompi_osc_sm_lock,
|
||||||
ompi_osc_sm_unlock,
|
.osc_unlock = ompi_osc_sm_unlock,
|
||||||
ompi_osc_sm_lock_all,
|
.osc_lock_all = ompi_osc_sm_lock_all,
|
||||||
ompi_osc_sm_unlock_all,
|
.osc_unlock_all = ompi_osc_sm_unlock_all,
|
||||||
|
|
||||||
ompi_osc_sm_sync,
|
.osc_sync = ompi_osc_sm_sync,
|
||||||
ompi_osc_sm_flush,
|
.osc_flush = ompi_osc_sm_flush,
|
||||||
ompi_osc_sm_flush_all,
|
.osc_flush_all = ompi_osc_sm_flush_all,
|
||||||
ompi_osc_sm_flush_local,
|
.osc_flush_local = ompi_osc_sm_flush_local,
|
||||||
ompi_osc_sm_flush_local_all,
|
.osc_flush_local_all = ompi_osc_sm_flush_local_all,
|
||||||
|
|
||||||
ompi_osc_sm_set_info,
|
.osc_set_info = ompi_osc_sm_set_info,
|
||||||
ompi_osc_sm_get_info
|
.osc_get_info = ompi_osc_sm_get_info
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -163,6 +163,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
int flavor, int *model)
|
int flavor, int *model)
|
||||||
{
|
{
|
||||||
ompi_osc_sm_module_t *module = NULL;
|
ompi_osc_sm_module_t *module = NULL;
|
||||||
|
int comm_size = ompi_comm_size (comm);
|
||||||
int ret = OMPI_ERROR;
|
int ret = OMPI_ERROR;
|
||||||
|
|
||||||
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
|
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
|
||||||
@ -174,6 +175,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
calloc(1, sizeof(ompi_osc_sm_module_t));
|
calloc(1, sizeof(ompi_osc_sm_module_t));
|
||||||
if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
|
||||||
|
OBJ_CONSTRUCT(&module->lock, opal_mutex_t);
|
||||||
|
|
||||||
/* fill in the function pointer part */
|
/* fill in the function pointer part */
|
||||||
memcpy(module, &ompi_osc_sm_module_template,
|
memcpy(module, &ompi_osc_sm_module_template,
|
||||||
sizeof(ompi_osc_base_module_t));
|
sizeof(ompi_osc_base_module_t));
|
||||||
@ -185,7 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
module->flavor = flavor;
|
module->flavor = flavor;
|
||||||
|
|
||||||
/* create the segment */
|
/* create the segment */
|
||||||
if (1 == ompi_comm_size(comm)) {
|
if (1 == comm_size) {
|
||||||
module->segment_base = NULL;
|
module->segment_base = NULL;
|
||||||
module->sizes = malloc(sizeof(size_t));
|
module->sizes = malloc(sizeof(size_t));
|
||||||
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
@ -200,13 +203,16 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
if (NULL == module->global_state) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module->global_state) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
module->node_states = malloc(sizeof(ompi_osc_sm_node_state_t));
|
module->node_states = malloc(sizeof(ompi_osc_sm_node_state_t));
|
||||||
if (NULL == module->node_states) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module->node_states) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
module->posts = calloc (1, sizeof(module->posts[0]) + sizeof (uint64_t));
|
||||||
|
if (NULL == module->posts) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
module->posts[0] = (uint64_t *) (module->posts + 1);
|
||||||
} else {
|
} else {
|
||||||
unsigned long total, *rbuf;
|
unsigned long total, *rbuf;
|
||||||
char *data_file;
|
char *data_file;
|
||||||
int i, flag;
|
int i, flag;
|
||||||
size_t pagesize;
|
size_t pagesize;
|
||||||
size_t state_size;
|
size_t state_size;
|
||||||
|
int posts_size, post_size = (comm_size + 63) / 64;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
|
||||||
"allocating shared memory region of size %ld\n", (long) size));
|
"allocating shared memory region of size %ld\n", (long) size));
|
||||||
@ -214,7 +220,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
/* get the pagesize */
|
/* get the pagesize */
|
||||||
pagesize = opal_getpagesize();
|
pagesize = opal_getpagesize();
|
||||||
|
|
||||||
rbuf = malloc(sizeof(unsigned long) * ompi_comm_size(module->comm));
|
rbuf = malloc(sizeof(unsigned long) * comm_size);
|
||||||
if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
|
||||||
module->noncontig = false;
|
module->noncontig = false;
|
||||||
@ -235,7 +241,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
if (OMPI_SUCCESS != ret) return ret;
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
total = 0;
|
total = 0;
|
||||||
for (i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
|
for (i = 0 ; i < comm_size ; ++i) {
|
||||||
total += rbuf[i];
|
total += rbuf[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,9 +253,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* user opal/shmem directly to create a shared memory segment */
|
/* user opal/shmem directly to create a shared memory segment */
|
||||||
state_size = sizeof(ompi_osc_sm_global_state_t) + sizeof(ompi_osc_sm_node_state_t) * ompi_comm_size(module->comm);
|
state_size = sizeof(ompi_osc_sm_global_state_t) + sizeof(ompi_osc_sm_node_state_t) * comm_size;
|
||||||
|
posts_size = comm_size * post_size * sizeof (uint64_t);
|
||||||
if (0 == ompi_comm_rank (module->comm)) {
|
if (0 == ompi_comm_rank (module->comm)) {
|
||||||
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size);
|
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size + posts_size);
|
||||||
if (OPAL_SUCCESS != ret) {
|
if (OPAL_SUCCESS != ret) {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -266,15 +273,22 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
module->sizes = malloc(sizeof(size_t) * ompi_comm_size(module->comm));
|
module->sizes = malloc(sizeof(size_t) * comm_size);
|
||||||
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
module->bases = malloc(sizeof(void*) * ompi_comm_size(module->comm));
|
module->bases = malloc(sizeof(void*) * comm_size);
|
||||||
if (NULL == module->bases) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
if (NULL == module->bases) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
module->posts = calloc (comm_size, sizeof (module->posts[0]));
|
||||||
|
if (NULL == module->posts) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
|
||||||
module->global_state = (ompi_osc_sm_global_state_t *) (module->segment_base);
|
module->global_state = (ompi_osc_sm_global_state_t *) (module->segment_base);
|
||||||
module->node_states = (ompi_osc_sm_node_state_t *) (module->global_state + 1);
|
module->node_states = (ompi_osc_sm_node_state_t *) (module->global_state + 1);
|
||||||
|
module->posts[0] = (uint64_t *) (module->node_states + comm_size);
|
||||||
|
|
||||||
|
for (i = 0, total = state_size + posts_size ; i < comm_size ; ++i) {
|
||||||
|
if (i > 0) {
|
||||||
|
module->posts[i] = module->posts[i - 1] + post_size;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0, total = state_size ; i < ompi_comm_size(module->comm) ; ++i) {
|
|
||||||
module->sizes[i] = rbuf[i];
|
module->sizes[i] = rbuf[i];
|
||||||
if (module->sizes[i]) {
|
if (module->sizes[i]) {
|
||||||
module->bases[i] = ((char *) module->segment_base) + total;
|
module->bases[i] = ((char *) module->segment_base) + total;
|
||||||
@ -296,7 +310,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
opal_atomic_init(&module->my_node_state->accumulate_lock, OPAL_ATOMIC_UNLOCKED);
|
opal_atomic_init(&module->my_node_state->accumulate_lock, OPAL_ATOMIC_UNLOCKED);
|
||||||
|
|
||||||
/* share everyone's displacement units. */
|
/* share everyone's displacement units. */
|
||||||
module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm));
|
module->disp_units = malloc(sizeof(int) * comm_size);
|
||||||
ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
|
ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
|
||||||
module->disp_units, 1, MPI_INT,
|
module->disp_units, 1, MPI_INT,
|
||||||
module->comm,
|
module->comm,
|
||||||
@ -309,7 +323,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
/* initialize synchronization code */
|
/* initialize synchronization code */
|
||||||
module->my_sense = 1;
|
module->my_sense = 1;
|
||||||
|
|
||||||
module->outstanding_locks = calloc(ompi_comm_size(module->comm), sizeof(enum ompi_osc_sm_locktype_t));
|
module->outstanding_locks = calloc(comm_size, sizeof(enum ompi_osc_sm_locktype_t));
|
||||||
if (NULL == module->outstanding_locks) {
|
if (NULL == module->outstanding_locks) {
|
||||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
goto error;
|
goto error;
|
||||||
@ -346,7 +360,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
}
|
}
|
||||||
module->global_state->use_barrier_for_fence = 0;
|
module->global_state->use_barrier_for_fence = 0;
|
||||||
module->global_state->sense = module->my_sense;
|
module->global_state->sense = module->my_sense;
|
||||||
module->global_state->count = ompi_comm_size(module->comm);
|
module->global_state->count = comm_size;
|
||||||
pthread_mutexattr_destroy(&mattr);
|
pthread_mutexattr_destroy(&mattr);
|
||||||
} else {
|
} else {
|
||||||
module->global_state->use_barrier_for_fence = 1;
|
module->global_state->use_barrier_for_fence = 1;
|
||||||
@ -367,8 +381,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
if (NULL != module->comm) ompi_comm_free(&module->comm);
|
win->w_osc_module = &module->super;
|
||||||
if (NULL != module) free(module);
|
ompi_osc_sm_free (win);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -459,10 +473,14 @@ ompi_osc_sm_free(struct ompi_win_t *win)
|
|||||||
free(module->sizes);
|
free(module->sizes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free (module->posts);
|
||||||
|
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
ompi_comm_free(&module->comm);
|
ompi_comm_free(&module->comm);
|
||||||
free(module);
|
free(module);
|
||||||
|
|
||||||
|
OBJ_DESTRUCT(&module->lock);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user