1
1
Per https://github.com/open-mpi/ompi/wiki/5.0.x-FeatureList, remove
the OSC pt2pt component.

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
Jeff Squyres 2020-04-13 12:15:17 -07:00
родитель 37c69ee883
Коммит 8999bae25e
21 изменённых файлов: 2 добавлений и 7704 удалений

Просмотреть файл

@ -1,62 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015 Intel, Inc. All rights reserved
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Install the runtime help/error-message catalog with the component.
dist_ompidata_DATA = help-osc-pt2pt.txt

# Source files compiled into the component library; shared by both the
# DSO and static builds selected below.
pt2pt_sources = \
        osc_pt2pt.h \
        osc_pt2pt_module.c \
        osc_pt2pt_comm.c \
        osc_pt2pt_component.c \
        osc_pt2pt_data_move.h \
        osc_pt2pt_data_move.c \
        osc_pt2pt_frag.h \
        osc_pt2pt_frag.c \
        osc_pt2pt_header.h \
        osc_pt2pt_pending_frag.h \
        osc_pt2pt_request.h \
        osc_pt2pt_request.c \
        osc_pt2pt_active_target.c \
        osc_pt2pt_passive_target.c \
        osc_pt2pt_sync.h \
        osc_pt2pt_sync.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_osc_pt2pt_DSO
component_noinst =
component_install = mca_osc_pt2pt.la
else
component_noinst = libmca_osc_pt2pt.la
component_install =
endif

# DSO build: install the loadable module into the MCA component dir.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_osc_pt2pt_la_SOURCES = $(pt2pt_sources)
mca_osc_pt2pt_la_LDFLAGS = -module -avoid-version
mca_osc_pt2pt_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la

# Static build: convenience library linked into libmpi directly.
noinst_LTLIBRARIES = $(component_noinst)
libmca_osc_pt2pt_la_SOURCES = $(pt2pt_sources)
libmca_osc_pt2pt_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,20 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
# Copyright (c) 2014 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_ompi_osc_pt2pt_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
# We can always build, unless we were explicitly disabled.
AC_DEFUN([MCA_ompi_osc_pt2pt_CONFIG],[
    # Generate the component Makefile, then run the caller's
    # action-if-can-compile argument unconditionally.
    AC_CONFIG_FILES([ompi/mca/osc/pt2pt/Makefile])
    [$1]
])dnl

Просмотреть файл

@ -1,15 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2016 Los Alamos National Security, LLC. All rights
# reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[mpi-thread-multiple-not-supported]
The OSC pt2pt component does not support MPI_THREAD_MULTIPLE in this release.
Workarounds are to run on a single node, or to use a system with an RDMA
capable network such as Infiniband.

Просмотреть файл

@ -1,961 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_H
#define OMPI_OSC_PT2PT_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/mca/threads/threads.h"
#include "opal/util/output.h"
#include "ompi/win/win.h"
#include "ompi/info/info.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/memchecker.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_sync.h"
BEGIN_C_DECLS
struct ompi_osc_pt2pt_frag_t;
struct ompi_osc_pt2pt_receive_t;
/** Component-wide state for the pt2pt one-sided component. Exactly one
 *  instance of this structure (mca_osc_pt2pt_component) exists per
 *  process; per-window state lives in ompi_osc_pt2pt_module_t. */
struct ompi_osc_pt2pt_component_t {
    /** Extend the basic osc component interface */
    ompi_osc_base_component_t super;
    /** lock access to modules */
    opal_mutex_t lock;
    /** cid -> module mapping */
    opal_hash_table_t modules;
    /** module count */
    int module_count;
    /** number of buffers per window */
    int receive_count;
    /** free list of ompi_osc_pt2pt_frag_t structures */
    opal_free_list_t frags;
    /** Free list of requests */
    opal_free_list_t requests;
    /** PT2PT component buffer size */
    unsigned int buffer_size;
    /** Lock for pending_operations */
    opal_mutex_t pending_operations_lock;
    /** List of operations that need to be processed */
    opal_list_t pending_operations;
    /** List of receives to be processed */
    opal_list_t pending_receives;
    /** Lock for pending_receives */
    opal_mutex_t pending_receives_lock;
    /** Is the progress function enabled? */
    bool progress_enable;
};
typedef struct ompi_osc_pt2pt_component_t ompi_osc_pt2pt_component_t;
/** Bit flags stored in ompi_osc_pt2pt_peer_t::flags (manipulated
 *  atomically via ompi_osc_pt2pt_peer_set_flag()). */
enum {
    /** peer has sent an unexpected post message (no matching start) */
    OMPI_OSC_PT2PT_PEER_FLAG_UNEX = 1,
    /** eager sends are active on this peer */
    OMPI_OSC_PT2PT_PEER_FLAG_EAGER = 2,
    /** peer has been locked (on-demand locking for lock_all) */
    OMPI_OSC_PT2PT_PEER_FLAG_LOCK = 4,
};
/** Per-peer communication state. Created lazily by
 *  ompi_osc_pt2pt_peer_lookup() and stored in the module's peer_hash. */
struct ompi_osc_pt2pt_peer_t {
    /** make this an opal object */
    opal_object_t super;
    /** rank of this peer */
    int rank;
    /** pointer to the current send fragment for each outgoing target */
    opal_atomic_intptr_t active_frag;
    /** lock for this peer */
    opal_mutex_t lock;
    /** fragments queued to this target */
    opal_list_t queued_frags;
    /** number of fragments incoming (negative - expected, positive - unsynchronized) */
    opal_atomic_int32_t passive_incoming_frag_count;
    /** peer flags (bitwise OR of OMPI_OSC_PT2PT_PEER_FLAG_* values) */
    opal_atomic_int32_t flags;
};
typedef struct ompi_osc_pt2pt_peer_t ompi_osc_pt2pt_peer_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_peer_t);
/** Return true when an on-demand lock has already been issued to this peer. */
static inline bool ompi_osc_pt2pt_peer_locked (ompi_osc_pt2pt_peer_t *peer)
{
    return (peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_LOCK) != 0;
}
/** Return true when this peer has sent a post message with no matching start. */
static inline bool ompi_osc_pt2pt_peer_unex (ompi_osc_pt2pt_peer_t *peer)
{
    return (peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_UNEX) != 0;
}
/** Return true when eager sends are currently enabled for this peer. */
static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer)
{
    return (peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER) != 0;
}
/** Atomically set (value == true) or clear (value == false) one
 *  OMPI_OSC_PT2PT_PEER_FLAG_* bit in peer->flags. */
static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value)
{
    if (value) {
        OPAL_ATOMIC_OR_FETCH32 (&peer->flags, flag);
    } else {
        OPAL_ATOMIC_AND_FETCH32 (&peer->flags, ~flag);
    }
}
/** Atomically set/clear the "peer locked" flag (see ompi_osc_pt2pt_peer_locked). */
static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)
{
    ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_LOCK, value);
}
/** Atomically set/clear the "unexpected post" flag (see ompi_osc_pt2pt_peer_unex). */
static inline void ompi_osc_pt2pt_peer_set_unex (ompi_osc_pt2pt_peer_t *peer, bool value)
{
    ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_UNEX, value);
}
/** Atomically set/clear the "eager sends active" flag (see ompi_osc_pt2pt_peer_eager_active). */
static inline void ompi_osc_pt2pt_peer_set_eager_active (ompi_osc_pt2pt_peer_t *peer, bool value)
{
    ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_EAGER, value);
}
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_peer_t);
/** Module structure. Exactly one of these is associated with each
    PT2PT window */
struct ompi_osc_pt2pt_module_t {
    /** Extend the basic osc module interface */
    ompi_osc_base_module_t super;
    /** window should have accumulate ordering... */
    bool accumulate_ordering;
    /** no locks info key value */
    bool no_locks;
    /** pointer to free on cleanup (may be NULL) */
    void *free_after;
    /** Base pointer for local window */
    void *baseptr;
    /** communicator created with this window. This is the cid used
        in the component's modules mapping. */
    ompi_communicator_t *comm;
    /** Local displacement unit. */
    int disp_unit;
    /** Mutex lock protecting module data */
    opal_recursive_mutex_t lock;
    /** condition variable associated with lock */
    opal_condition_t cond;
    /** hash table of peer objects */
    opal_hash_table_t peer_hash;
    /** lock protecting peer_hash */
    opal_mutex_t peer_lock;
    /** Number of communication fragments started for this epoch, by
        peer. Not in peer data to make fence more manageable. */
    opal_atomic_uint32_t *epoch_outgoing_frag_count;
    /** cyclic counter for a unique tag for long messages. */
    opal_atomic_uint32_t tag_counter;
    /** number of outgoing fragments still to be completed */
    opal_atomic_int32_t outgoing_frag_count;
    /** number of incoming fragments */
    opal_atomic_int32_t active_incoming_frag_count;
    /** Number of targets locked/being locked */
    unsigned int passive_target_access_epoch;
    /** Indicates the window is in a pscw or all access (fence, lock_all) epoch */
    ompi_osc_pt2pt_sync_t all_sync;

    /* ********************* PWSC data ************************ */
    struct ompi_group_t *pw_group;
    /** Number of "count" messages from the remote complete group
        we've received */
    opal_atomic_int32_t num_complete_msgs;

    /* ********************* LOCK data ************************ */
    /** Status of the local window lock. One of 0 (unlocked),
        MPI_LOCK_EXCLUSIVE, or MPI_LOCK_SHARED. */
    opal_atomic_int32_t lock_status;
    /** lock for locks_pending list */
    opal_mutex_t locks_pending_lock;
    /** target side list of lock requests we couldn't satisfy yet */
    opal_list_t locks_pending;
    /** origin side list of locks currently outstanding */
    opal_hash_table_t outstanding_locks;
    /** receive fragments */
    struct ompi_osc_pt2pt_receive_t *recv_frags;
    /** number of receive fragments */
    unsigned int recv_frag_count;
    /* enforce accumulate semantics */
    opal_atomic_lock_t accumulate_lock;
    /** accumulate operations pending the accumulation lock */
    opal_list_t pending_acc;
    /** lock for pending_acc */
    opal_mutex_t pending_acc_lock;
    /** Lock for garbage collection lists */
    opal_mutex_t gc_lock;
    /** List of buffers that need to be freed */
    opal_list_t buffer_gc;
};
typedef struct ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_t;
OMPI_MODULE_DECLSPEC extern ompi_osc_pt2pt_component_t mca_osc_pt2pt_component;
/**
 * Look up (and lazily create) the peer object for a rank.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] rank    peer rank in the window's communicator
 *
 * @returns the peer object for the rank
 *
 * Fast path is an unlocked hash lookup; on a miss the peer_lock is
 * taken and the table is re-checked before creating and inserting a
 * new peer object (check / lock / re-check pattern).
 */
static inline ompi_osc_pt2pt_peer_t *ompi_osc_pt2pt_peer_lookup (ompi_osc_pt2pt_module_t *module,
                                                                 int rank)
{
    ompi_osc_pt2pt_peer_t *peer = NULL;
    /* optimistic lookup without holding peer_lock */
    (void) opal_hash_table_get_value_uint32 (&module->peer_hash, rank, (void **) &peer);
    if (OPAL_UNLIKELY(NULL == peer)) {
        OPAL_THREAD_LOCK(&module->peer_lock);
        /* re-check under the lock: another thread may have inserted it */
        (void) opal_hash_table_get_value_uint32 (&module->peer_hash, rank, (void **) &peer);
        if (NULL == peer) {
            peer = OBJ_NEW(ompi_osc_pt2pt_peer_t);
            peer->rank = rank;
            (void) opal_hash_table_set_value_uint32 (&module->peer_hash, rank, (void *) peer);
        }
        OPAL_THREAD_UNLOCK(&module->peer_lock);
    }
    return peer;
}
/** Deferred incoming operation: a header that could not be processed
 *  immediately, queued on the component's pending_operations list. */
struct ompi_osc_pt2pt_pending_t {
    opal_list_item_t super;
    /** module the operation targets */
    ompi_osc_pt2pt_module_t *module;
    /** rank the header was received from */
    int source;
    /** saved copy of the incoming header */
    ompi_osc_pt2pt_header_t header;
};
typedef struct ompi_osc_pt2pt_pending_t ompi_osc_pt2pt_pending_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_pending_t);
/** Posted receive for incoming control/data fragments, queued on the
 *  component's pending_receives list while in flight. */
struct ompi_osc_pt2pt_receive_t {
    opal_list_item_t super;
    /** module this receive belongs to */
    ompi_osc_pt2pt_module_t *module;
    /** outstanding PML request for the receive */
    ompi_request_t *pml_request;
    /** receive buffer */
    void *buffer;
};
typedef struct ompi_osc_pt2pt_receive_t ompi_osc_pt2pt_receive_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_receive_t);
#define GET_MODULE(win) ((ompi_osc_pt2pt_module_t*) win->w_osc_module)
extern bool ompi_osc_pt2pt_no_locks;
int ompi_osc_pt2pt_attach(struct ompi_win_t *win, void *base, size_t len);
int ompi_osc_pt2pt_detach(struct ompi_win_t *win, const void *base);
int ompi_osc_pt2pt_free(struct ompi_win_t *win);
int ompi_osc_pt2pt_put(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_pt2pt_accumulate(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_pt2pt_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_pt2pt_compare_and_swap(const void *origin_addr,
const void *compare_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
ptrdiff_t target_disp,
struct ompi_win_t *win);
int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
ptrdiff_t target_disp,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_pt2pt_get_accumulate(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_pt2pt_rput(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_pt2pt_rget(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_pt2pt_raccumulate(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
ptrdiff_t target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_pt2pt_rget_accumulate(const void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_pt2pt_fence(int assert, struct ompi_win_t *win);
/* received a post message */
void osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source);
/* received a complete message */
void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, int frag_count);
int ompi_osc_pt2pt_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_complete(struct ompi_win_t *win);
int ompi_osc_pt2pt_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_wait(struct ompi_win_t *win);
int ompi_osc_pt2pt_test(struct ompi_win_t *win,
int *flag);
int ompi_osc_pt2pt_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_unlock(int target,
struct ompi_win_t *win);
int ompi_osc_pt2pt_lock_all(int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_unlock_all(struct ompi_win_t *win);
int ompi_osc_pt2pt_sync(struct ompi_win_t *win);
int ompi_osc_pt2pt_flush(int target,
struct ompi_win_t *win);
int ompi_osc_pt2pt_flush_all(struct ompi_win_t *win);
int ompi_osc_pt2pt_flush_local(int target,
struct ompi_win_t *win);
int ompi_osc_pt2pt_flush_local_all(struct ompi_win_t *win);
int ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct opal_info_t *info);
int ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct opal_info_t **info_used);
int ompi_osc_pt2pt_component_irecv(ompi_osc_pt2pt_module_t *module,
void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm);
int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock);
/**
 * ompi_osc_pt2pt_progress_pending_acc:
 *
 * @short Progress one pending accumulation or compare and swap operation.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @long If the accumulation lock can be acquired, progress one pending
 * accumulate or compare and swap operation.
 */
int ompi_osc_pt2pt_progress_pending_acc (ompi_osc_pt2pt_module_t *module);
/**
 * mark_incoming_completion:
 *
 * @short Increment incoming completion count.
 *
 * @param[in] module - OSC PT2PT module
 * @param[in] source - Passive target source or MPI_PROC_NULL (active target)
 *
 * @long This function increments either the passive or active incoming counts.
 *       If the count reaches the signal count we signal the module's condition.
 *       This function uses atomics if necessary so it is not necessary to hold
 *       the module lock before calling this function.
 */
static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, int source)
{
    int32_t new_value;
    if (MPI_PROC_NULL == source) {
        /* active target: a single module-wide counter tracks incoming frags */
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "mark_incoming_completion marking active incoming complete. module %p, count = %d",
                             (void *) module, (int) module->active_incoming_frag_count + 1));
        new_value = OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, 1);
        /* counter counts up from a negative expected value; >= 0 means all
           expected fragments have arrived */
        if (new_value >= 0) {
            OPAL_THREAD_LOCK(&module->lock);
            opal_condition_broadcast(&module->cond);
            OPAL_THREAD_UNLOCK(&module->lock);
        }
    } else {
        /* passive target: completion is tracked per-peer */
        ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source);
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "mark_incoming_completion marking passive incoming complete. module %p, source = %d, count = %d",
                             (void *) module, source, (int) peer->passive_incoming_frag_count + 1));
        new_value = OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) &peer->passive_incoming_frag_count, 1);
        if (0 == new_value) {
            OPAL_THREAD_LOCK(&module->lock);
            opal_condition_broadcast(&module->cond);
            OPAL_THREAD_UNLOCK(&module->lock);
        }
    }
}
/**
 * mark_outgoing_completion:
 *
 * @short Increment outgoing count.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @long This function is used to signal that an outgoing send is complete. It
 *       increments only the outgoing fragment count and signals the module
 *       condition when the fragment count is >= the signal count. This function
 *       uses atomics if necessary so it is not necessary to hold the module
 *       lock before calling this function.
 */
static inline void mark_outgoing_completion (ompi_osc_pt2pt_module_t *module)
{
    int32_t new_value = OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) &module->outgoing_frag_count, 1);
    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "mark_outgoing_completion: outgoing_frag_count = %d", new_value));
    /* counter was decremented below zero by ompi_osc_signal_outgoing;
       reaching >= 0 means every signalled send has completed */
    if (new_value >= 0) {
        OPAL_THREAD_LOCK(&module->lock);
        opal_condition_broadcast(&module->cond);
        OPAL_THREAD_UNLOCK(&module->lock);
    }
}
/**
 * ompi_osc_signal_outgoing:
 *
 * @short Increment outgoing signal counters.
 *
 * @param[in] module - OSC PT2PT module
 * @param[in] target - Passive target rank or MPI_PROC_NULL (active target)
 * @param[in] count  - Number of outgoing messages to signal.
 *
 * @long Registers {count} not-yet-complete sends by decrementing the module's
 *       outgoing fragment counter (mark_outgoing_completion increments it back
 *       as sends finish). For passive targets the per-target epoch counter is
 *       also advanced. This function uses atomics if necessary so it is not
 *       necessary to hold the module lock before calling this function.
 */
static inline void ompi_osc_signal_outgoing (ompi_osc_pt2pt_module_t *module, int target, int count)
{
    OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) &module->outgoing_frag_count, -count);
    if (MPI_PROC_NULL != target) {
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target,
                             count, module->epoch_outgoing_frag_count[target] + count));
        OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) (module->epoch_outgoing_frag_count + target), count);
    }
}
/**
 * osc_pt2pt_copy_on_recv:
 *
 * @short Helper function. Copies data from source to target through the
 *        convertor.
 *
 * @param[in] target     - destination for the data
 * @param[in] source     - packed data
 * @param[in] source_len - length of source buffer
 * @param[in] proc       - proc that packed the source data
 * @param[in] count      - count of datatype items
 * @param[in] datatype   - datatype used for unpacking
 *
 * @long This function unpacks data from the source buffer into the target
 *       buffer. The copy is done with a convertor generated from proc,
 *       datatype, and count.
 */
static inline void osc_pt2pt_copy_on_recv (void *target, void *source, size_t source_len, ompi_proc_t *proc,
                                           int count, ompi_datatype_t *datatype)
{
    opal_convertor_t convertor;
    uint32_t iov_count = 1;
    struct iovec iov;
    size_t max_data;

    /* create convertor */
    OBJ_CONSTRUCT(&convertor, opal_convertor_t);

    /* initialize convertor for receive-side (unpack) direction */
    opal_convertor_copy_and_prepare_for_recv(proc->super.proc_convertor, &datatype->super, count, target,
                                             0, &convertor);

    iov.iov_len = source_len;
    iov.iov_base = (IOVBASE_TYPE *) source;
    max_data = iov.iov_len;
    /* memchecker: mark the packed bytes defined while the convertor reads them */
    MEMCHECKER(memchecker_convertor_call(&opal_memchecker_base_mem_defined, &convertor));
    opal_convertor_unpack (&convertor, &iov, &iov_count, &max_data);
    MEMCHECKER(memchecker_convertor_call(&opal_memchecker_base_mem_noaccess, &convertor));
    OBJ_DESTRUCT(&convertor);
}
/**
 * osc_pt2pt_copy_for_send:
 *
 * @short: Helper function. Copies data from source to target through the
 *         convertor.
 *
 * @param[in] target     - destination for the packed data
 * @param[in] target_len - length of the target buffer
 * @param[in] source     - original data
 * @param[in] proc       - proc this data will be sent to
 * @param[in] count      - count of datatype items
 * @param[in] datatype   - datatype used for packing
 *
 * @long This function packs data from the source buffer into the target
 *       buffer. The copy is done with a convertor generated from proc,
 *       datatype, and count.
 */
static inline void osc_pt2pt_copy_for_send (void *target, size_t target_len, const void *source, ompi_proc_t *proc,
                                            int count, ompi_datatype_t *datatype)
{
    opal_convertor_t convertor;
    uint32_t iov_count = 1;
    struct iovec iov;
    /* NOTE(review): max_data is passed to opal_convertor_pack without being
       initialized here; presumably the convertor treats it as output-only in
       this direction — confirm against the opal_convertor API. */
    size_t max_data;

    OBJ_CONSTRUCT(&convertor, opal_convertor_t);
    opal_convertor_copy_and_prepare_for_send(proc->super.proc_convertor, &datatype->super,
                                             count, source, 0, &convertor);

    iov.iov_len = target_len;
    iov.iov_base = (IOVBASE_TYPE *) target;
    opal_convertor_pack(&convertor, &iov, &iov_count, &max_data);
    OBJ_DESTRUCT(&convertor);
}
/**
 * osc_pt2pt_gc_clean:
 *
 * @short Release finished PML requests and accumulate buffers.
 *
 * @long This function exists because it is not possible to free a buffer from
 *       a request completion callback. We instead put requests and buffers on the
 *       module's garbage collection lists and release them at a later time.
 */
static inline void osc_pt2pt_gc_clean (ompi_osc_pt2pt_module_t *module)
{
    opal_list_item_t *item;
    OPAL_THREAD_LOCK(&module->gc_lock);
    /* drain and release every buffer queued by osc_pt2pt_gc_add_buffer */
    while (NULL != (item = opal_list_remove_first (&module->buffer_gc))) {
        OBJ_RELEASE(item);
    }
    OPAL_THREAD_UNLOCK(&module->gc_lock);
}
/** Queue a buffer for deferred release by osc_pt2pt_gc_clean() (safe to
 *  call from a request completion callback). */
static inline void osc_pt2pt_gc_add_buffer (ompi_osc_pt2pt_module_t *module, opal_list_item_t *buffer)
{
    OPAL_THREAD_SCOPED_LOCK(&module->gc_lock,
                            opal_list_append (&module->buffer_gc, buffer));
}
/** Queue an operation that could not be processed immediately on the
 *  component-wide pending_operations list. */
static inline void osc_pt2pt_add_pending (ompi_osc_pt2pt_pending_t *pending)
{
    OPAL_THREAD_SCOPED_LOCK(&mca_osc_pt2pt_component.pending_operations_lock,
                            opal_list_append (&mca_osc_pt2pt_component.pending_operations, &pending->super));
}
#define OSC_PT2PT_FRAG_TAG 0x10000
#define OSC_PT2PT_FRAG_MASK 0x0ffff
/**
 * get_tag:
 *
 * @short Get a send/recv base tag for large memory operations.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @long This function acquires a 16-bit tag for use with large memory operations. The
 *       tag will be odd or even depending on if this is in a passive target access
 *       or not. An actual tag that will be passed to PML send/recv function is given
 *       by tag_to_target or tag_to_origin function depending on the communication
 *       direction.
 */
static inline int get_tag(ompi_osc_pt2pt_module_t *module)
{
    /* the LSB of the tag is used be the receiver to determine if the
       message is a passive or active target (ie, where to mark
       completion). */
    /* increment by 4 so the two low bits stay free for the direction
       (tag_to_target/tag_to_origin) and passive/active encodings */
    int32_t tmp = OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) &module->tag_counter, 4);
    return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch);
}
/**
 * tag_to_target:
 *
 * @short Derive the PML tag for origin-to-target traffic from a base tag.
 *
 * @param[in] tag - base tag given by get_tag function.
 *
 * Origin-to-target messages use the base tag unchanged, so bit 1 of the
 * returned tag is clear: (returned_tag >> 1) & 0x1 == 0.
 */
static inline int tag_to_target(int tag)
{
    return tag;
}
/**
 * tag_to_origin:
 *
 * @short Derive the PML tag for target-to-origin traffic from a base tag.
 *
 * @param[in] tag - base tag given by get_tag function.
 *
 * Target-to-origin messages set bit 1 of the base tag, so
 * (returned_tag >> 1) & 0x1 == 1.
 */
static inline int tag_to_origin(int tag)
{
    return tag + 2;
}
/**
 * ompi_osc_pt2pt_accumulate_lock:
 *
 * @short Internal function that spins until the accumulation lock has
 *        been acquired.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @returns 0
 *
 * @long This function blocks until the accumulation lock has been acquired. This
 *       behavior is only acceptable from a user-level call as blocking in a
 *       callback may cause deadlock. If a callback needs the accumulate lock and
 *       it is not available it should be placed on the pending_acc list of the
 *       module. It will be released by ompi_osc_pt2pt_accumulate_unlock().
 */
static inline int ompi_osc_pt2pt_accumulate_lock (ompi_osc_pt2pt_module_t *module)
{
    /* spin, driving progress so the lock holder can finish */
    while (opal_atomic_trylock (&module->accumulate_lock)) {
        opal_progress ();
    }
    return 0;
}
/**
 * ompi_osc_pt2pt_accumulate_trylock:
 *
 * @short Attempt to take the accumulation lock without blocking.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @returns 0 if the accumulation lock was acquired
 * @returns 1 if the lock was not available
 *
 * @long Unlike ompi_osc_pt2pt_accumulate_lock() this never spins, so it
 *       is safe to call from a callback.
 */
static inline int ompi_osc_pt2pt_accumulate_trylock (ompi_osc_pt2pt_module_t *module)
{
    return opal_atomic_trylock (&module->accumulate_lock);
}
/**
 * @brief Check whether this process is currently inside a passive target
 *        access epoch (at least one target locked).
 *
 * @param[in] module osc pt2pt module
 */
static inline bool ompi_osc_pt2pt_in_passive_epoch (ompi_osc_pt2pt_module_t *module)
{
    return module->passive_target_access_epoch != 0;
}
/**
 * ompi_osc_pt2pt_accumulate_unlock:
 *
 * @short Unlock the accumulation lock and release a pending accumulation operation.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @long This function unlocks the accumulation lock and releases a single pending
 *       accumulation operation if one exists. This function may be called recursively.
 */
static inline void ompi_osc_pt2pt_accumulate_unlock (ompi_osc_pt2pt_module_t *module)
{
    opal_atomic_unlock (&module->accumulate_lock);
    /* give one queued accumulate/CAS a chance to run now that the lock is free */
    if (0 != opal_list_get_size (&module->pending_acc)) {
        ompi_osc_pt2pt_progress_pending_acc (module);
    }
}
/**
 * Find the first outstanding lock of the target.
 *
 * @param[in]  module osc pt2pt module
 * @param[in]  target target rank
 * @param[out] peer   peer object associated with the target (may be NULL)
 *
 * @returns an outstanding lock on success, NULL if none exists
 *
 * This function looks for an outstanding lock to the target. If a lock exists it is returned.
 */
static inline ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_module_lock_find (ompi_osc_pt2pt_module_t *module, int target,
                                                                      ompi_osc_pt2pt_peer_t **peer)
{
    ompi_osc_pt2pt_sync_t *outstanding_lock = NULL;
    (void) opal_hash_table_get_value_uint32 (&module->outstanding_locks, (uint32_t) target, (void **) &outstanding_lock);
    if (NULL != outstanding_lock && peer) {
        *peer = outstanding_lock->peer_list.peer;
    }
    return outstanding_lock;
}
/**
 * Add an outstanding lock
 *
 * @param[in] module osc pt2pt module
 * @param[in] lock   lock object
 *
 * This function inserts a lock object into the table of outstanding locks. The caller must be holding the module
 * lock.
 */
static inline void ompi_osc_pt2pt_module_lock_insert (struct ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
{
    (void) opal_hash_table_set_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target, (void *) lock);
}
/**
 * Remove an outstanding lock
 *
 * @param[in] module osc pt2pt module
 * @param[in] lock   lock object
 *
 * This function removes a lock object from the table of outstanding locks. The caller must be holding the module
 * lock.
 */
static inline void ompi_osc_pt2pt_module_lock_remove (struct ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
{
    (void) opal_hash_table_remove_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target);
}
/**
 * Lookup a synchronization object associated with the target
 *
 * @param[in]  module osc pt2pt module
 * @param[in]  target target rank
 * @param[out] peer   peer object (optional; pass NULL if not needed)
 *
 * @returns NULL if the target is not locked, fenced, or part of a pscw sync
 * @returns synchronization object on success
 *
 * This function returns the synchronization object associated with an access epoch for
 * the target. If the target is not part of any current access epoch then NULL is returned.
 */
static inline ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_module_sync_lookup (ompi_osc_pt2pt_module_t *module, int target,
                                                                        struct ompi_osc_pt2pt_peer_t **peer)
{
    ompi_osc_pt2pt_peer_t *tmp;
    if (NULL == peer) {
        /* caller doesn't want the peer; give the lookups somewhere to write */
        peer = &tmp;
    }
    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc/pt2pt: looking for synchronization object for target %d", target));
    switch (module->all_sync.type) {
    case OMPI_OSC_PT2PT_SYNC_TYPE_NONE:
        /* no global epoch: a per-target passive lock may still exist */
        if (!module->no_locks) {
            return ompi_osc_pt2pt_module_lock_find (module, target, peer);
        }
        return NULL;
    case OMPI_OSC_PT2PT_SYNC_TYPE_FENCE:
    case OMPI_OSC_PT2PT_SYNC_TYPE_LOCK:
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "osc/pt2pt: found fence/lock_all access epoch for target %d", target));
        /* fence epoch is now active */
        module->all_sync.epoch_active = true;
        *peer = ompi_osc_pt2pt_peer_lookup (module, target);
        /* lock_all locks peers on demand the first time they are targeted */
        if (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type && !ompi_osc_pt2pt_peer_locked (*peer)) {
            (void) ompi_osc_pt2pt_lock_remote (module, target, &module->all_sync);
        }
        return &module->all_sync;
    case OMPI_OSC_PT2PT_SYNC_TYPE_PSCW:
        if (ompi_osc_pt2pt_sync_pscw_peer (module, target, peer)) {
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                                 "osc/pt2pt: found PSCW access epoch target for %d", target));
            return &module->all_sync;
        }
        /* target not in the pscw group: fall through to the NULL return */
    }
    return NULL;
}
/**
 * @brief Check if an access epoch is active.
 *
 * @param[in] module osc pt2pt module
 *
 * @returns true if any type of access epoch is active
 * @returns false otherwise
 *
 * Used to detect conflicting access epochs.
 */
static inline bool ompi_osc_pt2pt_access_epoch_active (ompi_osc_pt2pt_module_t *module)
{
    if (module->all_sync.epoch_active) {
        return true;
    }
    return ompi_osc_pt2pt_in_passive_epoch (module);
}
/** Return true when eager sends may be issued to {rank}: the rank must be
 *  covered by an access epoch and either the epoch or the peer itself must
 *  have eager sends enabled. */
static inline bool ompi_osc_pt2pt_peer_sends_active (ompi_osc_pt2pt_module_t *module, int rank)
{
    ompi_osc_pt2pt_peer_t *peer;
    ompi_osc_pt2pt_sync_t *sync = ompi_osc_pt2pt_module_sync_lookup (module, rank, &peer);
    return (NULL != sync) && (sync->eager_send_active || ompi_osc_pt2pt_peer_eager_active (peer));
}
END_C_DECLS
#endif /* OMPI_OSC_PT2PT_H */

Просмотреть файл

@ -1,616 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "osc_pt2pt_frag.h"
#include "mpi.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/base/base.h"
/**
* compare_ranks:
*
* @param[in] ptra Pointer to integer item
* @param[in] ptrb Pointer to integer item
*
* @returns 0 if *ptra == *ptrb
* @returns -1 if *ptra < *ptrb
* @returns 1 otherwise
*
* This function is used to sort the rank list. It can be removed if
* groups are always in order.
*/
static int compare_ranks (const void *ptra, const void *ptrb)
{
    /* qsort comparator over int values: -1 / 0 / 1 ordering result */
    int lhs = *((const int *) ptra);
    int rhs = *((const int *) ptrb);

    /* branch-free three-way compare: yields 1 when lhs > rhs, -1 when
     * lhs < rhs, and 0 on equality -- identical results to the original
     * if/else chain */
    return (lhs > rhs) - (lhs < rhs);
}
/**
* ompi_osc_pt2pt_get_comm_ranks:
*
* @param[in] module - OSC PT2PT module
* @param[in] sub_group - Group with ranks to translate
*
* @returns an array of translated ranks on success or NULL on failure
*
* Translate the ranks given in {sub_group} into ranks in the
* communicator used to create {module}.
*/
static ompi_osc_pt2pt_peer_t **ompi_osc_pt2pt_get_peers (ompi_osc_pt2pt_module_t *module, ompi_group_t *sub_group)
{
    int group_size = ompi_group_size (sub_group);
    ompi_osc_pt2pt_peer_t **peers;
    int *local_ranks, *comm_ranks;
    int rc;

    /* scratch arrays for rank translation plus the result array */
    local_ranks = calloc (group_size, sizeof (int));
    comm_ranks = calloc (group_size, sizeof (int));
    peers = calloc (group_size, sizeof (ompi_osc_pt2pt_peer_t *));
    if (NULL == local_ranks || NULL == comm_ranks || NULL == peers) {
        /* free(NULL) is a no-op, so unconditional frees are safe */
        free (local_ranks);
        free (comm_ranks);
        free (peers);
        return NULL;
    }

    /* translate every rank of sub_group into the window communicator */
    for (int i = 0 ; i < group_size ; ++i) {
        local_ranks[i] = i;
    }

    rc = ompi_group_translate_ranks (sub_group, group_size, local_ranks,
                                     module->comm->c_local_group, comm_ranks);
    free (local_ranks);
    if (OMPI_SUCCESS != rc) {
        free (comm_ranks);
        free (peers);
        return NULL;
    }

    /* keep the peer array in ascending communicator-rank order */
    qsort (comm_ranks, group_size, sizeof (int), compare_ranks);

    /* look up each peer and take a reference; released by
     * ompi_osc_pt2pt_release_peers() */
    for (int i = 0 ; i < group_size ; ++i) {
        peers[i] = ompi_osc_pt2pt_peer_lookup (module, comm_ranks[i]);
        OBJ_RETAIN(peers[i]);
    }

    free (comm_ranks);

    return peers;
}
/* Drop the references taken by ompi_osc_pt2pt_get_peers() and free the
 * peer array. Accepts NULL (does nothing in that case). */
static void ompi_osc_pt2pt_release_peers (ompi_osc_pt2pt_peer_t **peers, int npeers)
{
    if (NULL == peers) {
        return;
    }

    for (int i = 0 ; i < npeers ; ++i) {
        OBJ_RELEASE(peers[i]);
    }

    free (peers);
}
/**
 * @brief implement MPI_Win_fence for the pt2pt component
 *
 * @param[in] assert  MPI assertion flags (MPI_MODE_NOPRECEDE and
 *                    MPI_MODE_NOSUCCEED are honored)
 * @param[in] win     ompi window
 *
 * @returns OMPI_SUCCESS on success
 * @returns OMPI_ERR_RMA_SYNC if called while a passive-target epoch is active
 *
 * Flushes all queued outgoing fragments, then uses a reduce_scatter_block so
 * each process learns the total number of fragments headed its way, and waits
 * for both outgoing and incoming fragment counters to drain before the
 * closing barrier.
 */
int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    uint32_t incoming_reqs;
    int ret = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: fence start"));

    /* can't enter an active target epoch when in a passive target epoch */
    if (ompi_osc_pt2pt_in_passive_epoch (module)) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "osc pt2pt: could not enter fence. already in an access epoch"));
        return OMPI_ERR_RMA_SYNC;
    }

    /* active sends are now active (we will close the epoch if NOSUCCEED is specified) */
    if (0 == (assert & MPI_MODE_NOSUCCEED)) {
        module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_FENCE;
        module->all_sync.eager_send_active = true;
    }

    /* short-circuit the noprecede case: no prior RMA to complete, a plain
     * barrier suffices */
    if (0 != (assert & MPI_MODE_NOPRECEDE)) {
        module->comm->c_coll->coll_barrier (module->comm, module->comm->c_coll->coll_barrier_module);
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "osc pt2pt: fence end (short circuit)"));
        return ret;
    }

    /* try to start all requests.  */
    ret = ompi_osc_pt2pt_frag_flush_all(module);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: fence done sending"));

    /* find out how much data everyone is going to send us.  */
    ret = module->comm->c_coll->coll_reduce_scatter_block ((void *) module->epoch_outgoing_frag_count,
                                                           &incoming_reqs, 1, MPI_UINT32_T,
                                                           MPI_SUM, module->comm,
                                                           module->comm->c_coll->coll_reduce_scatter_block_module);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    OPAL_THREAD_LOCK(&module->lock);
    /* reset per-peer outgoing counts for the next epoch */
    bzero ((void *) module->epoch_outgoing_frag_count, sizeof(uint32_t) * ompi_comm_size(module->comm));

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: fence expects %d requests",
                         incoming_reqs));

    /* set our complete condition for incoming requests */
    OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -incoming_reqs);

    /* wait for completion: the counters are biased negative here and count
     * back up elsewhere as fragments complete/arrive -- presumably reaching
     * zero when everything has drained (TODO confirm against the counter
     * increment sites) */
    while (module->outgoing_frag_count < 0 || module->active_incoming_frag_count < 0) {
        opal_condition_wait(&module->cond, &module->lock);
    }

    if (assert & MPI_MODE_NOSUCCEED) {
        /* as specified in MPI-3 p 438 3-5 the fence can end an epoch. it isn't explicitly
         * stated that MPI_MODE_NOSUCCEED ends the epoch but it is a safe assumption. */
        ompi_osc_pt2pt_sync_reset (&module->all_sync);
    }

    module->all_sync.epoch_active = false;
    OPAL_THREAD_UNLOCK(&module->lock);

    module->comm->c_coll->coll_barrier (module->comm, module->comm->c_coll->coll_barrier_module);

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: fence end: %d", ret));

    return OMPI_SUCCESS;
}
/**
 * @brief begin a PSCW access epoch (MPI_Win_start)
 *
 * @param[in] group   group of target processes for this epoch
 * @param[in] assert  assertion flags (MPI_MODE_NOCHECK honored)
 * @param[in] win     ompi window
 *
 * @returns OMPI_SUCCESS on success
 * @returns OMPI_ERR_RMA_SYNC if another access epoch is already active
 * @returns OMPI_ERR_OUT_OF_RESOURCE if the peer list cannot be allocated
 *
 * Fix over previous revision: on peer-list allocation failure the function
 * used to return while leaving the sync object configured for PSCW
 * (epoch_active set, group pointer stored) and while holding an extra
 * reference on the group -- leaking the group and wedging the window. The
 * error path now resets the sync object and releases the group reference.
 */
int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *sync = &module->all_sync;

    OPAL_THREAD_LOCK(&sync->lock);

    /* check if we are already in an access epoch */
    if (ompi_osc_pt2pt_access_epoch_active (module)) {
        OPAL_THREAD_UNLOCK(&sync->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    /* mark all procs in this group as being in an access epoch */
    sync->num_peers = ompi_group_size (group);
    sync->sync.pscw.group = group;

    /* haven't processed any post messages yet */
    sync->sync_expected = sync->num_peers;

    /* If the previous epoch was from Fence, then eager_send_active is still
     * set to true at this time, but it shouldn't be true until we get our
     * incoming Posts. So reset to 'false' for this new epoch.
     */
    sync->eager_send_active = false;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_start entering with group size %d...",
                         sync->num_peers));

    sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_PSCW;

    /* prevent us from entering a passive-target, fence, or another pscw access epoch until
     * the matching complete is called */
    sync->epoch_active = true;

    /* save the group */
    OBJ_RETAIN(group);

    if (0 == ompi_group_size (group)) {
        /* nothing more to do. this is an empty start epoch */
        sync->eager_send_active = true;
        OPAL_THREAD_UNLOCK(&sync->lock);
        return OMPI_SUCCESS;
    }

    opal_atomic_wmb ();

    /* translate the group ranks into the communicator */
    sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group);
    if (NULL == sync->peer_list.peers) {
        /* undo the epoch setup above so the window is left in a consistent
         * state, and drop the group reference taken above so it is not
         * leaked (ompi_osc_pt2pt_sync_reset does not release the group --
         * see ompi_osc_pt2pt_complete, which releases it separately) */
        ompi_osc_pt2pt_sync_reset (sync);
        OBJ_RELEASE(group);
        OPAL_THREAD_UNLOCK(&sync->lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (!(assert & MPI_MODE_NOCHECK)) {
        /* account for post messages that arrived before this start call */
        for (int i = 0 ; i < sync->num_peers ; ++i) {
            ompi_osc_pt2pt_peer_t *peer = sync->peer_list.peers[i];

            if (ompi_osc_pt2pt_peer_unex (peer)) {
                /* the peer already sent a post message for this pscw access epoch */
                OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                                     "found unexpected post from %d",
                                     peer->rank));
                OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);
                ompi_osc_pt2pt_peer_set_unex (peer, false);
            }
        }
    } else {
        /* MPI_MODE_NOCHECK: user guarantees posts are already matched */
        sync->sync_expected = 0;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "post messages still needed: %d", sync->sync_expected));

    /* if we've already received all the post messages, we can eager
       send.  Otherwise, eager send will be enabled when
       numb_post_messages reaches 0 */
    if (0 == sync->sync_expected) {
        sync->eager_send_active = true;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_start complete. eager sends active: %d",
                         sync->eager_send_active));

    OPAL_THREAD_UNLOCK(&sync->lock);
    return OMPI_SUCCESS;
}
/**
 * @brief end a PSCW access epoch (MPI_Win_complete)
 *
 * @param[in] win ompi window
 *
 * @returns OMPI_SUCCESS on success
 * @returns OMPI_ERR_RMA_SYNC if no PSCW access epoch is active
 *
 * Waits for all post messages, sends each peer in the start group a
 * complete message carrying the number of outgoing fragments it should
 * expect, flushes queued fragments per target, and finally waits for local
 * send completion. Only the access epoch is completed here; the exposure
 * epoch (if any) is ended by wait/test.
 */
int ompi_osc_pt2pt_complete (ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *sync = &module->all_sync;
    int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_pt2pt_peer_t **peers;
    int ret = OMPI_SUCCESS;
    ompi_group_t *group;
    size_t group_size;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_complete entering..."));

    OPAL_THREAD_LOCK(&sync->lock);
    /* complete without a matching start is a synchronization error */
    if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW != sync->type) {
        OPAL_THREAD_UNLOCK(&sync->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    /* wait for all the post messages */
    ompi_osc_pt2pt_sync_wait_nolock (sync);

    /* phase 1 cleanup sync object: stash what we need before resetting */
    group = sync->sync.pscw.group;
    group_size = sync->num_peers;

    peers = sync->peer_list.peers;

    /* need to reset the sync here to avoid processing incorrect post messages */
    ompi_osc_pt2pt_sync_reset (sync);
    OPAL_THREAD_UNLOCK(&sync->lock);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_complete all posts received. sending complete messages..."));

    /* for each process in group, send a control message with number
       of updates coming, then start all the requests.  Note that the
       control send is processed as another message in a fragment, so
       this might get queued until the flush_all (which is fine).

       At the same time, clean out the outgoing count for the next
       round. */
    for (size_t i = 0 ; i < group_size ; ++i) {
        ompi_osc_pt2pt_header_complete_t complete_req;
        int rank = peers[i]->rank;

        if (my_rank == rank) {
            /* shortcut for self: process the completion locally, no message */
            osc_pt2pt_incoming_complete (module, rank, 0);
            continue;
        }

        complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE;
        complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
        complete_req.frag_count = module->epoch_outgoing_frag_count[rank];
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
#if OPAL_ENABLE_DEBUG
        complete_req.padding[0] = 0;
        complete_req.padding[1] = 0;
#endif
        /* convert the header to the receiver's byte order if needed */
        osc_pt2pt_hton(&complete_req, ompi_comm_peer_lookup (module->comm, rank));
#endif

        ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, rank);

        /* XXX -- TODO -- since fragment are always delivered in order we do not need to count anything but long
         * requests. once that is done this can be removed. */
        if (peer->active_frag) {
            ompi_osc_pt2pt_frag_t *active_frag = (ompi_osc_pt2pt_frag_t *) peer->active_frag;
            if (active_frag->remain_len < sizeof (complete_req)) {
                /* the complete message itself will not fit in the active
                 * fragment, so it will travel in one more fragment */
                ++complete_req.frag_count;
            }
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "ompi_osc_pt2pt_complete sending complete message to %d. frag_count: %u",
                             rank, complete_req.frag_count));

        ret = ompi_osc_pt2pt_control_send (module, rank, &complete_req,
                                           sizeof(ompi_osc_pt2pt_header_complete_t));
        if (OMPI_SUCCESS != ret) {
            break;
        }

        ret = ompi_osc_pt2pt_frag_flush_target (module, rank);
        if (OMPI_SUCCESS != ret) {
            break;
        }

        /* zero the fragment counts here to ensure they are zerod */
        module->epoch_outgoing_frag_count[rank] = 0;
    }

    if (peers) {
        /* release our reference to peers in this group */
        ompi_osc_pt2pt_release_peers (peers, group_size);
    }

    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    OPAL_THREAD_LOCK(&module->lock);
    /* wait for outgoing requests to complete.  Don't wait for incoming, as
       we're only completing the access epoch, not the exposure epoch */
    while (module->outgoing_frag_count < 0) {
        opal_condition_wait(&module->cond, &module->lock);
    }

    /* unlock here, as group cleanup can take a while... */
    OPAL_THREAD_UNLOCK(&module->lock);

    /* phase 2 cleanup group */
    OBJ_RELEASE(group);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_complete complete"));

    return OMPI_SUCCESS;
}
/**
 * @brief begin a PSCW exposure epoch (MPI_Win_post)
 *
 * @param[in] group   group of origin processes
 * @param[in] assert  assertion flags (MPI_MODE_NOCHECK honored)
 * @param[in] win     ompi window
 *
 * @returns OMPI_SUCCESS on success
 * @returns OMPI_ERR_RMA_SYNC if an exposure epoch is already active
 * @returns OMPI_ERR_OUT_OF_RESOURCE if the peer list cannot be allocated
 *
 * Records the post group and sends an unbuffered post message to each
 * member (unless NOCHECK or the group is empty).
 */
int ompi_osc_pt2pt_post (ompi_group_t *group, int assert, ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_header_post_t post_req;
    ompi_osc_pt2pt_peer_t **peers;

    /* can't check for all access epoch here due to fence */
    if (module->pw_group) {
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_post entering with group size %d...",
                         ompi_group_size (group)));

    OPAL_THREAD_LOCK(&module->lock);

    /* ensure we're not already in a post (re-checked under the lock) */
    if (NULL != module->pw_group) {
        OPAL_THREAD_UNLOCK(&(module->lock));
        return OMPI_ERR_RMA_SYNC;
    }

    /* save the group */
    OBJ_RETAIN(group);

    module->pw_group = group;

    /* Update completion counter.  Can't have received any completion
       messages yet; complete won't send a completion header until
       we've sent a post header. */
    module->num_complete_msgs = -ompi_group_size(module->pw_group);

    OPAL_THREAD_UNLOCK(&(module->lock));

    /* nothing to notify: user asserted no check, or the group is empty */
    if ((assert & MPI_MODE_NOCHECK) || 0 == ompi_group_size (group)) {
        return OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "sending post messages"));

    /* translate group ranks into the communicator */
    peers = ompi_osc_pt2pt_get_peers (module, module->pw_group);
    if (OPAL_UNLIKELY(NULL == peers)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* send a hello counter to everyone in group */
    for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
        ompi_osc_pt2pt_peer_t *peer = peers[i];
        int rank = peer->rank;

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", rank));

        ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, rank);

        /* shortcut for self */
        if (ompi_proc_local() == proc) {
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self post"));
            osc_pt2pt_incoming_post (module, ompi_comm_rank(module->comm));
            continue;
        }

        post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST;
        post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
        osc_pt2pt_hton(&post_req, proc);

        /* we don't want to send any data, since we're the exposure
           epoch only, so use an unbuffered send */
        ret = ompi_osc_pt2pt_control_send_unbuffered(module, rank, &post_req,
                                                     sizeof(ompi_osc_pt2pt_header_post_t));
        if (OMPI_SUCCESS != ret) {
            break;
        }
    }

    ompi_osc_pt2pt_release_peers (peers, ompi_group_size(module->pw_group));

    return ret;
}
/**
 * @brief end a PSCW exposure epoch, blocking (MPI_Win_wait)
 *
 * @param[in] win ompi window
 *
 * @returns OMPI_SUCCESS on success
 * @returns OMPI_ERR_RMA_SYNC if no exposure epoch (post) is active
 *
 * Blocks until every origin's complete message and all of its announced
 * fragments have been processed, then releases the post group.
 */
int ompi_osc_pt2pt_wait (ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_group_t *group;

    if (NULL == module->pw_group) {
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_wait entering... module %p", (void *) module));

    OPAL_THREAD_LOCK(&module->lock);
    /* both counters are biased negative; zero / non-negative means all
       complete messages and fragments have arrived */
    while (0 != module->num_complete_msgs || module->active_incoming_frag_count < 0) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "module %p, num_complete_msgs = %d, "
                             "active_incoming_frag_count = %d", (void *) module, module->num_complete_msgs,
                             module->active_incoming_frag_count));
        opal_condition_wait(&module->cond, &module->lock);
    }

    group = module->pw_group;
    module->pw_group = NULL;
    OPAL_THREAD_UNLOCK(&module->lock);

    /* release outside the lock; reference was taken in post */
    OBJ_RELEASE(group);

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_wait complete"));

    return OMPI_SUCCESS;
}
/**
 * @brief non-blocking probe for the end of a PSCW exposure epoch (MPI_Win_test)
 *
 * Sets *flag to 1 and ends the exposure epoch when every complete message
 * and all announced fragments have been processed; otherwise sets *flag
 * to 0 and leaves the epoch open.
 */
int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);

#if !OPAL_ENABLE_PROGRESS_THREADS
    /* without a progress thread, drive progress ourselves */
    opal_progress();
#endif

    if (NULL == module->pw_group) {
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_THREAD_LOCK(&(module->lock));

    /* epoch is finished once the biased counters have both drained */
    if (0 == module->num_complete_msgs && module->active_incoming_frag_count >= 0) {
        ompi_group_t *group = module->pw_group;

        *flag = 1;
        module->pw_group = NULL;
        OBJ_RELEASE(group);
    } else {
        *flag = 0;
    }

    OPAL_THREAD_UNLOCK(&(module->lock));

    return OMPI_SUCCESS;
}
/**
 * @brief process an incoming PSCW complete message
 *
 * @param[in] module     osc pt2pt module
 * @param[in] source     rank of the origin that finished its access epoch
 * @param[in] frag_count number of fragments the origin announced
 *
 * Subtracts the announced fragment count from the incoming counter and
 * bumps the complete-message counter, waking any waiter (wait/test) when
 * the last complete message arrives.
 */
void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, int frag_count)
{
    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: process_complete got complete message from %d. expected fragment count %d. "
                         "current incomming count: %d. expected complete msgs: %d", source,
                         frag_count, module->active_incoming_frag_count, module->num_complete_msgs));

    /* the current fragment is not part of the frag_count so we need to add it here */
    OPAL_THREAD_ADD_FETCH32(&module->active_incoming_frag_count, -frag_count);

    /* make sure the signal count is written before changing the complete message count */
    opal_atomic_wmb ();

    /* num_complete_msgs was initialized to -group_size in post; the last
       increment brings it to zero */
    if (0 == OPAL_THREAD_ADD_FETCH32(&module->num_complete_msgs, 1)) {
        /* take the lock so the broadcast cannot race a waiter between its
           predicate check and its condition wait */
        OPAL_THREAD_LOCK(&module->lock);
        opal_condition_broadcast (&module->cond);
        OPAL_THREAD_UNLOCK(&module->lock);
    }
}
/**
 * @brief process an incoming PSCW post message
 *
 * @param[in] module osc pt2pt module
 * @param[in] source rank of the target that posted its exposure epoch
 *
 * If the post matches the current start group, count it toward the
 * expected posts; otherwise mark the peer as having an unexpected post so
 * a future MPI_Win_start can account for it.
 */
void osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source)
{
    ompi_osc_pt2pt_sync_t *sync = &module->all_sync;

    OPAL_THREAD_LOCK(&sync->lock);

    /* verify that this proc is part of the current start group */
    if (!ompi_osc_pt2pt_sync_pscw_peer (module, source, NULL)) {
        ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source);

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "received unexpected post message from %d for future PSCW synchronization",
                             source));

        /* remembered here; consumed by the NOCHECK-less path in start */
        ompi_osc_pt2pt_peer_set_unex (peer, true);
        OPAL_THREAD_UNLOCK(&sync->lock);
    } else {
        OPAL_THREAD_UNLOCK(&sync->lock);

        ompi_osc_pt2pt_sync_expected (sync);

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "received post message for PSCW synchronization. post messages still needed: %d",
                             sync->sync_expected));
    }
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,522 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include <string.h>
#include "osc_pt2pt.h"
#include "osc_pt2pt_frag.h"
#include "osc_pt2pt_request.h"
#include "osc_pt2pt_data_move.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
static int component_register(void);
static int component_init(bool enable_progress_threads, bool enable_mpi_threads);
static int component_finalize(void);
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor);
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor, int *model);
/* Component-level descriptor for the pt2pt one-sided component: version
 * info, MCA registration hook, and the init/query/select/finalize entry
 * points the OSC framework calls. */
ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = {
    { /* ompi_osc_base_component_t */
        .osc_version = {
            OMPI_OSC_BASE_VERSION_3_0_0,
            .mca_component_name = "pt2pt",
            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                                  OMPI_RELEASE_VERSION),
            .mca_register_component_params = component_register,
        },
        .osc_data = {
            /* The component is not checkpoint ready */
            MCA_BASE_METADATA_PARAM_NONE
        },
        .osc_init = component_init,
        .osc_query = component_query,
        .osc_select = component_select,
        .osc_finalize = component_finalize,
    }
};
/* Per-window function-pointer table. component_select memcpy's the
 * ompi_osc_base_module_t portion of this template into each newly created
 * module, wiring up all MPI RMA entry points for pt2pt windows. */
ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_template = {
    .super = {
        .osc_win_attach = ompi_osc_pt2pt_attach,
        .osc_win_detach = ompi_osc_pt2pt_detach,
        .osc_free = ompi_osc_pt2pt_free,

        .osc_put = ompi_osc_pt2pt_put,
        .osc_get = ompi_osc_pt2pt_get,
        .osc_accumulate = ompi_osc_pt2pt_accumulate,
        .osc_compare_and_swap = ompi_osc_pt2pt_compare_and_swap,
        .osc_fetch_and_op = ompi_osc_pt2pt_fetch_and_op,
        .osc_get_accumulate = ompi_osc_pt2pt_get_accumulate,

        .osc_rput = ompi_osc_pt2pt_rput,
        .osc_rget = ompi_osc_pt2pt_rget,
        .osc_raccumulate = ompi_osc_pt2pt_raccumulate,
        .osc_rget_accumulate = ompi_osc_pt2pt_rget_accumulate,

        .osc_fence = ompi_osc_pt2pt_fence,

        .osc_start = ompi_osc_pt2pt_start,
        .osc_complete = ompi_osc_pt2pt_complete,
        .osc_post = ompi_osc_pt2pt_post,
        .osc_wait = ompi_osc_pt2pt_wait,
        .osc_test = ompi_osc_pt2pt_test,

        .osc_lock = ompi_osc_pt2pt_lock,
        .osc_unlock = ompi_osc_pt2pt_unlock,
        .osc_lock_all = ompi_osc_pt2pt_lock_all,
        .osc_unlock_all = ompi_osc_pt2pt_unlock_all,

        .osc_sync = ompi_osc_pt2pt_sync,
        .osc_flush = ompi_osc_pt2pt_flush,
        .osc_flush_all = ompi_osc_pt2pt_flush_all,
        .osc_flush_local = ompi_osc_pt2pt_flush_local,
        .osc_flush_local_all = ompi_osc_pt2pt_flush_local_all,
    }
};
bool ompi_osc_pt2pt_no_locks = false;
static bool using_thread_multiple = false;
/* look up parameters for configuring this window. The code first
looks in the info structure passed by the user, then through mca
parameters. */
/* Look up a boolean info key, falling back to the caller-supplied default.
 * 'result' arrives holding the default and is returned either way;
 * presumably opal_info_get_bool leaves it untouched when the key is absent
 * (flag unset) -- confirm against the opal_info API. */
static bool check_config_value_bool(char *key, opal_info_t *info, bool result)
{
    int key_found;

    (void) opal_info_get_bool (info, key, &result, &key_found);

    return result;
}
/* Register this component's MCA parameters: no_locks (optimization when
 * MPI_LOCK is never used), buffer_size (coalescing threshold), and
 * receive_count (receives posted per window). */
static int component_register (void)
{
    ompi_osc_pt2pt_no_locks = false;
    (void) mca_base_component_var_register(&mca_osc_pt2pt_component.super.osc_version,
                                           "no_locks",
                                           "Enable optimizations available only if MPI_LOCK is "
                                           "not used. "
                                           "Info key of same name overrides this value.",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &ompi_osc_pt2pt_no_locks);

    mca_osc_pt2pt_component.buffer_size = 8192;
    (void) mca_base_component_var_register (&mca_osc_pt2pt_component.super.osc_version, "buffer_size",
                                            "Data transfers smaller than this limit may be coalesced before "
                                            "being transferred (default: 8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
                                            NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                            &mca_osc_pt2pt_component.buffer_size);

    mca_osc_pt2pt_component.receive_count = 4;
    (void) mca_base_component_var_register (&mca_osc_pt2pt_component.super.osc_version, "receive_count",
                                            "Number of receives to post for each window for incoming fragments "
                                            "(default: 4)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_4,
                                            MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_pt2pt_component.receive_count);

    return OMPI_SUCCESS;
}
/**
 * @brief opal_progress callback for the pt2pt component
 *
 * @returns number of receives/operations completed this pass
 *
 * Drains the queue of pending receives, then retries queued flush/unlock
 * operations that could not complete when they first arrived.
 */
static int component_progress (void)
{
    int completed = 0;
    /* snapshot the list sizes; new entries arriving during this pass are
       picked up on the next progress call */
    int pending_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_operations);
    int recv_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_receives);
    ompi_osc_pt2pt_pending_t *pending, *next;

    if (recv_count) {
        for (int i = 0 ; i < recv_count ; ++i) {
            /* pop under the lock, process outside it */
            OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_receives_lock);
            ompi_osc_pt2pt_receive_t *recv = (ompi_osc_pt2pt_receive_t *) opal_list_remove_first (&mca_osc_pt2pt_component.pending_receives);
            OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_receives_lock);
            if (NULL == recv) {
                break;
            }

            (void) ompi_osc_pt2pt_process_receive (recv);
            completed++;
        }
    }

    /* process one incoming request */
    if (pending_count) {
        OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_operations_lock);
        OPAL_LIST_FOREACH_SAFE(pending, next, &mca_osc_pt2pt_component.pending_operations, ompi_osc_pt2pt_pending_t) {
            int ret;

            switch (pending->header.base.type) {
            case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
                ret = ompi_osc_pt2pt_process_flush (pending->module, pending->source,
                                                    &pending->header.flush);
                break;
            case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
                ret = ompi_osc_pt2pt_process_unlock (pending->module, pending->source,
                                                     &pending->header.unlock);
                break;
            default:
                /* shouldn't happen: only flush/unlock requests are queued here */
                assert (0);
                abort ();
            }

            /* a non-success return means the operation still can't complete;
               leave it queued for a later progress pass */
            if (OMPI_SUCCESS == ret) {
                opal_list_remove_item (&mca_osc_pt2pt_component.pending_operations, &pending->super);
                OBJ_RELEASE(pending);
                completed++;
            }
        }
        OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_operations_lock);
    }

    return completed;
}
/**
 * @brief one-time component initialization
 *
 * @param[in] enable_progress_threads unused here
 * @param[in] enable_mpi_threads      true when MPI_THREAD_MULTIPLE was requested
 *
 * @returns OMPI_SUCCESS or an error from free-list initialization
 *
 * Sets up component-wide locks, pending lists, the module hash table, and
 * the fragment/request free lists.
 */
static int
component_init(bool enable_progress_threads,
               bool enable_mpi_threads)
{
    int ret;

    /* remembered so component_select can refuse THREAD_MULTIPLE windows
       (see workaround for issue 2614 there) */
    if (enable_mpi_threads) {
        using_thread_multiple = true;
    }

    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_operations, opal_list_t);
    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_operations_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_receives, opal_list_t);
    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_receives_lock, opal_mutex_t);

    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.modules,
                  opal_hash_table_t);
    opal_hash_table_init(&mca_osc_pt2pt_component.modules, 2);

    /* progress callback is registered lazily by the first window created */
    mca_osc_pt2pt_component.progress_enable = false;
    mca_osc_pt2pt_component.module_count = 0;

    /* fragment payloads are sized buffer_size plus the fragment header */
    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.frags, opal_free_list_t);
    ret = opal_free_list_init (&mca_osc_pt2pt_component.frags,
                               sizeof(ompi_osc_pt2pt_frag_t), 8,
                               OBJ_CLASS(ompi_osc_pt2pt_frag_t),
                               mca_osc_pt2pt_component.buffer_size +
                               sizeof (ompi_osc_pt2pt_frag_header_t),
                               8, 1, -1, 1, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_free_list_init failed: %d",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    OBJ_CONSTRUCT(&mca_osc_pt2pt_component.requests, opal_free_list_t);
    ret = opal_free_list_init (&mca_osc_pt2pt_component.requests,
                               sizeof(ompi_osc_pt2pt_request_t), 8,
                               OBJ_CLASS(ompi_osc_pt2pt_request_t),
                               0, 0, 0, -1, 32, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_free_list_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    return ret;
}
/**
 * @brief tear down component-wide state
 *
 * Unregisters the progress callback (if registered), warns about windows
 * that were created but never freed, and destroys the free lists, module
 * table, and locks built in component_init.
 *
 * Fix over previous revision: the definition was missing the `static`
 * keyword even though the forward declaration at the top of this file
 * declares the function `static`; the linkage specification is now
 * consistent with the declaration and the other component functions.
 */
static int
component_finalize(void)
{
    size_t num_modules;

    if (mca_osc_pt2pt_component.progress_enable) {
        opal_progress_unregister (component_progress);
    }

    if (0 !=
        (num_modules = opal_hash_table_get_size(&mca_osc_pt2pt_component.modules))) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "WARNING: There were %d Windows created but not freed.",
                    (int) num_modules);
    }

    OBJ_DESTRUCT(&mca_osc_pt2pt_component.frags);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.modules);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.lock);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.requests);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_operations);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_operations_lock);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_receives);
    OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_receives_lock);

    return OMPI_SUCCESS;
}
/* Report this component's priority for a proposed window. Shared-memory
 * windows are declined (handled by the sm onesided component); everything
 * else gets a modest fixed priority. */
static int
component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                struct ompi_communicator_t *comm, struct opal_info_t *info,
                int flavor)
{
    if (MPI_WIN_FLAVOR_SHARED == flavor) {
        return -1;
    }

    return 10;
}
/**
 * @brief create and initialize a pt2pt module for a new window
 *
 * @param[in]  win       window being created
 * @param[in,out] base   window base pointer (allocated here for
 *                       MPI_WIN_FLAVOR_ALLOCATE)
 * @param[in]  size      window size in bytes
 * @param[in]  disp_unit displacement unit of the local process
 * @param[in]  comm      communicator (duplicated into the module)
 * @param[in]  info      info object (no_locks key honored)
 * @param[in]  flavor    window flavor (SHARED is rejected)
 * @param[out] model     set to MPI_WIN_UNIFIED
 *
 * @returns OMPI_SUCCESS or an error code; on error the partially built
 *          module is torn down through ompi_osc_pt2pt_free
 */
static int
component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                 struct ompi_communicator_t *comm, struct opal_info_t *info,
                 int flavor, int *model)
{
    ompi_osc_pt2pt_module_t *module = NULL;
    int ret;
    char *name;

    /* We don't support shared windows; that's for the sm onesided
       component */
    if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED;

    /*
     * workaround for issue https://github.com/open-mpi/ompi/issues/2614
     * The following check needs to be removed once 2614 is addressed.
     */
    if (using_thread_multiple) {
        opal_show_help("help-osc-pt2pt.txt", "mpi-thread-multiple-not-supported", true);
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* create module structure with all fields initialized to zero */
    module = (ompi_osc_pt2pt_module_t*)
        calloc(1, sizeof(ompi_osc_pt2pt_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_pt2pt_module_template,
           sizeof(ompi_osc_base_module_t));

    /* initialize the objects, so that always free in cleanup */
    OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t);
    OBJ_CONSTRUCT(&module->cond, opal_condition_t);
    OBJ_CONSTRUCT(&module->locks_pending, opal_list_t);
    OBJ_CONSTRUCT(&module->locks_pending_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->pending_acc, opal_list_t);
    OBJ_CONSTRUCT(&module->pending_acc_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&module->buffer_gc, opal_list_t);
    OBJ_CONSTRUCT(&module->gc_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&module->all_sync, ompi_osc_pt2pt_sync_t);
    OBJ_CONSTRUCT(&module->peer_hash, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->peer_lock, opal_mutex_t);

    ret = opal_hash_table_init (&module->outstanding_locks, 64);
    if (OPAL_SUCCESS != ret) {
        goto cleanup;
    }

    ret = opal_hash_table_init (&module->peer_hash, 128);
    if (OPAL_SUCCESS != ret) {
        goto cleanup;
    }

    /* options */
    /* FIX ME: should actually check this value... */
#if 1
    module->accumulate_ordering = 1;
#else
    ompi_osc_base_config_value_equal("accumulate_ordering", info, "none");
#endif

    /* fill in our part */
    if (MPI_WIN_FLAVOR_ALLOCATE == flavor && size) {
        /* memory allocated here is owned by the module and released in free */
        module->free_after = *base = malloc(size);
        if (NULL == *base) {
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto cleanup;
        }
    }

    /* in the dynamic case base is MPI_BOTTOM */
    if (MPI_WIN_FLAVOR_DYNAMIC != flavor) {
        module->baseptr = *base;
    }

    ret = ompi_comm_dup(comm, &module->comm);
    if (OMPI_SUCCESS != ret) goto cleanup;

    OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                         "pt2pt component creating window with id %d",
                         ompi_comm_get_cid(module->comm)));

    /* record my displacement unit.  Always resolved at target */
    module->disp_unit = disp_unit;

    /* peer op count data */
    module->epoch_outgoing_frag_count = calloc (ompi_comm_size(comm), sizeof(uint32_t));
    if (NULL == module->epoch_outgoing_frag_count) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /* the statement below (from Brian) does not seem correct so disable active target on the
     * window. if this end up being incorrect please revert this one change */
#if 0
    /* initially, we're in that pseudo-fence state, so we allow eager
       sends (yay for Fence).  Other protocols will disable before
       they start their epochs, so this isn't a problem. */
    module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_FENCE;
    module->all_sync.eager_send_active = true;
#endif

    /* lock data */
    module->no_locks = check_config_value_bool ("no_locks", info, ompi_osc_pt2pt_no_locks);

    /* update component data */
    OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.lock);
    ret = opal_hash_table_set_value_uint32(&mca_osc_pt2pt_component.modules,
                                           ompi_comm_get_cid(module->comm),
                                           module);
    OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.lock);
    if (OMPI_SUCCESS != ret) goto cleanup;

    /* fill in window information */
    *model = MPI_WIN_UNIFIED;
    win->w_osc_module = (ompi_osc_base_module_t*) module;
    opal_asprintf(&name, "pt2pt window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* sync memory - make sure all initialization completed */
    opal_atomic_mb();

    ret = ompi_osc_pt2pt_frag_start_receive (module);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        goto cleanup;
    }

    /* barrier to prevent arrival of lock requests before we're
       fully created */
    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);
    if (OMPI_SUCCESS != ret) goto cleanup;

    /* register the progress callback on first use */
    if (!mca_osc_pt2pt_component.progress_enable) {
        opal_progress_register (component_progress);
        mca_osc_pt2pt_component.progress_enable = true;
    }

    if (module->no_locks) {
        win->w_flags |= OMPI_WIN_NO_LOCKS;
    }

    OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                         "done creating pt2pt window %d", ompi_comm_get_cid(module->comm)));

    return OMPI_SUCCESS;

 cleanup:
    /* set the module so we properly cleanup */
    win->w_osc_module = (ompi_osc_base_module_t*) module;
    ompi_osc_pt2pt_free (win);

    return ret;
}
/* MPI_Win_set_info entry point: no info keys are acted upon here, but the
 * call is collective, so synchronize the communicator before returning. */
int
ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct opal_info_t *info)
{
    ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t*) win->w_osc_module;

    /* enforce collectiveness... */
    return module->comm->c_coll->coll_barrier(module->comm,
                                              module->comm->c_coll->coll_barrier_module);
}
/* MPI_Win_get_info entry point: the component exposes no window-specific
 * keys, so hand back a freshly allocated, empty info object. */
int
ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct opal_info_t **info_used)
{
    opal_info_t *info = OBJ_NEW(opal_info_t);

    if (NULL == info) {
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    *info_used = info;

    return OMPI_SUCCESS;
}
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_t, opal_list_item_t, NULL, NULL);
/* opal object constructor: start with no attached buffer and no posted
 * PML request */
static void ompi_osc_pt2pt_receive_construct (ompi_osc_pt2pt_receive_t *recv)
{
    recv->pml_request = NULL;
    recv->buffer = NULL;
}
/* Tear down a receive object: release the eager buffer and, if a pml
 * receive is still outstanding, cancel and release it. */
static void ompi_osc_pt2pt_receive_destruct (ompi_osc_pt2pt_receive_t *recv)
{
    free (recv->buffer);

    /* pml_request may be NULL (never posted) or MPI_REQUEST_NULL
     * (already torn down); only a live request needs cancelling */
    if (recv->pml_request && MPI_REQUEST_NULL != recv->pml_request) {
        /* clear the completion callback so cancellation cannot fire it */
        recv->pml_request->req_complete_cb = NULL;
        ompi_request_cancel (recv->pml_request);
        ompi_request_free (&recv->pml_request);
    }
}
/* receive objects need construct/destruct to manage the buffer and the
 * posted pml request */
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_receive_t, opal_list_item_t,
                   ompi_osc_pt2pt_receive_construct,
                   ompi_osc_pt2pt_receive_destruct);
/* initialize per-peer state: empty fragment queue, no active fragment,
 * no incoming passive-target fragments counted, no flags set */
static void ompi_osc_pt2pt_peer_construct (ompi_osc_pt2pt_peer_t *peer)
{
    peer->flags = 0;
    peer->passive_incoming_frag_count = 0;
    peer->active_frag = 0;

    OBJ_CONSTRUCT(&peer->queued_frags, opal_list_t);
    OBJ_CONSTRUCT(&peer->lock, opal_mutex_t);
}
/* release per-peer state (the queue is expected to be empty by now) */
static void ompi_osc_pt2pt_peer_destruct (ompi_osc_pt2pt_peer_t *peer)
{
    OBJ_DESTRUCT(&peer->lock);
    OBJ_DESTRUCT(&peer->queued_frags);
}
/* peer objects own a fragment queue and a lock, so both constructor and
 * destructor are required */
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_peer_t, opal_object_t,
                   ompi_osc_pt2pt_peer_construct,
                   ompi_osc_pt2pt_peer_destruct);

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,159 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_OSC_PT2PT_DATA_MOVE_H
#define OMPI_MCA_OSC_PT2PT_DATA_MOVE_H

#include "osc_pt2pt_header.h"

/**
 * ompi_osc_pt2pt_control_send:
 *
 * @short Send a control message to a peer through the fragment system.
 *
 * @param[in] module - OSC PT2PT module
 * @param[in] target - Target rank
 * @param[in] data   - Data to send
 * @param[in] len    - Length of data
 */
int ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module,
                                int target,
                                void *data,
                                size_t len);

/**
 * ompi_osc_pt2pt_control_send_unbuffered:
 *
 * @short Send an unbuffered control message to a peer.
 *
 * @param[in] module - OSC PT2PT module
 * @param[in] target - Target rank
 * @param[in] data - Data to send
 * @param[in] len - Length of data
 *
 * @long Directly send a control message. This does not allocate a
 * fragment, so should only be used when sending other messages would
 * be erroneous (such as complete messages, when there may be queued
 * transactions from an overlapping post that has already heard back
 * from its peer). The buffer specified by data will be available
 * when this call returns.
 */
int ompi_osc_pt2pt_control_send_unbuffered (ompi_osc_pt2pt_module_t *module,
                                            int target, void *data, size_t len);

/**
 * ompi_osc_pt2pt_isend_w_cb:
 *
 * @short Post a non-blocking send with a specified callback.
 *
 * @param[in] ptr - Source buffer. Will be available when the callback fires
 * @param[in] count - Number of elements to send
 * @param[in] datatype - Datatype of elements
 * @param[in] target - Rank to send data to
 * @param[in] tag - Tag to use
 * @param[in] comm - Communicator for communicating with rank
 * @param[in] cb - Function to call when the request is complete
 * @param[in] ctx - Context to store in new request for callback
 *
 * @long This function posts a new send request. Upon completion the function cb will
 * be called with the associated request. The context specified in ctx will be stored in
 * the req_completion_cb_data member of the ompi_request_t for use by the callback.
 */
int ompi_osc_pt2pt_isend_w_cb (const void *ptr, int count, ompi_datatype_t *datatype, int target, int tag,
                               ompi_communicator_t *comm, ompi_request_complete_fn_t cb, void *ctx);

/**
 * ompi_osc_pt2pt_irecv_w_cb:
 *
 * @short Post a non-blocking receive with a specified callback.
 *
 * @param[inout] ptr - Destination for incoming data
 * @param[in] count - Number of elements to receive
 * @param[in] datatype - Datatype of elements
 * @param[in] source - Rank to receive data from
 * @param[in] tag - Tag to use
 * @param[in] comm - Communicator for communicating with rank
 * @param[in] request_out - Location to store new receive request (may be NULL)
 * @param[in] cb - Function to call when the request is complete
 * @param[in] ctx - Context to store in new request for callback
 *
 * @long This function posts a new request and stores the request in request_out if
 * provided. Upon completion the function cb will be called with the associated
 * request. The context specified in ctx will be stored in the req_completion_cb_data
 * member of the ompi_request_t for use by the callback.
 */
int ompi_osc_pt2pt_irecv_w_cb (void *ptr, int count, ompi_datatype_t *datatype, int source, int tag,
                               ompi_communicator_t *comm, ompi_request_t **request_out,
                               ompi_request_complete_fn_t cb, void *ctx);

/* handle an incoming lock request from rank source */
int ompi_osc_pt2pt_process_lock(ompi_osc_pt2pt_module_t* module,
                                int source,
                                struct ompi_osc_pt2pt_header_lock_t* lock_header);

/* handle the acknowledgement of a previously sent lock request */
void ompi_osc_pt2pt_process_lock_ack(ompi_osc_pt2pt_module_t* module,
                                     struct ompi_osc_pt2pt_header_lock_ack_t* lock_header);

/* handle an incoming unlock request from rank source */
int ompi_osc_pt2pt_process_unlock(ompi_osc_pt2pt_module_t* module,
                                  int source,
                                  struct ompi_osc_pt2pt_header_unlock_t* lock_header);

/* handle an incoming flush request from rank source */
int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source,
                                  ompi_osc_pt2pt_header_flush_t *flush_header);

/**
 * ompi_osc_pt2pt_process_unlock_ack:
 *
 * @short Process an incoming unlock acknowledgement.
 *
 * @param[in] module           - OSC PT2PT module
 * @param[in] source           - Source rank
 * @param[in] unlock_ack_header - Incoming unlock ack header
 */
void ompi_osc_pt2pt_process_unlock_ack (ompi_osc_pt2pt_module_t *module, int source,
                                        ompi_osc_pt2pt_header_unlock_ack_t *unlock_ack_header);

/**
 * ompi_osc_pt2pt_process_flush_ack:
 *
 * @short Process an incoming flush acknowledgement.
 *
 * @param[in] module          - OSC PT2PT module
 * @param[in] source          - Source rank
 * @param[in] flush_ack_header - Incoming flush ack header
 */
void ompi_osc_pt2pt_process_flush_ack (ompi_osc_pt2pt_module_t *module, int source,
                                       ompi_osc_pt2pt_header_flush_ack_t *flush_ack_header);

/**
 * ompi_osc_pt2pt_frag_start_receive:
 *
 * @short Start receiving fragments on the OSC module.
 *
 * @param[in] module - OSC module
 *
 * @long This function starts receiving eager fragments on the module. The current
 * implementation uses the pml to transfer eager fragments.
 */
int ompi_osc_pt2pt_frag_start_receive (ompi_osc_pt2pt_module_t *module);

/**
 * ompi_osc_pt2pt_process_receive:
 *
 * @short Repost a receive request
 *
 * @param[in] recv - Receive structure
 *
 * @long This function reposts a receive request. This function should not be called from
 * a pml request callback as it can lead to deep recursion during heavy load.
 */
int ompi_osc_pt2pt_process_receive (ompi_osc_pt2pt_receive_t *recv);

#endif

Просмотреть файл

@ -1,198 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017-2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "osc_pt2pt.h"
#include "osc_pt2pt_frag.h"
#include "osc_pt2pt_data_move.h"
/* Point the fragment's packing buffer at the free-list item's payload.
 * This runs once, when the item is first created by the free list. */
static void ompi_osc_pt2pt_frag_constructor (ompi_osc_pt2pt_frag_t *frag)
{
    frag->buffer = frag->super.ptr;
}
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_frag_t, opal_free_list_item_t,
                   ompi_osc_pt2pt_frag_constructor, NULL);
/* pml completion callback for an outgoing fragment send: account for the
 * completed send, recycle the fragment to the component free list, and
 * release the request (ownership was transferred to this callback). */
static int frag_send_cb (ompi_request_t *request)
{
    ompi_osc_pt2pt_frag_t *frag =
        (ompi_osc_pt2pt_frag_t*) request->req_complete_cb_data;
    ompi_osc_pt2pt_module_t *module = frag->module;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag_send complete to %d, frag = %p, request = %p",
                         frag->target, (void *) frag, (void *) request));

    mark_outgoing_completion(module);
    opal_free_list_return (&mca_osc_pt2pt_component.frags, &frag->super);

    ompi_request_free (&request);
    return 1;
}
/* Send a fully packed fragment to its target.  The payload length is the
 * distance from the start of the buffer to the first unused byte; the
 * header is converted to network byte order when the peer requires it. */
static int frag_send (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *frag)
{
    int payload_len = (int)((uintptr_t) frag->top - (uintptr_t) frag->buffer);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag_send called to %d, frag = %p, count = %d",
                         frag->target, (void *) frag, payload_len));

    OSC_PT2PT_HTON(frag->header, module, frag->target);

    return ompi_osc_pt2pt_isend_w_cb (frag->buffer, payload_len, MPI_BYTE, frag->target,
                                      OSC_PT2PT_FRAG_TAG, module->comm, frag_send_cb, frag);
}
/**
 * Queue or send a completed fragment to its target.
 *
 * If eager sends are not yet active for the peer, or other fragments are
 * already queued (ordering must be preserved), the fragment is appended
 * to the peer's queue; otherwise it is sent immediately.
 */
int ompi_osc_pt2pt_frag_start (ompi_osc_pt2pt_module_t *module,
                               ompi_osc_pt2pt_frag_t *frag)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, frag->target);
    int ret;

    /* the fragment must be fully filled in (no writers pending) and must
     * no longer be the peer's active fragment */
    assert(0 == frag->pending && peer->active_frag != (intptr_t) frag);

    /* we need to signal now that a frag is outgoing to ensure the count sent
     * with the unlock message is correct */
    ompi_osc_signal_outgoing (module, frag->target, 1);

    /* if eager sends are not active, can't send yet, so buffer and
       get out... */
    if (!ompi_osc_pt2pt_peer_sends_active (module, frag->target) || opal_list_get_size (&peer->queued_frags)) {
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "queuing fragment to peer %d",
                             frag->target));
        OPAL_THREAD_SCOPED_LOCK(&peer->lock,
                                opal_list_append(&peer->queued_frags, (opal_list_item_t *) frag));
        return OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "sending fragment to peer %d",
                         frag->target));

    ret = frag_send(module, frag);

    /* wake any threads waiting for outgoing traffic to make progress */
    opal_condition_broadcast(&module->cond);

    return ret;
}
/* If the peer has an active (partially filled) fragment, atomically take
 * ownership of it and send it.  Called during synchronization to push
 * out buffered operations. */
static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_peer_t *peer)
{
    ompi_osc_pt2pt_frag_t *active_frag = (ompi_osc_pt2pt_frag_t *) peer->active_frag;
    int ret = OMPI_SUCCESS;

    if (NULL == active_frag) {
        /* nothing to do */
        return OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: flushing active fragment to target %d. pending: %d",
                         active_frag->target, active_frag->pending));

    /* swap the peer's active-fragment pointer with 0; only the winner of
     * this exchange may decrement pending and send the fragment */
    if (opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, (intptr_t *) &active_frag, 0)) {
        if (0 != OPAL_THREAD_ADD_FETCH32(&active_frag->pending, -1)) {
            /* communication going on while synchronizing; this is an rma usage bug */
            return OMPI_ERR_RMA_SYNC;
        }

        ompi_osc_signal_outgoing (module, active_frag->target, 1);
        ret = frag_send (module, active_frag);
    }

    return ret;
}
/* Drain a peer's queue of buffered fragments, sending each in order
 * under the peer lock.  Stops at the first send failure.
 * NOTE(review): on failure the fragment already removed from the queue
 * is neither requeued nor returned to the free list -- confirm whether
 * this is acceptable for the error paths that reach here. */
int ompi_osc_pt2pt_frag_flush_pending (ompi_osc_pt2pt_module_t *module, int target)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    ompi_osc_pt2pt_frag_t *frag;
    int ret = OMPI_SUCCESS;

    /* walk through the pending list and send */
    OPAL_THREAD_LOCK(&peer->lock);
    while (NULL != (frag = ((ompi_osc_pt2pt_frag_t *) opal_list_remove_first (&peer->queued_frags)))) {
        ret = frag_send(module, frag);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&peer->lock);

    return ret;
}
/**
 * ompi_osc_pt2pt_frag_flush_pending_all:
 *
 * @short Flush the queued-fragment list of every peer in the window.
 *
 * @param[in] module - OSC PT2PT module
 *
 * @returns OMPI_SUCCESS, or the error from the first peer whose queue
 *          could not be flushed.
 */
int ompi_osc_pt2pt_frag_flush_pending_all (ompi_osc_pt2pt_module_t *module)
{
    /* initialize with OMPI_SUCCESS for consistency with the other flush
     * functions in this file (previously OPAL_SUCCESS; same value) */
    int ret = OMPI_SUCCESS;

    for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) {
        ret = ompi_osc_pt2pt_frag_flush_pending (module, i);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            return ret;
        }
    }

    return ret;
}
/* Flush all outgoing fragments for one target: first the queued
 * fragments (to preserve ordering), then the active fragment. */
int ompi_osc_pt2pt_frag_flush_target (ompi_osc_pt2pt_module_t *module, int target)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    int ret = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag flush to target target %d. queue fragments: %lu",
                         target, (unsigned long) opal_list_get_size (&peer->queued_frags)));

    ret = ompi_osc_pt2pt_frag_flush_pending (module, target);
    if (OMPI_SUCCESS != ret) {
        /* XXX -- TODO -- better error handling */
        return ret;
    }

    /* flush the active frag */
    ret = ompi_osc_pt2pt_flush_active_frag (module, peer);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag flush target %d finished", target));

    return ret;
}
/* Flush queued and active fragments for every peer in the window,
 * stopping at the first failure. */
int ompi_osc_pt2pt_frag_flush_all (ompi_osc_pt2pt_module_t *module)
{
    int rc = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag flush all begin"));

    /* try to start frags queued to all peers */
    for (int peer_rank = 0 ; peer_rank < ompi_comm_size (module->comm) ; ++peer_rank) {
        rc = ompi_osc_pt2pt_frag_flush_target (module, peer_rank);
        if (OMPI_SUCCESS != rc) {
            break;
        }
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag flush all done. ret: %d", rc));

    return rc;
}

Просмотреть файл

@ -1,201 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSC_PT2PT_FRAG_H
#define OSC_PT2PT_FRAG_H

#include "ompi/communicator/communicator.h"

#include "osc_pt2pt_header.h"
#include "osc_pt2pt_request.h"
#include "opal/align.h"

/** Communication buffer for packing messages */
struct ompi_osc_pt2pt_frag_t {
    opal_free_list_item_t super;

    /* target rank of buffer */
    int target;
    /* packing area (points at the free-list item's payload) */
    unsigned char *buffer;

    /* space remaining in buffer */
    size_t remain_len;

    /* start of unused space */
    char *top;

    /* Number of operations which have started writing into the frag, but not yet completed doing so */
    opal_atomic_int32_t pending;
    /* number of long (out-of-band payload) sends packed into this frag */
    int32_t pending_long_sends;
    /* fragment header at the front of buffer */
    ompi_osc_pt2pt_frag_header_t *header;
    /* owning OSC module */
    ompi_osc_pt2pt_module_t *module;
};
typedef struct ompi_osc_pt2pt_frag_t ompi_osc_pt2pt_frag_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_frag_t);

/* queue or send a completed fragment */
int ompi_osc_pt2pt_frag_start(ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *buffer);
/* flush queued and active fragments for one target */
int ompi_osc_pt2pt_frag_flush_target(ompi_osc_pt2pt_module_t *module, int target);
/* flush queued and active fragments for all targets */
int ompi_osc_pt2pt_frag_flush_all(ompi_osc_pt2pt_module_t *module);
/* flush only the queued (pending) fragments for one target */
int ompi_osc_pt2pt_frag_flush_pending (ompi_osc_pt2pt_module_t *module, int target);
/* flush only the queued (pending) fragments for all targets */
int ompi_osc_pt2pt_frag_flush_pending_all (ompi_osc_pt2pt_module_t *module);
/* Drop one writer reference on the fragment.  The last finisher (pending
 * reaches zero) starts the send.  The write barrier publishes the
 * caller's buffer writes before the atomic decrement that may hand the
 * fragment off to another thread. */
static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module,
                                              ompi_osc_pt2pt_frag_t* buffer)
{
    opal_atomic_wmb ();
    if (0 == OPAL_THREAD_ADD_FETCH32(&buffer->pending, -1)) {
        opal_atomic_mb ();
        return ompi_osc_pt2pt_frag_start(module, buffer);
    }

    return OMPI_SUCCESS;
}
/* Allocate a fresh fragment for the peer, first flushing (finishing) the
 * peer's current active fragment so that ordering is preserved.  Returns
 * NULL if no fragment is available or the flush fails. */
static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (ompi_osc_pt2pt_module_t *module,
                                                                             ompi_osc_pt2pt_peer_t *peer,
                                                                             size_t request_len)
{
    ompi_osc_pt2pt_frag_t *curr;

    /* to ensure ordering flush the buffer on the peer */
    curr = (ompi_osc_pt2pt_frag_t *) peer->active_frag;
    if (NULL != curr && opal_atomic_compare_exchange_strong_ptr (&peer->active_frag, (intptr_t *) &curr, 0)) {
        /* If there's something pending, the pending finish will
           start the buffer.  Otherwise, we need to start it now. */
        int ret = ompi_osc_pt2pt_frag_finish (module, curr);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            return NULL;
        }
    }

    curr = (ompi_osc_pt2pt_frag_t *) opal_free_list_get (&mca_osc_pt2pt_component.frags);
    if (OPAL_UNLIKELY(NULL == curr)) {
        return NULL;
    }

    /* set up the new fragment: header at the front, packing space after */
    curr->target = peer->rank;

    curr->header = (ompi_osc_pt2pt_frag_header_t*) curr->buffer;
    curr->top = (char*) (curr->header + 1);
    curr->remain_len = mca_osc_pt2pt_component.buffer_size;
    curr->module = module;
    curr->pending = 1;

    curr->header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_FRAG;
    curr->header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
#if OPAL_ENABLE_DEBUG
    curr->header->padding[0] = 0;
    curr->header->padding[1] = 0;
#endif
    if (module->passive_target_access_epoch) {
        curr->header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
    }
    curr->header->source = ompi_comm_rank(module->comm);
    curr->header->num_ops = 1;
#if OPAL_ENABLE_DEBUG
    curr->header->pad = 0;
#endif

    return curr;
}
/*
 * Note: this function takes the module lock
 *
 * buffered sends will cache the fragment on the peer object associated with the
 * target. unbuffered-sends will cause the target fragment to be flushed and
 * will not be cached on the peer. this causes the fragment to be flushed as
 * soon as it is sent. this allows request-based rma fragments to be completed
 * so MPI_Test/MPI_Wait/etc will work as expected.
 */
static inline int _ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, int target,
                                              size_t request_len, ompi_osc_pt2pt_frag_t **buffer,
                                              char **ptr, bool long_send, bool buffered)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    ompi_osc_pt2pt_frag_t *curr;

    /* osc pt2pt headers can have 64-bit values. these will need to be aligned
     * on an 8-byte boundary on some architectures so we up align the allocation
     * size here. */
    request_len = OPAL_ALIGN(request_len, 8, size_t);

    if (request_len > mca_osc_pt2pt_component.buffer_size) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output,
                         "attempting to allocate buffer for %lu bytes to target %d. long send: %d, "
                         "buffered: %d", (unsigned long) request_len, target, long_send, buffered));

    OPAL_THREAD_LOCK(&module->lock);
    if (buffered) {
        /* reuse the peer's active fragment if it has room; the cap of 32
         * pending long sends bounds out-of-band traffic per fragment */
        curr = (ompi_osc_pt2pt_frag_t *) peer->active_frag;
        if (NULL == curr || curr->remain_len < request_len || (long_send && curr->pending_long_sends == 32)) {
            curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len);
            if (OPAL_UNLIKELY(NULL == curr)) {
                OPAL_THREAD_UNLOCK(&module->lock);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            curr->pending_long_sends = long_send;
            peer->active_frag = (uintptr_t) curr;
        } else {
            OPAL_THREAD_ADD_FETCH32(&curr->header->num_ops, 1);
            curr->pending_long_sends += long_send;
        }

        OPAL_THREAD_ADD_FETCH32(&curr->pending, 1);
    } else {
        curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len);
        if (OPAL_UNLIKELY(NULL == curr)) {
            OPAL_THREAD_UNLOCK(&module->lock);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* carve the caller's region out of the fragment */
    *ptr = curr->top;
    *buffer = curr;

    curr->top += request_len;
    curr->remain_len -= request_len;
    OPAL_THREAD_UNLOCK(&module->lock);

    return OMPI_SUCCESS;
}
/**
 * Allocate space for an operation in an outgoing fragment, retrying
 * (flush queued fragments + progress) while the free list is exhausted.
 *
 * @param[in]  module      - OSC PT2PT module
 * @param[in]  target      - target rank
 * @param[in]  request_len - bytes needed (aligned up to 8 internally)
 * @param[out] buffer      - fragment the space was carved from
 * @param[out] ptr         - start of the caller's region
 * @param[in]  long_send   - whether the payload is sent out of band
 * @param[in]  buffered    - whether the fragment may be cached on the peer
 */
static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, int target,
                                             size_t request_len, ompi_osc_pt2pt_frag_t **buffer,
                                             char **ptr, bool long_send, bool buffered)
{
    int ret;

    /* _ompi_osc_pt2pt_frag_alloc() aligns the request up to 8 bytes before
     * comparing it against the buffer size, so apply the same alignment to
     * this early guard.  Otherwise a request in (buffer_size - 7, buffer_size]
     * could pass here, always fail inside with OMPI_ERR_OUT_OF_RESOURCE, and
     * spin forever in the retry loop below. */
    if (OPAL_ALIGN(request_len, 8, size_t) > mca_osc_pt2pt_component.buffer_size) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    do {
        ret = _ompi_osc_pt2pt_frag_alloc (module, target, request_len , buffer, ptr, long_send, buffered);
        if (OPAL_LIKELY(OMPI_SUCCESS == ret || OMPI_ERR_OUT_OF_RESOURCE != ret)) {
            break;
        }

        /* no fragment available: push queued fragments out and let the
         * progress engine run so a fragment can complete and be recycled */
        ompi_osc_pt2pt_frag_flush_pending_all (module);
        opal_progress ();
    } while (1);

    return ret;
}
#endif

Просмотреть файл

@ -1,438 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_OSC_PT2PT_HDR_H
#define OMPI_MCA_OSC_PT2PT_HDR_H

#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif

#include "opal/types.h"
#include "opal/util/arch.h"

/* Wire-protocol operation codes carried in the first byte of every
 * pt2pt header.  0x01-0x09 are RMA operations (_LONG variants carry the
 * payload as a separate message), 0x10-0x17 are synchronization control
 * messages, and 0x20 marks the fragment header itself. */
enum ompi_osc_pt2pt_hdr_type_t {
    OMPI_OSC_PT2PT_HDR_TYPE_PUT          = 0x01,
    OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG     = 0x02,
    OMPI_OSC_PT2PT_HDR_TYPE_ACC          = 0x03,
    OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG     = 0x04,
    OMPI_OSC_PT2PT_HDR_TYPE_GET          = 0x05,
    OMPI_OSC_PT2PT_HDR_TYPE_CSWAP        = 0x06,
    OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG   = 0x07,
    OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC      = 0x08,
    OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG = 0x09,
    OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE     = 0x10,
    OMPI_OSC_PT2PT_HDR_TYPE_POST         = 0x11,
    OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ     = 0x12,
    OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK     = 0x13,
    OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ   = 0x14,
    OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK   = 0x15,
    OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ    = 0x16,
    OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK    = 0x17,
    OMPI_OSC_PT2PT_HDR_TYPE_FRAG         = 0x20,
};
typedef enum ompi_osc_pt2pt_hdr_type_t ompi_osc_pt2pt_hdr_type_t;

/* header flag bits */
#define OMPI_OSC_PT2PT_HDR_FLAG_NBO            0x01  /* header is in network byte order */
#define OMPI_OSC_PT2PT_HDR_FLAG_VALID          0x02  /* header has been fully written */
#define OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET 0x04  /* op is part of a passive-target epoch */
#define OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE 0x08  /* datatype description sent separately */

/* common prefix of every header on the wire */
struct ompi_osc_pt2pt_header_base_t {
    /** fragment type. 8 bits */
    uint8_t type;

    /** fragment flags. 8 bits */
    uint8_t flags;
};
typedef struct ompi_osc_pt2pt_header_base_t ompi_osc_pt2pt_header_base_t;
/* put/put_long: tag matches the out-of-band payload message (long
 * variant); count/len/displacement describe the target region */
struct ompi_osc_pt2pt_header_put_t {
    ompi_osc_pt2pt_header_base_t base;
    uint16_t tag;
    uint32_t count;
    uint64_t len;
    uint64_t displacement;
};
typedef struct ompi_osc_pt2pt_header_put_t ompi_osc_pt2pt_header_put_t;

/* accumulate / get-accumulate: like put plus the reduction op id */
struct ompi_osc_pt2pt_header_acc_t {
    ompi_osc_pt2pt_header_base_t base;
    uint16_t tag;
    uint32_t count;
    uint64_t len;
    uint64_t displacement;
    uint32_t op;
};
typedef struct ompi_osc_pt2pt_header_acc_t ompi_osc_pt2pt_header_acc_t;

/* get: the target sends len bytes back tagged with tag */
struct ompi_osc_pt2pt_header_get_t {
    ompi_osc_pt2pt_header_base_t base;
    uint16_t tag;
    uint32_t count;
    uint64_t len;
    uint64_t displacement;
};
typedef struct ompi_osc_pt2pt_header_get_t ompi_osc_pt2pt_header_get_t;

/* complete (active-target): frag_count tells the target how many
 * fragments to expect before the epoch is done */
struct ompi_osc_pt2pt_header_complete_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    int frag_count;
};
typedef struct ompi_osc_pt2pt_header_complete_t ompi_osc_pt2pt_header_complete_t;

/* compare-and-swap: len bytes at displacement */
struct ompi_osc_pt2pt_header_cswap_t {
    ompi_osc_pt2pt_header_base_t base;

    uint16_t tag;

    uint32_t len;
    uint64_t displacement;
};
typedef struct ompi_osc_pt2pt_header_cswap_t ompi_osc_pt2pt_header_cswap_t;

/* post (active-target): type/flags only, no payload */
struct ompi_osc_pt2pt_header_post_t {
    ompi_osc_pt2pt_header_base_t base;
};
typedef struct ompi_osc_pt2pt_header_post_t ompi_osc_pt2pt_header_post_t;

/* lock request: lock_ptr identifies the originator's lock object and is
 * echoed back in the matching ack */
struct ompi_osc_pt2pt_header_lock_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    int32_t lock_type;
    uint64_t lock_ptr;
};
typedef struct ompi_osc_pt2pt_header_lock_t ompi_osc_pt2pt_header_lock_t;

struct ompi_osc_pt2pt_header_lock_ack_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    uint32_t source;
    uint64_t lock_ptr;
};
typedef struct ompi_osc_pt2pt_header_lock_ack_t ompi_osc_pt2pt_header_lock_ack_t;

/* unlock request: frag_count lets the target know when all preceding
 * fragments have arrived */
struct ompi_osc_pt2pt_header_unlock_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    int32_t lock_type;
    uint64_t lock_ptr;
    uint32_t frag_count;
};
typedef struct ompi_osc_pt2pt_header_unlock_t ompi_osc_pt2pt_header_unlock_t;

struct ompi_osc_pt2pt_header_unlock_ack_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[6];
#endif
    uint64_t lock_ptr;
};
typedef struct ompi_osc_pt2pt_header_unlock_ack_t ompi_osc_pt2pt_header_unlock_ack_t;

struct ompi_osc_pt2pt_header_flush_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    uint32_t frag_count;
    uint64_t lock_ptr;
};
typedef struct ompi_osc_pt2pt_header_flush_t ompi_osc_pt2pt_header_flush_t;

struct ompi_osc_pt2pt_header_flush_ack_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[6];
#endif
    uint64_t lock_ptr;
};
typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t;

/* header at the front of every eager fragment buffer */
struct ompi_osc_pt2pt_frag_header_t {
    ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT || OPAL_ENABLE_DEBUG
    uint8_t padding[2];
#endif
    uint32_t source; /* rank in window of source process */
    opal_atomic_int32_t num_ops; /* number of operations in this buffer */
    uint32_t pad; /* ensure the fragment header is a multiple of 8 bytes */
};
typedef struct ompi_osc_pt2pt_frag_header_t ompi_osc_pt2pt_frag_header_t;

/* view of any header; dispatch on base.type */
union ompi_osc_pt2pt_header_t {
    ompi_osc_pt2pt_header_base_t       base;
    ompi_osc_pt2pt_header_put_t        put;
    ompi_osc_pt2pt_header_acc_t        acc;
    ompi_osc_pt2pt_header_get_t        get;
    ompi_osc_pt2pt_header_complete_t   complete;
    ompi_osc_pt2pt_header_cswap_t      cswap;
    ompi_osc_pt2pt_header_post_t       post;
    ompi_osc_pt2pt_header_lock_t       lock;
    ompi_osc_pt2pt_header_lock_ack_t   lock_ack;
    ompi_osc_pt2pt_header_unlock_t     unlock;
    ompi_osc_pt2pt_header_unlock_ack_t unlock_ack;
    ompi_osc_pt2pt_header_flush_t      flush;
    ompi_osc_pt2pt_header_flush_ack_t  flush_ack;
    ompi_osc_pt2pt_frag_header_t       frag;
};
typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT

/* Per-header-type byte-swap macros.  Each *_NTOH converts an incoming
 * header from network to host order, each *_HTON the reverse.  The base
 * type/flags fields are single bytes and never swapped; headers with no
 * multi-byte fields get empty macros so the dispatch switches below stay
 * uniform. */
#define MCA_OSC_PT2PT_FRAG_HDR_NTOH(h) \
    (h).source = ntohl((h).source); \
    (h).num_ops = ntohl((h).num_ops); \
    (h).pad = ntohl((h).pad);
#define MCA_OSC_PT2PT_FRAG_HDR_HTON(h) \
    (h).source = htonl((h).source); \
    (h).num_ops = htonl((h).num_ops); \
    (h).pad = htonl((h).pad);

#define MCA_OSC_PT2PT_PUT_HDR_NTOH(h) \
    (h).tag = ntohs((h).tag); \
    (h).count = ntohl((h).count); \
    (h).len = ntoh64((h).len); \
    (h).displacement = ntoh64((h).displacement);
#define MCA_OSC_PT2PT_PUT_HDR_HTON(h) \
    (h).tag = htons((h).tag); \
    (h).count = htonl((h).count); \
    (h).len = hton64((h).len); \
    (h).displacement = hton64((h).displacement);

#define MCA_OSC_PT2PT_GET_HDR_NTOH(h) \
    (h).tag = ntohs((h).tag); \
    (h).count = ntohl((h).count); \
    (h).len = ntoh64((h).len); \
    (h).displacement = ntoh64((h).displacement);
#define MCA_OSC_PT2PT_GET_HDR_HTON(h) \
    (h).tag = htons((h).tag); \
    (h).count = htonl((h).count); \
    (h).len = hton64((h).len); \
    (h).displacement = hton64((h).displacement);

#define MCA_OSC_PT2PT_ACC_HDR_NTOH(h) \
    (h).tag = ntohs((h).tag); \
    (h).count = ntohl((h).count); \
    (h).len = ntoh64((h).len); \
    (h).displacement = ntoh64((h).displacement);\
    (h).op = ntohl((h).op);
#define MCA_OSC_PT2PT_ACC_HDR_HTON(h) \
    (h).tag = htons((h).tag); \
    (h).count = htonl((h).count); \
    (h).len = hton64((h).len); \
    (h).displacement = hton64((h).displacement);\
    (h).op = htonl((h).op);

#define MCA_OSC_PT2PT_LOCK_HDR_NTOH(h) \
    (h).lock_type = ntohl((h).lock_type)
#define MCA_OSC_PT2PT_LOCK_HDR_HTON(h) \
    (h).lock_type = htonl((h).lock_type)

#define MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(h) \
    (h).lock_type = ntohl((h).lock_type); \
    (h).frag_count = ntohl((h).frag_count)
#define MCA_OSC_PT2PT_UNLOCK_HDR_HTON(h) \
    (h).lock_type = htonl((h).lock_type); \
    (h).frag_count = htonl((h).frag_count)

#define MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(h) \
    (h).source = ntohl((h).source)
#define MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(h) \
    (h).source= htonl((h).source)

/* NOTE(review): lock_ptr fields are opaque cookies echoed back to their
 * originator, which is presumably why they are not byte-swapped here --
 * confirm against the protocol handlers. */
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(h)
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(h)

#define MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(h) \
    (h).frag_count = ntohl((h).frag_count)
#define MCA_OSC_PT2PT_COMPLETE_HDR_HTON(h) \
    (h).frag_count = htonl((h).frag_count)

#define MCA_OSC_PT2PT_FLUSH_HDR_NTOH(h) \
    (h).frag_count = ntohl((h).frag_count)
#define MCA_OSC_PT2PT_FLUSH_HDR_HTON(h) \
    (h).frag_count = htonl((h).frag_count)

#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(h)
#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(h)

#define MCA_OSC_PT2PT_POST_HDR_NTOH(h)
#define MCA_OSC_PT2PT_POST_HDR_HTON(h)

#define MCA_OSC_PT2PT_CSWAP_HDR_NTOH(h) \
    (h).tag = ntohs((h).tag); \
    (h).len = ntohl((h).len); \
    (h).displacement = ntoh64((h).displacement)
#define MCA_OSC_PT2PT_CSWAP_HDR_HTON(h) \
    (h).tag = htons((h).tag); \
    (h).len = htonl((h).len); \
    (h).displacement = hton64((h).displacement)

#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* Convert an incoming header from network to host byte order on
 * little-endian hosts.  A no-op unless the sender set the NBO flag
 * (i.e. the sender was big-endian); dispatches on the header type. */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_ntoh(ompi_osc_pt2pt_header_t *hdr)
{
    if(!(hdr->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO))
        return;

    switch(hdr->base.type) {
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
        MCA_OSC_PT2PT_PUT_HDR_NTOH(hdr->put);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
        MCA_OSC_PT2PT_ACC_HDR_NTOH(hdr->acc);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_GET:
        MCA_OSC_PT2PT_GET_HDR_NTOH(hdr->get);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
        MCA_OSC_PT2PT_CSWAP_HDR_NTOH(hdr->cswap);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
        MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(hdr->complete);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_POST:
        MCA_OSC_PT2PT_POST_HDR_NTOH(hdr->post);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
        MCA_OSC_PT2PT_LOCK_HDR_NTOH(hdr->lock);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
        MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(hdr->lock_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
        MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(hdr->unlock);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
        MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(hdr->unlock_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
        MCA_OSC_PT2PT_FLUSH_HDR_NTOH(hdr->flush);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
        MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(hdr->flush_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
        MCA_OSC_PT2PT_FRAG_HDR_NTOH(hdr->frag);
        break;
    default:
        /* unknown header type is a protocol error */
        assert(0);
        break;
    }
}
#else
/* homogeneous build (or big-endian host): nothing to convert */
#define osc_pt2pt_ntoh(h) \
    do { } while (0)
#endif /* !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
#define osc_pt2pt_hton(h, p) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), (p));
/* Convert an outgoing header to network byte order when the receiving
 * peer is big-endian, and set the NBO flag so the receiver knows to
 * convert back.  On big-endian hosts the header already is in network
 * order, so only the flag is set. */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_hton_intr(ompi_osc_pt2pt_header_t *hdr, const ompi_proc_t *proc)
{
#ifdef WORDS_BIGENDIAN
    hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
#else
    if(!(proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN))
        return;

    hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
    switch(hdr->base.type) {
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
        MCA_OSC_PT2PT_PUT_HDR_HTON(hdr->put);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
        MCA_OSC_PT2PT_ACC_HDR_HTON(hdr->acc);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_GET:
        MCA_OSC_PT2PT_GET_HDR_HTON(hdr->get);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
        MCA_OSC_PT2PT_CSWAP_HDR_HTON(hdr->cswap);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
        MCA_OSC_PT2PT_COMPLETE_HDR_HTON(hdr->complete);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_POST:
        MCA_OSC_PT2PT_POST_HDR_HTON(hdr->post);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
        MCA_OSC_PT2PT_LOCK_HDR_HTON(hdr->lock);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
        MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(hdr->lock_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
        MCA_OSC_PT2PT_UNLOCK_HDR_HTON(hdr->unlock);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
        MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(hdr->unlock_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
        MCA_OSC_PT2PT_FLUSH_HDR_HTON(hdr->flush);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
        MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(hdr->flush_ack);
        break;
    case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
        MCA_OSC_PT2PT_FRAG_HDR_HTON(hdr->frag);
        break;
    default:
        /* unknown header type is a protocol error */
        assert(0);
        break;
    }
#endif /* WORDS_BIGENDIAN */
}

/* convenience wrapper: look the peer proc up from the module/rank */
#define OSC_PT2PT_HTON(h, m, r) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), ompi_comm_peer_lookup((m)->comm, (r)));
#else
#define osc_pt2pt_hton(h, p) \
    do { } while (0)
#define OSC_PT2PT_HTON(h, m, r) \
    do { } while (0)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */

#endif /* OMPI_MCA_OSC_PT2PT_HDR_H */

Просмотреть файл

@ -1,118 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "osc_pt2pt.h"
/**
 * MPI_Win_attach implementation for the pt2pt OSC component.
 *
 * The pt2pt component performs no per-region registration for dynamic
 * windows, so attaching a memory region is a no-op that always succeeds.
 */
int ompi_osc_pt2pt_attach(struct ompi_win_t *win, void *base, size_t len)
{
    /* nothing to register; keep the unused parameters from warning */
    (void) win;
    (void) base;
    (void) len;

    return OMPI_SUCCESS;
}
/**
 * MPI_Win_detach implementation for the pt2pt OSC component.
 *
 * Mirrors ompi_osc_pt2pt_attach(): nothing was registered at attach
 * time, so detaching is a no-op that always succeeds.
 */
int
ompi_osc_pt2pt_detach(struct ompi_win_t *win, const void *base)
{
    /* nothing to tear down */
    (void) win;
    (void) base;

    return OMPI_SUCCESS;
}
/**
 * Free a pt2pt window: synchronize with the other window members,
 * unregister the module from the component, and release all module
 * resources.
 *
 * @param[in] win  window being freed
 *
 * @return OMPI_SUCCESS (cleanup failures are not reported to the caller)
 */
int ompi_osc_pt2pt_free(ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_peer_t *peer;
    uint32_t key;
    void *node;

    /* window was never fully set up (or already freed) */
    if (NULL == module) {
        return OMPI_SUCCESS;
    }

    if (NULL != module->comm) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "pt2pt component destroying window with id %d",
                            ompi_comm_get_cid(module->comm));

        /* finish with a barrier */
        if (ompi_group_size(win->w_group) > 1) {
            (void) module->comm->c_coll->coll_barrier (module->comm,
                                                       module->comm->c_coll->coll_barrier_module);
        }

        /* remove from component information */
        OPAL_THREAD_SCOPED_LOCK(&mca_osc_pt2pt_component.lock,
                                opal_hash_table_remove_value_uint32(&mca_osc_pt2pt_component.modules,
                                                                    ompi_comm_get_cid(module->comm)));
    }

    /* detach the module from the window before tearing it down */
    win->w_osc_module = NULL;

    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->locks_pending);
    OBJ_DESTRUCT(&module->locks_pending_lock);
    OBJ_DESTRUCT(&module->cond);
    OBJ_DESTRUCT(&module->lock);
    OBJ_DESTRUCT(&module->all_sync);

    /* it is erroneous to close a window with active operations on it so we should
     * probably produce an error here instead of cleaning up */
    OPAL_LIST_DESTRUCT(&module->pending_acc);
    OBJ_DESTRUCT(&module->pending_acc_lock);

    /* flush the garbage-collection lists before destroying them */
    osc_pt2pt_gc_clean (module);
    OPAL_LIST_DESTRUCT(&module->buffer_gc);
    OBJ_DESTRUCT(&module->gc_lock);

    /* release every peer object stored in the peer hash */
    ret = opal_hash_table_get_first_key_uint32 (&module->peer_hash, &key, (void **) &peer, &node);
    while (OPAL_SUCCESS == ret) {
        OBJ_RELEASE(peer);
        ret = opal_hash_table_get_next_key_uint32 (&module->peer_hash, &key, (void **) &peer, node,
                                                   &node);
    }

    OBJ_DESTRUCT(&module->peer_hash);
    OBJ_DESTRUCT(&module->peer_lock);

    /* destruct and free the pre-posted receive fragments, if any */
    if (NULL != module->recv_frags) {
        for (unsigned int i = 0 ; i < module->recv_frag_count ; ++i) {
            OBJ_DESTRUCT(module->recv_frags + i);
        }

        free (module->recv_frags);
    }

    free ((void *) module->epoch_outgoing_frag_count);

    if (NULL != module->comm) {
        ompi_comm_free(&module->comm);
    }

    free ((void *) module->free_after);
    free (module);

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,952 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "osc_pt2pt_frag.h"
#include "mpi.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/base/base.h"
#include "opal/include/opal_stdint.h"
static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, int source, int lock_type,
uint64_t lock_ptr);
/* target-side tracking of a lock request */
struct ompi_osc_pt2pt_pending_lock_t {
    opal_list_item_t super;   /* list item; queued on module->locks_pending */
    int peer;                 /* rank of the requesting process */
    int lock_type;            /* MPI_LOCK_SHARED or MPI_LOCK_EXCLUSIVE */
    uint64_t lock_ptr;        /* initiator-side sync object pointer, echoed back in the ack */
};
typedef struct ompi_osc_pt2pt_pending_lock_t ompi_osc_pt2pt_pending_lock_t;
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_lock_t, opal_list_item_t,
                   NULL, NULL);
static int ompi_osc_pt2pt_activate_next_lock (ompi_osc_pt2pt_module_t *module);
static inline int queue_lock (ompi_osc_pt2pt_module_t *module, int requestor, int lock_type, uint64_t lock_ptr);
static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock,
int target);
/**
 * Acquire the passive-target lock on the local process (target == my rank).
 *
 * @param[in] module  osc pt2pt module
 * @param[in] lock    sync object tracking this lock epoch
 *
 * Tries to take the local lock immediately; if it is contended the request
 * is queued and, because a local lock may not be non-blocking per the MPI
 * standard, we wait for the ack before returning.
 *
 * Fix over the previous revision: debug message typo "aquired" corrected
 * to "acquired".
 */
static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
{
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
    int lock_type = lock->sync.lock.type;
    bool acquired = false;

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    /* one more ack is expected before this sync object is complete */
    (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);

    acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);
    if (!acquired) {
        /* queue the lock */
        queue_lock (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);

        /* If locking local, can't be non-blocking according to the
           standard.  We need to wait for the ack here. */
        ompi_osc_pt2pt_sync_wait_expected (lock);
    }

    ompi_osc_pt2pt_peer_set_locked (peer, true);
    ompi_osc_pt2pt_peer_set_eager_active (peer, true);

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "local lock acquired"));

    return OMPI_SUCCESS;
}
/**
 * Release the passive-target lock held on the local process.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] lock    sync object tracking this lock epoch
 *
 * Adjusts module->lock_status (-1 == exclusively held, >0 == shared
 * reader count, 0 == free) and hands the lock to the next queued
 * requestor when it becomes free.
 */
static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
{
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
    int lock_type = lock->sync.lock.type;

    (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_unlock_self: unlocking myself. lock state = %d", module->lock_status));

    if (MPI_LOCK_EXCLUSIVE == lock_type) {
        /* exclusive hold is encoded as -1; +1 returns the status to 0 (free) */
        OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
        ompi_osc_pt2pt_activate_next_lock (module);
    } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
        /* last shared holder released the lock */
        ompi_osc_pt2pt_activate_next_lock (module);
    }

    /* need to ensure we make progress */
    opal_progress();

    ompi_osc_pt2pt_peer_set_locked (peer, false);
    ompi_osc_pt2pt_peer_set_eager_active (peer, false);

    /* count the (local, implicit) ack on the sync object */
    ompi_osc_pt2pt_sync_expected (lock);
}
/**
 * Send a lock request to a remote target.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] target  rank to lock
 * @param[in] lock    sync object tracking this lock epoch
 *
 * @return OMPI_SUCCESS or the error from the control send.
 *
 * No-op if this peer is already marked locked (used by lock_all, which
 * locks peers lazily on first communication).
 */
int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    int lock_type = lock->sync.lock.type;
    ompi_osc_pt2pt_header_lock_t lock_req;
    int ret;

    OPAL_THREAD_LOCK(&peer->lock);
    if (ompi_osc_pt2pt_peer_locked (peer)) {
        /* already locked (lazy lock_all path) -- nothing to send */
        OPAL_THREAD_UNLOCK(&peer->lock);
        return OMPI_SUCCESS;
    }

    /* expect a lock ack from this target */
    (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    /* generate a lock request */
    lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ;
    lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
    /* keep valgrind & co. quiet about uninitialized padding bytes */
    lock_req.padding[0] = 0;
    lock_req.padding[1] = 0;
#endif
    lock_req.lock_type = lock_type;
    /* the target echoes this pointer back so the ack can find the sync object */
    lock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
    OSC_PT2PT_HTON(&lock_req, module, target);

    ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        /* request never left; roll back the expected-ack count */
        OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, -1);
    } else {
        ompi_osc_pt2pt_peer_set_locked (peer, true);
    }

    OPAL_THREAD_UNLOCK(&peer->lock);

    return ret;
}
/**
 * Send an unlock request to a remote target and flush all queued
 * fragments for it.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] target  rank to unlock
 * @param[in] lock    sync object tracking this lock epoch
 *
 * @return OMPI_SUCCESS or the error from the control send / flush.
 *
 * The epoch's outgoing fragment count for the target is atomically
 * swapped to -1 and shipped in the request so the target knows how many
 * fragments to wait for before acking.
 */
static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock)
{
    /* claim the epoch's fragment count; -1 marks the epoch closed for this target */
    int32_t frag_count = opal_atomic_swap_32 ((opal_atomic_int32_t *) module->epoch_outgoing_frag_count + target, -1);
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    int lock_type = lock->sync.lock.type;
    ompi_osc_pt2pt_header_unlock_t unlock_req;
    int ret;

    /* expect an unlock ack from this target */
    (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ;
    unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
    /* keep valgrind & co. quiet about uninitialized padding bytes */
    unlock_req.padding[0] = 0;
    unlock_req.padding[1] = 0;
#endif
    unlock_req.frag_count = frag_count;
    unlock_req.lock_type = lock_type;
    unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
    OSC_PT2PT_HTON(&unlock_req, module, target);

    if (peer->active_frag) {
        ompi_osc_pt2pt_frag_t *active_frag = (ompi_osc_pt2pt_frag_t *) peer->active_frag;

        if (active_frag->remain_len < sizeof (unlock_req)) {
            /* the peer should expect one more packet */
            ++unlock_req.frag_count;
            --module->epoch_outgoing_frag_count[target];
        }
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: unlocking target %d, frag count: %d", target,
                         unlock_req.frag_count));

    /* send control message with unlock request and count */
    ret = ompi_osc_pt2pt_control_send (module, target, &unlock_req, sizeof (unlock_req));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    ompi_osc_pt2pt_peer_set_locked (peer, false);
    ompi_osc_pt2pt_peer_set_eager_active (peer, false);

    /* push out everything still queued for this target */
    return ompi_osc_pt2pt_frag_flush_target(module, target);
}
/**
 * Send a flush request to a remote target and start all queued
 * fragments for it.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] target  rank to flush
 * @param[in] lock    sync object tracking this lock epoch
 *
 * @return OMPI_SUCCESS or the error from the control send / flush.
 *
 * Like unlock_remote, ships the epoch's outgoing fragment count so the
 * target knows when everything has arrived, but leaves the lock held.
 */
static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
    ompi_osc_pt2pt_header_flush_t flush_req;
    /* claim the epoch's fragment count; -1 marks it consumed for this flush */
    int32_t frag_count = opal_atomic_swap_32 ((opal_atomic_int32_t *) module->epoch_outgoing_frag_count + target, -1);
    int ret;

    /* expect a flush ack from this target */
    (void) OPAL_THREAD_ADD_FETCH32(&lock->sync_expected, 1);

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    flush_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ;
    flush_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
    flush_req.frag_count = frag_count;
    flush_req.lock_ptr = (uint64_t) (uintptr_t) lock;

    /* XXX -- TODO -- since fragment are always delivered in order we do not need to count anything but long
     * requests. once that is done this can be removed. */
    if (peer->active_frag) {
        ompi_osc_pt2pt_frag_t *active_frag = (ompi_osc_pt2pt_frag_t *) peer->active_frag;

        if (active_frag->remain_len < sizeof (flush_req)) {
            /* the peer should expect one more packet */
            ++flush_req.frag_count;
            --module->epoch_outgoing_frag_count[target];
        }
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "flushing to target %d, frag_count: %d",
                         target, flush_req.frag_count));

    /* send control message with flush request and count */
    OSC_PT2PT_HTON(&flush_req, module, target);

    ret = ompi_osc_pt2pt_control_send (module, target, &flush_req, sizeof (flush_req));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    /* start all sendreqs to target */
    return ompi_osc_pt2pt_frag_flush_target (module, target);
}
/**
 * Perform the actual lock acquisition for a lock/lock_all epoch.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] lock    sync object prepared by ompi_osc_pt2pt_lock_internal
 *
 * @return OMPI_SUCCESS or an error from the lock request.
 *
 * Fix over the previous revision: the local variable holding the MPI
 * assertion bits was named `assert`, colliding with the <assert.h> macro
 * identifier used two lines below; renamed to `lock_assert`.
 */
static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
{
    int my_rank = ompi_comm_rank (module->comm);
    int target = lock->sync.lock.target;
    int lock_assert = lock->sync.lock.assert;
    int ret;

    assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);

    if (0 == (lock_assert & MPI_MODE_NOCHECK)) {
        if (my_rank != target && target != -1) {
            ret = ompi_osc_pt2pt_lock_remote (module, target, lock);
        } else {
            /* both self-lock and lock_all (-1) start by locking locally */
            ret = ompi_osc_pt2pt_lock_self (module, lock);
        }

        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            /* return */
            return ret;
        }

        /* for lock_all there is nothing more to do. we will lock peer's on demand */
    } else {
        /* MPI_MODE_NOCHECK: no lock messages needed, eager sends may start now */
        lock->eager_send_active = true;
    }

    return OMPI_SUCCESS;
}
/**
 * Common implementation of MPI_Win_lock / MPI_Win_lock_all.
 *
 * @param[in] lock_type  MPI_LOCK_SHARED or MPI_LOCK_EXCLUSIVE
 * @param[in] target     rank to lock, or -1 for lock_all
 * @param[in] assert     MPI assertion bits (note: parameter name shadows
 *                       the <assert.h> macro identifier textually, but the
 *                       macro is not invoked in this function)
 * @param[in] win        window
 *
 * @return OMPI_SUCCESS, OMPI_ERR_RMA_SYNC on an illegal epoch combination,
 *         OMPI_ERR_RMA_CONFLICT on a conflicting lock, or
 *         OMPI_ERR_OUT_OF_RESOURCE.
 */
static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert, ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *lock;
    int ret = OMPI_SUCCESS;

    /* Check if no_locks is set. TODO: we also need to track whether we are in an
     * active target epoch. Fence can make this tricky to track. */
    if (-1 == target) {
        /* lock_all is illegal inside any other active epoch */
        if (module->all_sync.epoch_active) {
            OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "osc/pt2pt: attempted "
                                 "to lock all when active target epoch is %s and lock all epoch is %s. type %d",
                                 (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ?
                                 "active" : "inactive",
                                 (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type) ? "active" : "inactive",
                                 module->all_sync.type));
            return OMPI_ERR_RMA_SYNC;
        }
    } else {
        if (module->all_sync.epoch_active && (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) {
            /* impossible to get an exclusive lock while holding a global shared lock or in a active
             * target access epoch */
            return OMPI_ERR_RMA_SYNC;
        }
    }

    /* Check if no_locks is set. TODO: we also need to track whether we are in an
     * active target epoch. Fence can make this tricky to track. */
    if (module->all_sync.epoch_active || (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type &&
                                          (MPI_LOCK_EXCLUSIVE == lock_type || -1 == target))) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: attempted "
                             "to acquire a lock on %d with type %d when active sync is %s and lock "
                             "all epoch is %s", target, lock_type, module->all_sync.epoch_active ? "active" : "inactive",
                             (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type &&
                              (MPI_LOCK_EXCLUSIVE == lock_type || -1 == target)) ? "active" : "inactive"));
        return OMPI_ERR_RMA_SYNC;
    }

    if (OMPI_OSC_PT2PT_SYNC_TYPE_FENCE == module->all_sync.type) {
        /* if not communication has occurred during a fence epoch then we can enter a lock epoch
         * just need to clear the all access epoch */
        module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: lock %d %d", target, lock_type));

    /* create lock item */
    if (-1 != target) {
        lock = ompi_osc_pt2pt_sync_allocate (module);
        if (OPAL_UNLIKELY(NULL == lock)) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        lock->peer_list.peer = ompi_osc_pt2pt_peer_lookup (module, target);
    } else {
        /* lock_all reuses the module-wide sync object */
        lock = &module->all_sync;
    }

    lock->type = OMPI_OSC_PT2PT_SYNC_TYPE_LOCK;
    lock->sync.lock.target = target;
    lock->sync.lock.type = lock_type;
    lock->sync.lock.assert = assert;

    lock->num_peers = (-1 == target) ? ompi_comm_size (module->comm) : 1;
    lock->sync_expected = 0;

    /* delay all eager sends until we've heard back.. */
    OPAL_THREAD_LOCK(&module->lock);

    /* check for conflicting lock */
    if (ompi_osc_pt2pt_module_lock_find (module, target, NULL)) {
        if (&module->all_sync != lock) {
            ompi_osc_pt2pt_sync_return (lock);
        }

        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_CONFLICT;
    }

    ++module->passive_target_access_epoch;

    ompi_osc_pt2pt_module_lock_insert (module, lock);
    OPAL_THREAD_UNLOCK(&module->lock);

    ret = ompi_osc_pt2pt_lock_internal_execute (module, lock);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        /* roll back: drop the lock from the module and release it */
        OPAL_THREAD_SCOPED_LOCK(&module->lock, ompi_osc_pt2pt_module_lock_remove (module, lock));

        if (&module->all_sync != lock) {
            ompi_osc_pt2pt_sync_return (lock);
        }
    }

    return ret;
}
/**
 * Common implementation of MPI_Win_unlock / MPI_Win_unlock_all.
 *
 * @param[in] target  rank to unlock, or -1 for unlock_all
 * @param[in] win     window
 *
 * @return OMPI_SUCCESS or OMPI_ERR_RMA_SYNC if no matching lock is held.
 *
 * Waits for the outstanding lock acks, sends unlock requests (or flushes
 * when MPI_MODE_NOCHECK was asserted), waits for remote completion, and
 * finally retires the sync object and closes the passive-target epoch.
 */
static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *lock = NULL;
    int my_rank = ompi_comm_rank (module->comm);
    int ret = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_unlock_internal: unlocking target %d", target));

    OPAL_THREAD_LOCK(&module->lock);
    lock = ompi_osc_pt2pt_module_lock_find (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == lock)) {
        /* unlock without a matching lock is a synchronization error */
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "ompi_osc_pt2pt_unlock: target %d is not locked in window %s",
                             target, win->w_name));
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_unlock_internal: lock acks still expected: %d",
                         lock->sync_expected));
    OPAL_THREAD_UNLOCK(&module->lock);

    /* wait until ack has arrived from target */
    ompi_osc_pt2pt_sync_wait_expected (lock);

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_unlock_internal: all lock acks received"));

    if (!(lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
        if (my_rank != target) {
            if (-1 == target) {
                /* send unlock messages to all of my peers */
                for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
                    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, i);

                    /* skip self and peers that were never actually locked */
                    if (my_rank == i || !ompi_osc_pt2pt_peer_locked (peer)) {
                        continue;
                    }

                    ret = ompi_osc_pt2pt_unlock_remote (module, i, lock);
                    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                        return ret;
                    }
                }

                ompi_osc_pt2pt_unlock_self (module, lock);
            } else {
                ret = ompi_osc_pt2pt_unlock_remote (module, target, lock);
                if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                    return ret;
                }
            }

            /* wait for unlock acks. this signals remote completion of fragments */
            ompi_osc_pt2pt_sync_wait_expected (lock);

            /* It is possible for the unlock to finish too early before the data
             * is actually present in the recv buffer (for non-contiguous datatypes)
             * So make sure to wait for all of the fragments to arrive.
             */
            OPAL_THREAD_LOCK(&module->lock);
            while (module->outgoing_frag_count < 0) {
                opal_condition_wait(&module->cond, &module->lock);
            }
            OPAL_THREAD_UNLOCK(&module->lock);

            OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                                 "ompi_osc_pt2pt_unlock: unlock of %d complete", target));
        } else {
            /* self-unlock needs no messaging */
            ompi_osc_pt2pt_unlock_self (module, lock);
        }
    } else {
        /* flush instead */
        ompi_osc_pt2pt_flush_lock (module, lock, target);
    }

    OPAL_THREAD_LOCK(&module->lock);
    ompi_osc_pt2pt_module_lock_remove (module, lock);

    /* per-target locks are pooled; the lock_all sync object is module-owned */
    if (-1 != lock->sync.lock.target) {
        ompi_osc_pt2pt_sync_return (lock);
    } else {
        ompi_osc_pt2pt_sync_reset (lock);
    }

    --module->passive_target_access_epoch;
    OPAL_THREAD_UNLOCK(&module->lock);

    return ret;
}
/**
 * MPI_Win_lock entry point: acquire a passive-target lock on one rank.
 * Thin wrapper over the shared lock implementation; a negative target
 * rank is a caller bug, trapped in debug builds.
 */
int ompi_osc_pt2pt_lock(int lock_type, int target, int assert, ompi_win_t *win)
{
    assert(target >= 0);
    return ompi_osc_pt2pt_lock_internal (lock_type, target, assert, win);
}
/**
 * MPI_Win_unlock entry point: release the passive-target lock held on
 * @target. Defers entirely to the shared unlock implementation.
 */
int ompi_osc_pt2pt_unlock (int target, struct ompi_win_t *win)
{
    int rc = ompi_osc_pt2pt_unlock_internal (target, win);
    return rc;
}
/**
 * MPI_Win_lock_all entry point: take a shared lock on every rank in the
 * window. Encoded as a lock on target -1 in the shared implementation.
 */
int ompi_osc_pt2pt_lock_all(int assert, struct ompi_win_t *win)
{
    const int all_targets = -1;
    return ompi_osc_pt2pt_lock_internal (MPI_LOCK_SHARED, all_targets, assert, win);
}
/**
 * MPI_Win_unlock_all entry point: drop the window-wide shared lock.
 * Encoded as an unlock of target -1 in the shared implementation.
 */
int ompi_osc_pt2pt_unlock_all (struct ompi_win_t *win)
{
    const int all_targets = -1;
    return ompi_osc_pt2pt_unlock_internal (all_targets, win);
}
/**
 * MPI_Win_sync entry point. The pt2pt component keeps public and private
 * window copies coherent, so the only useful work is to poke the progress
 * engine.
 */
int ompi_osc_pt2pt_sync (struct ompi_win_t *win)
{
    (void) win;            /* no per-window state to synchronize */
    opal_progress();
    return OMPI_SUCCESS;
}
/**
 * Flush all operations issued under @lock to one target (or to every
 * target when @target is -1) and wait for remote completion.
 *
 * @param[in] module  osc pt2pt module
 * @param[in] lock    sync object for the lock epoch being flushed
 * @param[in] target  rank to flush, or -1 for all ranks
 *
 * @return OMPI_SUCCESS or the first error from a per-target flush.
 */
static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock,
                                      int target)
{
    int ret;
    int my_rank = ompi_comm_rank (module->comm);

    /* wait until ack has arrived from target, since we need to be
       able to eager send before we can transfer all the data... */
    ompi_osc_pt2pt_sync_wait_expected (lock);

    if (-1 == target) {
        /* NTH: no local flush */
        for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
            if (i == my_rank) {
                continue;
            }

            ret = ompi_osc_pt2pt_flush_remote (module, i, lock);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return ret;
            }
        }
    } else {
        /* send control message with flush request and count */
        ret = ompi_osc_pt2pt_flush_remote (module, target, lock);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            return ret;
        }
    }

    /* wait for all flush acks (meaning remote completion) */
    ompi_osc_pt2pt_sync_wait_expected (lock);

    /* wake any thread blocked on the module condition variable */
    opal_condition_broadcast (&module->cond);

    return OMPI_SUCCESS;
}
/**
 * MPI_Win_flush entry point: complete all outstanding RMA operations to
 * @target at both origin and target.
 *
 * @param[in] target  rank to flush (must be >= 0)
 * @param[in] win     window
 *
 * @return OMPI_SUCCESS or OMPI_ERR_RMA_SYNC outside a passive-target
 *         epoch / without a matching lock.
 */
int ompi_osc_pt2pt_flush (int target, struct ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *lock;
    int ret;

    assert (0 <= target);

    /* flush is only allowed from within a passive target epoch */
    if (!module->passive_target_access_epoch) {
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_flush starting..."));

    if (ompi_comm_rank (module->comm) == target) {
        /* nothing to flush */
        opal_progress ();
        return OMPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&module->lock);
    lock = ompi_osc_pt2pt_module_lock_find (module, target, NULL);
    if (NULL == lock) {
        /* no per-target lock; fall back to the lock_all sync object if active */
        if (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type) {
            lock = &module->all_sync;
        }
    }
    OPAL_THREAD_UNLOCK(&module->lock);

    if (OPAL_UNLIKELY(NULL == lock)) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "ompi_osc_pt2pt_flush: target %d is not locked in window %s",
                             target, win->w_name));
        ret = OMPI_ERR_RMA_SYNC;
    } else {
        ret = ompi_osc_pt2pt_flush_lock (module, lock, target);
    }

    return ret;
}
/**
 * MPI_Win_flush_all entry point: complete all outstanding RMA operations
 * to every locked target.
 *
 * @param[in] win  window
 *
 * @return OMPI_SUCCESS, OMPI_ERR_RMA_SYNC outside a passive-target epoch,
 *         or the first error from a per-lock flush.
 *
 * Fix over the previous revision: the hash-table iteration passed
 * `(void **) lock` to opal_hash_table_get_next_key_uint32 instead of
 * `(void **) &lock` (compare the get_first call), which would overwrite
 * the pointed-to sync object with the next key's value instead of
 * advancing the local pointer.
 */
int ompi_osc_pt2pt_flush_all (struct ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_sync_t *lock;
    int target, ret;
    void *node;

    /* flush is only allowed from within a passive target epoch */
    if (OPAL_UNLIKELY(!module->passive_target_access_epoch)) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "ompi_osc_pt2pt_flush_all: no targets are locked in window %s",
                             win->w_name));
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_flush_all entering..."));

    /* flush all locks */
    ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, (uint32_t *) &target,
                                                (void **) &lock, &node);
    if (OPAL_SUCCESS == ret) {
        do {
            ret = ompi_osc_pt2pt_flush_lock (module, lock, lock->sync.lock.target);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            /* NOTE: &lock, not lock -- the hash table writes the next value
             * through this out-parameter */
            ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, (uint32_t *) &target,
                                                       (void **) &lock, node, &node);
            if (OPAL_SUCCESS != ret) {
                /* end of iteration, not an error */
                ret = OPAL_SUCCESS;
                break;
            }
        } while (1);
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_flush_all complete"));

    return ret;
}
/**
 * MPI_Win_flush_local entry point: complete all outstanding operations
 * to @target at the origin only (buffers reusable; no remote guarantee).
 *
 * @param[in] target  rank to flush locally
 * @param[in] win     window
 *
 * @return OMPI_SUCCESS or OMPI_ERR_RMA_SYNC outside a passive-target epoch.
 */
int ompi_osc_pt2pt_flush_local (int target, struct ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    int ret;

    /* flush is only allowed from within a passive target epoch */
    if (!module->passive_target_access_epoch) {
        return OMPI_ERR_RMA_SYNC;
    }

    ret = ompi_osc_pt2pt_frag_flush_target(module, target);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* wait for all the requests */
    OPAL_THREAD_LOCK(&module->lock);
    while (module->outgoing_frag_count < 0) {
        opal_condition_wait(&module->cond, &module->lock);
    }
    OPAL_THREAD_UNLOCK(&module->lock);

    /* make some progress */
    opal_progress ();

    return OMPI_SUCCESS;
}
/**
 * MPI_Win_flush_local_all entry point: complete all outstanding
 * operations to every target at the origin only.
 *
 * @param[in] win  window
 *
 * @return OMPI_SUCCESS or OMPI_ERR_RMA_SYNC outside a passive-target epoch.
 */
int ompi_osc_pt2pt_flush_local_all (struct ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    int ret = OMPI_SUCCESS;

    /* flush is only allowed from within a passive target epoch */
    if (!module->passive_target_access_epoch) {
        return OMPI_ERR_RMA_SYNC;
    }

    ret = ompi_osc_pt2pt_frag_flush_all(module);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* wait for all the requests */
    OPAL_THREAD_LOCK(&module->lock);
    while (module->outgoing_frag_count < 0) {
        opal_condition_wait(&module->cond, &module->lock);
    }
    OPAL_THREAD_UNLOCK(&module->lock);

    /* make some progress */
    opal_progress ();

    return OMPI_SUCCESS;
}
/* target-side operation to acknowledge to the initiator side that the
   lock is now held by the initiator.
   Fix over the previous revision: a NULL local lock pointer was logged
   but then still dereferenced by ompi_osc_pt2pt_sync_expected(); now an
   error is returned instead. */
static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor,
                                 uint64_t lock_ptr)
{
    ompi_osc_pt2pt_sync_t *lock;

    if (ompi_comm_rank (module->comm) != requestor) {
        /* remote requestor: send a lock ack carrying back its sync pointer */
        ompi_osc_pt2pt_header_lock_ack_t lock_ack;

        lock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK;
        lock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
        lock_ack.source = ompi_comm_rank(module->comm);
        lock_ack.lock_ptr = lock_ptr;
        OSC_PT2PT_HTON(&lock_ack, module, requestor);

        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "osc pt2pt: sending lock to %d", requestor));

        /* we don't want to send any data, since we're the exposure
           epoch only, so use an unbuffered send */
        return ompi_osc_pt2pt_control_send_unbuffered (module, requestor, &lock_ack, sizeof (lock_ack));
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: releasing local lock"));

    /* local requestor: lock_ptr is our own sync object pointer */
    lock = (ompi_osc_pt2pt_sync_t *) (uintptr_t) lock_ptr;
    if (OPAL_UNLIKELY(NULL == lock)) {
        OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output,
                             "lock could not be located"));
        /* do not dereference a NULL sync object */
        return OMPI_ERROR;
    }

    ompi_osc_pt2pt_sync_expected (lock);

    return OMPI_SUCCESS;
}
/* target-side operation: remember a lock request that could not be
   satisfied right now, so it can be granted when the lock frees up */
static inline int queue_lock (ompi_osc_pt2pt_module_t *module, int requestor,
                              int lock_type, uint64_t lock_ptr)
{
    ompi_osc_pt2pt_pending_lock_t *entry = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);

    if (NULL == entry) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: queueing lock request from %d", requestor));

    /* record who asked, what kind of lock, and the initiator's sync pointer */
    entry->peer = requestor;
    entry->lock_type = lock_type;
    entry->lock_ptr = lock_ptr;

    OPAL_THREAD_SCOPED_LOCK(&module->locks_pending_lock, opal_list_append(&module->locks_pending, &entry->super));

    return OMPI_SUCCESS;
}
/**
 * Try to take the local window lock on behalf of @source.
 *
 * @param[in] module     osc pt2pt module
 * @param[in] source     requesting rank
 * @param[in] lock_type  MPI_LOCK_SHARED or MPI_LOCK_EXCLUSIVE
 * @param[in] lock_ptr   initiator-side sync object pointer to echo back
 *
 * @return true if the lock was acquired (and the ack sent), false if the
 *         caller must queue the request.
 *
 * lock_status encoding: 0 == free, -1 == exclusively held, >0 == number
 * of shared holders.
 */
static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, int source, int lock_type, uint64_t lock_ptr)
{
    bool queue = false;

    if (MPI_LOCK_SHARED == lock_type) {
        int32_t lock_status = module->lock_status;
        /* CAS loop: bump the shared-holder count unless exclusively held */
        do {
            if (lock_status < 0) {
                /* exclusively held -- must queue */
                queue = true;
                break;
            }

            if (opal_atomic_compare_exchange_strong_32 (&module->lock_status, &lock_status, lock_status + 1)) {
                break;
            }
            /* CAS failed; lock_status was reloaded with the current value */
        } while (1);
    } else {
        /* exclusive: only acquirable if the lock is completely free (0 -> -1) */
        int32_t _tmp_value = 0;

        queue = !opal_atomic_compare_exchange_strong_32 (&module->lock_status, &_tmp_value, -1);
    }

    if (queue) {
        return false;
    }

    activate_lock(module, source, lock_ptr);

    /* activated the lock */
    return true;
}
/**
 * Grant as many queued lock requests as the current lock state allows.
 *
 * @param[in] module  osc pt2pt module
 *
 * @return OMPI_SUCCESS.
 *
 * Walks the pending-lock queue in FIFO order and stops at the first
 * request that cannot be satisfied (e.g. an exclusive request while
 * shared holders remain).
 */
static int ompi_osc_pt2pt_activate_next_lock (ompi_osc_pt2pt_module_t *module) {
    /* release any other pending locks we can */
    ompi_osc_pt2pt_pending_lock_t *pending_lock, *next;
    int ret = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&module->locks_pending_lock);
    OPAL_LIST_FOREACH_SAFE(pending_lock, next, &module->locks_pending,
                           ompi_osc_pt2pt_pending_lock_t) {
        bool acquired = ompi_osc_pt2pt_lock_try_acquire (module, pending_lock->peer, pending_lock->lock_type,
                                                         pending_lock->lock_ptr);
        if (!acquired) {
            /* head of queue still blocked; preserve FIFO fairness */
            break;
        }

        opal_list_remove_item (&module->locks_pending, &pending_lock->super);
        OBJ_RELEASE(pending_lock);
    }
    OPAL_THREAD_UNLOCK(&module->locks_pending_lock);

    return ret;
}
/* target-side handler for an incoming lock request: the lock is either
   granted immediately (which sends the ack) or queued for later */
int ompi_osc_pt2pt_process_lock (ompi_osc_pt2pt_module_t* module, int source,
                                 ompi_osc_pt2pt_header_lock_t* lock_header)
{
    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_lock: processing lock request from %d. current lock state = %d",
                         source, module->lock_status));

    /* guard form: queue only when the immediate acquire fails */
    if (!ompi_osc_pt2pt_lock_try_acquire (module, source, lock_header->lock_type,
                                          lock_header->lock_ptr)) {
        queue_lock(module, source, lock_header->lock_type, lock_header->lock_ptr);
    }

    return OMPI_SUCCESS;
}
/* initiator-side handler for a lock ack: mark the peer ready for eager
   sends, drain anything queued for it, and credit the sync object */
void ompi_osc_pt2pt_process_lock_ack (ompi_osc_pt2pt_module_t *module,
                                      ompi_osc_pt2pt_header_lock_ack_t *lock_ack_header)
{
    /* the target echoed back the pointer to our sync object */
    ompi_osc_pt2pt_sync_t *lock =
        (ompi_osc_pt2pt_sync_t *) (uintptr_t) lock_ack_header->lock_ptr;
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, lock_ack_header->source);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_lock_ack: processing lock ack from %d for lock %" PRIu64,
                         lock_ack_header->source, lock_ack_header->lock_ptr));

    assert (NULL != lock);

    /* eager sends to this peer may begin now */
    ompi_osc_pt2pt_peer_set_eager_active (peer, true);
    ompi_osc_pt2pt_frag_flush_pending (module, peer->rank);

    ompi_osc_pt2pt_sync_expected (lock);
}
/* initiator-side handler for a flush ack: credit the sync object the
   target pointed us back at */
void ompi_osc_pt2pt_process_flush_ack (ompi_osc_pt2pt_module_t *module, int source,
                                       ompi_osc_pt2pt_header_flush_ack_t *flush_ack_header) {
    /* header carries the pointer to our own sync object */
    ompi_osc_pt2pt_sync_t *lock =
        (ompi_osc_pt2pt_sync_t *) (uintptr_t) flush_ack_header->lock_ptr;

    (void) module;   /* unused; output stream is a framework global */

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_flush_ack: processing flush ack from %d for lock 0x%" PRIx64,
                         source, flush_ack_header->lock_ptr));

    assert (NULL != lock);
    ompi_osc_pt2pt_sync_expected (lock);
}
/* initiator-side handler for an unlock ack: credit the sync object the
   target pointed us back at */
void ompi_osc_pt2pt_process_unlock_ack (ompi_osc_pt2pt_module_t *module, int source,
                                        ompi_osc_pt2pt_header_unlock_ack_t *unlock_ack_header)
{
    /* NTH: need to verify that this will work as expected */
    ompi_osc_pt2pt_sync_t *lock =
        (ompi_osc_pt2pt_sync_t *) (intptr_t) unlock_ack_header->lock_ptr;

    (void) module;   /* unused; output stream is a framework global */

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_unlock_ack: processing unlock ack from %d",
                         source));

    assert (NULL != lock);
    ompi_osc_pt2pt_sync_expected (lock);
}
/**
 * Process an unlock request.
 *
 * @param[in] module        - OSC PT2PT module
 * @param[in] source        - Source rank
 * @param[in] unlock_header - Incoming unlock header
 *
 * This functions is the target-side function for handling an unlock
 * request. Once all pending operations from the target are complete
 * this functions sends an unlock acknowledgement then attempts to
 * active a pending lock if the lock becomes free.
 *
 * @return OMPI_SUCCESS, OMPI_ERR_WOULD_BLOCK if fragments from @source
 *         are still outstanding (caller retries later), or a send error.
 */
int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
                                   ompi_osc_pt2pt_header_unlock_t *unlock_header)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source);
    ompi_osc_pt2pt_header_unlock_ack_t unlock_ack;
    int ret;

    assert (NULL != peer);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_unlock entering (passive_incoming_frag_count: %d)...",
                         peer->passive_incoming_frag_count));

    /* we cannot block when processing an incoming request */
    if (0 != peer->passive_incoming_frag_count) {
        return OMPI_ERR_WOULD_BLOCK;
    }

    unlock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK;
    unlock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
    /* keep valgrind & co. quiet about uninitialized padding bytes */
    unlock_ack.padding[0] = 0;
    unlock_ack.padding[1] = 0;
    unlock_ack.padding[2] = 0;
    unlock_ack.padding[3] = 0;
    unlock_ack.padding[4] = 0;
    unlock_ack.padding[5] = 0;
#endif
    /* echo the initiator's sync pointer back in the ack */
    unlock_ack.lock_ptr = unlock_header->lock_ptr;
    OSC_PT2PT_HTON(&unlock_ack, module, source);

    ret = ompi_osc_pt2pt_control_send_unbuffered (module, source, &unlock_ack, sizeof (unlock_ack));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    /* release the local lock (-1 == exclusive, >0 == shared holder count)
     * and hand it to the next queued requestor if it became free */
    if (-1 == module->lock_status) {
        OPAL_THREAD_ADD_FETCH32(&module->lock_status, 1);
        ompi_osc_pt2pt_activate_next_lock (module);
    } else if (0 == OPAL_THREAD_ADD_FETCH32(&module->lock_status, -1)) {
        ompi_osc_pt2pt_activate_next_lock (module);
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: finished processing unlock fragment"));

    return ret;
}
/**
 * Target-side handler for a flush request: once all fragments from
 * @source have been processed, send back a flush acknowledgement.
 *
 * @param[in] module       - OSC PT2PT module
 * @param[in] source       - Source rank
 * @param[in] flush_header - Incoming flush header
 *
 * @return OMPI_SUCCESS, OMPI_ERR_WOULD_BLOCK if fragments from @source
 *         are still outstanding (caller retries later), or a send error.
 */
int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source,
                                  ompi_osc_pt2pt_header_flush_t *flush_header)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source);
    ompi_osc_pt2pt_header_flush_ack_t flush_ack;

    assert (NULL != peer);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "ompi_osc_pt2pt_process_flush entering (passive_incoming_frag_count: %d)...",
                         peer->passive_incoming_frag_count));

    /* we cannot block when processing an incoming request */
    if (0 != peer->passive_incoming_frag_count) {
        return OMPI_ERR_WOULD_BLOCK;
    }

    flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK;
    flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    /* echo the initiator's sync pointer back in the ack */
    flush_ack.lock_ptr = flush_header->lock_ptr;
    OSC_PT2PT_HTON(&flush_ack, module, source);

    return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack));
}

Просмотреть файл

@ -1,68 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* Pending frags are fragments that have been received on the target,
* but can not yet be processed (because ordering is turned on).
* Because receive memory descriptors are precious resources, rather
* than keeping a descriptor until the right sequence number, we
* instead malloc a buffer (as part of the pending frag) and copy the
* message.
*/
#ifndef OSC_PT2PT_PENDING_FRAG_H
#define OSC_PT2PT_PENDING_FRAG_H
/** Incoming fragment that has to be queued */
struct ompi_osc_pt2pt_pending_frag_t {
    /* list-item base class so pending frags can be chained on an opal_list_t */
    opal_list_item_t super;

    /* This is a pointer to the top of the fragment (which is always
       the header).  Save as a header to make the casting a bit less
       onerous during sequence number lookups. */
    ompi_osc_pt2pt_frag_header_t *header;
};
typedef struct ompi_osc_pt2pt_pending_frag_t ompi_osc_pt2pt_pending_frag_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_pending_frag_t);
/*
 * Create a pending fragment by copying the received message into a
 * freshly-allocated buffer that trails the pending-frag object itself.
 *
 * Note: module lock must be held during this operation.
 *
 * @param[in] module  osc pt2pt module (currently unused)
 * @param[in] ptr     start of the received fragment (the header)
 * @param[in] size    number of bytes to copy
 *
 * @returns NULL on allocation failure, the new pending frag otherwise
 */
static inline ompi_osc_pt2pt_pending_frag_t*
ompi_osc_pt2pt_pending_frag_create(ompi_osc_pt2pt_module_t *module,
                                   void *ptr,
                                   size_t size)
{
    /* single allocation: object header followed by the message copy */
    size_t total_size = sizeof(ompi_osc_pt2pt_pending_frag_t) + size;
    ompi_osc_pt2pt_pending_frag_t *ret =
        (ompi_osc_pt2pt_pending_frag_t*) malloc(total_size);
    if (NULL == ret) return NULL;

    /* construct the object itself; the original code passed &ret, which
     * ran the constructor on the stack pointer variable instead */
    OBJ_CONSTRUCT(ret, ompi_osc_pt2pt_pending_frag_t);

    /* point header at the trailing buffer before copying; previously the
     * memcpy wrote through the uninitialized header pointer (UB) */
    ret->header = (ompi_osc_pt2pt_frag_header_t *)(ret + 1);
    memcpy(ret->header, ptr, size);

    return ret;
}
/*
 * Destroy a pending fragment created by ompi_osc_pt2pt_pending_frag_create().
 *
 * Note: module lock must be held for this operation.
 *
 * @param[in] module  osc pt2pt module (currently unused)
 * @param[in] frag    pending fragment to destruct and free
 *
 * @returns OMPI_SUCCESS always
 */
static inline int
ompi_osc_pt2pt_pending_frag_destroy(ompi_osc_pt2pt_module_t *module,
                                    ompi_osc_pt2pt_pending_frag_t* frag)
{
    /* destruct the object itself; the original code passed &frag, which
     * ran the destructor on the stack pointer variable instead */
    OBJ_DESTRUCT(frag);
    free(frag);

    return OMPI_SUCCESS;
}
#endif

Просмотреть файл

@ -1,63 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_request.h"
/* MPI_Cancel is not supported for one-sided requests: always fail. */
static int
request_cancel(struct ompi_request_t *request, int complete)
{
    (void) request;
    (void) complete;

    return MPI_ERR_REQUEST;
}
/* Free hook for osc pt2pt requests: only completed requests may be freed;
 * the request object is returned to the component free list. */
static int
request_free(struct ompi_request_t **ompi_req)
{
    ompi_osc_pt2pt_request_t *req = (ompi_osc_pt2pt_request_t *) *ompi_req;

    /* it is an error to free a request that has not completed */
    if (REQUEST_COMPLETED != req->super.req_complete) {
        return MPI_ERR_REQUEST;
    }

    OMPI_OSC_PT2PT_REQUEST_RETURN(req);
    *ompi_req = MPI_REQUEST_NULL;

    return OMPI_SUCCESS;
}
/* Object constructor: set up the base MPI request fields and wire in the
 * osc pt2pt free/cancel hooks. */
static void
request_construct(ompi_osc_pt2pt_request_t *request)
{
    ompi_request_t *base = &request->super;

    base->req_type = OMPI_REQUEST_WIN;
    base->req_status._cancelled = 0;
    base->req_free = request_free;
    base->req_cancel = request_cancel;

    request->outstanding_requests = 0;
}

OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_request_t,
                   ompi_request_t,
                   request_construct,
                   NULL);

Просмотреть файл

@ -1,77 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_REQUEST_H
#define OMPI_OSC_PT2PT_REQUEST_H
#include "osc_pt2pt.h"
#include "ompi/request/request.h"
#include "opal/util/output.h"
/** Request object tracking a one-sided (window) operation. */
struct ompi_osc_pt2pt_request_t {
    /** base MPI request */
    ompi_request_t super;

    /* operation type tag — NOTE(review): values not visible here, confirm
       against the data-movement code */
    int type;
    /* user buffer at the origin */
    const void *origin_addr;
    /* element count for the origin buffer */
    int origin_count;
    /* datatype describing the origin buffer */
    struct ompi_datatype_t *origin_dt;
    /* owning osc pt2pt module */
    ompi_osc_pt2pt_module_t* module;
    /* count of in-flight sub-requests; reset to 0 when the request is
       returned to the free list */
    opal_atomic_int32_t outstanding_requests;
    /* internal requests are recycled immediately on completion instead of
       being completed at the MPI level (see ompi_osc_pt2pt_request_complete) */
    bool internal;
};
typedef struct ompi_osc_pt2pt_request_t ompi_osc_pt2pt_request_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_request_t);
/* REQUEST_ALLOC is only called from "top-level" functions (pt2pt_rput,
   pt2pt_rget, etc.), so it's ok to spin here...  The loop keeps driving
   opal_progress() until the component free list yields an item, then
   initializes the request as active and bound to the given window. */
#define OMPI_OSC_PT2PT_REQUEST_ALLOC(win, req)                          \
    do {                                                                \
        opal_free_list_item_t *item;                                    \
        do {                                                            \
            item = opal_free_list_get (&mca_osc_pt2pt_component.requests); \
            if (NULL == item) {                                         \
                opal_progress();                                        \
            }                                                           \
        } while (NULL == item);                                         \
        req = (ompi_osc_pt2pt_request_t*) item;                         \
        OMPI_REQUEST_INIT(&req->super, false);                          \
        req->super.req_mpi_object.win = win;                            \
        req->super.req_complete = false;                                \
        req->super.req_state = OMPI_REQUEST_ACTIVE;                     \
        req->module = GET_MODULE(win);                                  \
        req->internal = false;                                          \
    } while (0)

/* Finalize a request and hand it back to the component free list.  The
   outstanding-requests counter is cleared so the recycled object starts
   fresh. */
#define OMPI_OSC_PT2PT_REQUEST_RETURN(req)                              \
    do {                                                                \
        OMPI_REQUEST_FINI(&(req)->super);                               \
        (req)->outstanding_requests = 0;                                \
        opal_free_list_return (&mca_osc_pt2pt_component.requests,       \
                               (opal_free_list_item_t *) (req));        \
    } while (0)
/* Complete a request with the given MPI error code.  User-visible requests
 * are completed at the MPI level; internal requests are never handed to the
 * user, so they are simply returned to the free list. */
static inline void ompi_osc_pt2pt_request_complete (ompi_osc_pt2pt_request_t *request, int mpi_error)
{
    if (request->internal) {
        OMPI_OSC_PT2PT_REQUEST_RETURN (request);
        return;
    }

    request->super.req_status.MPI_ERROR = mpi_error;
    /* mark the request complete at the mpi level */
    ompi_request_complete (&request->super, true);
}
#endif /* OMPI_OSC_PT2PT_REQUEST_H */

Просмотреть файл

@ -1,93 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "osc_pt2pt.h"
#include "osc_pt2pt_sync.h"
/* Object constructor: initialize the lock/condvar pair and put the sync
 * object into its default, inactive state. */
static void ompi_osc_pt2pt_sync_constructor (ompi_osc_pt2pt_sync_t *sync)
{
    OBJ_CONSTRUCT(&sync->lock, opal_mutex_t);
    OBJ_CONSTRUCT(&sync->cond, opal_condition_t);

    sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
    sync->eager_send_active = false;
    sync->epoch_active = false;
}
/* Object destructor: tear down the condvar and lock built by the
 * constructor. */
static void ompi_osc_pt2pt_sync_destructor (ompi_osc_pt2pt_sync_t *sync)
{
    OBJ_DESTRUCT(&sync->cond);
    OBJ_DESTRUCT(&sync->lock);
}

OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_sync_t, opal_free_list_item_t,
                   ompi_osc_pt2pt_sync_constructor,
                   ompi_osc_pt2pt_sync_destructor);
/**
 * Allocate a new synchronization object bound to @a module.
 *
 * @param[in] module  osc pt2pt module (stored on the object; otherwise unused)
 *
 * @returns NULL on allocation failure, the new object otherwise
 */
ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_sync_allocate (struct ompi_osc_pt2pt_module_t *module)
{
    ompi_osc_pt2pt_sync_t *new_sync = OBJ_NEW (ompi_osc_pt2pt_sync_t);

    if (OPAL_UNLIKELY(NULL == new_sync)) {
        return NULL;
    }

    new_sync->module = module;

    return new_sync;
}
/* Release a synchronization object obtained from
 * ompi_osc_pt2pt_sync_allocate(). */
void ompi_osc_pt2pt_sync_return (ompi_osc_pt2pt_sync_t *sync)
{
    OBJ_RELEASE(sync);
}
/**
 * Recursive binary search for @a rank in a peer array.
 *
 * Assumes peers[] is sorted by ascending rank — NOTE(review): ordering is
 * established by the code that builds the peer list; confirm there.
 *
 * @param[in]  rank    rank to look for
 * @param[in]  peers   sorted array of peer pointers
 * @param[in]  nranks  number of entries in @a peers
 * @param[out] peer    if non-NULL, set to the matching peer or NULL
 *
 * @returns true if the rank was found, false otherwise
 */
static inline bool ompi_osc_pt2pt_sync_array_peer (int rank, ompi_osc_pt2pt_peer_t **peers, size_t nranks,
                                                   struct ompi_osc_pt2pt_peer_t **peer)
{
    int mid = nranks / 2;

    /* base cases */
    if (0 == nranks || (1 == nranks && peers[0]->rank != rank)) {
        /* empty range, or a single non-matching entry: not found */
        if (peer) {
            *peer = NULL;
        }
        return false;
    } else if (peers[0]->rank == rank) {
        if (peer) {
            *peer = peers[0];
        }
        return true;
    }

    /* recurse into whichever half can still contain the rank */
    if (peers[mid]->rank > rank) {
        return ompi_osc_pt2pt_sync_array_peer (rank, peers, mid, peer);
    }

    return ompi_osc_pt2pt_sync_array_peer (rank, peers + mid, nranks - mid, peer);
}
/* Check whether @a target belongs to the active PSCW access epoch on this
 * window; on success *peer (if non-NULL) is set to the matching peer. */
bool ompi_osc_pt2pt_sync_pscw_peer (ompi_osc_pt2pt_module_t *module, int target, struct ompi_osc_pt2pt_peer_t **peer)
{
    ompi_osc_pt2pt_sync_t *all_sync = &module->all_sync;

    /* only meaningful while a post-start-complete-wait epoch is active */
    if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == all_sync->type) {
        return ompi_osc_pt2pt_sync_array_peer (target, all_sync->peer_list.peers, all_sync->num_peers, peer);
    }

    if (NULL != peer) {
        *peer = NULL;
    }

    return false;
}

Просмотреть файл

@ -1,189 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_SYNC_H
#define OMPI_OSC_PT2PT_SYNC_H
#include "ompi_config.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/threads/threads.h"
/** Kind of access epoch a sync object currently represents. */
enum ompi_osc_pt2pt_sync_type_t {
    /** default value */
    OMPI_OSC_PT2PT_SYNC_TYPE_NONE,
    /** lock access epoch */
    OMPI_OSC_PT2PT_SYNC_TYPE_LOCK,
    /** fence access epoch */
    OMPI_OSC_PT2PT_SYNC_TYPE_FENCE,
    /** post-start-complete-wait access epoch */
    OMPI_OSC_PT2PT_SYNC_TYPE_PSCW,
};
typedef enum ompi_osc_pt2pt_sync_type_t ompi_osc_pt2pt_sync_type_t;
struct ompi_osc_pt2pt_module_t;
struct ompi_osc_pt2pt_peer_t;
/**
* @brief synchronization object
*
* This structure holds information about an access epoch.
*/
struct ompi_osc_pt2pt_sync_t {
    /* free-list item base class */
    opal_free_list_item_t super;

    /* module this synchronization object belongs to */
    struct ompi_osc_pt2pt_module_t *module;

    /** synchronization type */
    ompi_osc_pt2pt_sync_type_t type;

    /** synchronization data */
    union {
        /** lock specific synchronization data */
        struct {
            /** lock target rank (-1 for all) */
            int target;
            /** lock type: MPI_LOCK_SHARED, MPI_LOCK_EXCLUSIVE */
            int type;
            /** assert specified at lock acquire time */
            int assert;
        } lock;
        /** post/start/complete/wait specific synchronization data */
        struct {
            /** group passed to ompi_osc_pt2pt_start */
            ompi_group_t *group;
        } pscw;
    } sync;

    /** array of peers for this sync */
    union {
        /** multiple peers (lock all, pscw, fence) */
        struct ompi_osc_pt2pt_peer_t **peers;
        /** single peer (targeted lock) */
        struct ompi_osc_pt2pt_peer_t *peer;
    } peer_list;

    /** number of peers */
    int num_peers;

    /** number of synchronization messages expected */
    opal_atomic_int32_t sync_expected;

    /** eager sends are active to all peers in this access epoch */
    volatile bool eager_send_active;

    /** communication has started on this epoch */
    bool epoch_active;

    /** lock to protect sync structure members */
    opal_mutex_t lock;

    /** condition variable for changes in the sync object */
    opal_condition_t cond;
};
typedef struct ompi_osc_pt2pt_sync_t ompi_osc_pt2pt_sync_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_sync_t);
/**
* @brief allocate a new synchronization object
*
* @param[in] module osc pt2pt module
*
* @returns NULL on failure
* @returns a new synchronization object on success
*/
ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_sync_allocate (struct ompi_osc_pt2pt_module_t *module);
/**
* @brief release a synchronization object
*
* @param[in] pt2pt_sync synchronization object allocated by ompi_osc_pt2pt_sync_allocate()
*/
void ompi_osc_pt2pt_sync_return (ompi_osc_pt2pt_sync_t *pt2pt_sync);
/**
* Check if the target is part of a PSCW access epoch
*
* @param[in] module osc pt2pt module
* @param[in] target target rank
* @param[out] peer peer object
*
* @returns false if the window is not in a PSCW access epoch or the peer is not
* in the group passed to MPI_Win_start
* @returns true otherwise
*
* This functions verifies the target is part of an active PSCW access epoch.
*/
bool ompi_osc_pt2pt_sync_pscw_peer (struct ompi_osc_pt2pt_module_t *module, int target, struct ompi_osc_pt2pt_peer_t **peer);
/**
* Wait for all remote peers in the synchronization to respond
*/
/* Block on the sync condvar until eager sends become active for this access
 * epoch.  Caller must already hold sync->lock (hence the _nolock suffix);
 * opal_condition_wait releases and re-acquires it around each wait. */
static inline void ompi_osc_pt2pt_sync_wait_nolock (ompi_osc_pt2pt_sync_t *sync)
{
    while (!sync->eager_send_active) {
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "waiting for access epoch to start"));
        opal_condition_wait(&sync->cond, &sync->lock);
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "access epoch ready"));
}
/* Locked wrapper around ompi_osc_pt2pt_sync_wait_nolock(): acquires the
 * sync lock for the duration of the wait. */
static inline void ompi_osc_pt2pt_sync_wait (ompi_osc_pt2pt_sync_t *sync)
{
    opal_mutex_t *lock = &sync->lock;

    OPAL_THREAD_LOCK(lock);
    ompi_osc_pt2pt_sync_wait_nolock (sync);
    OPAL_THREAD_UNLOCK(lock);
}
/**
* Wait for all remote peers in the synchronization to respond
*/
/* Block until all expected synchronization messages for this epoch have
 * arrived (sync_expected reaches zero).  The condvar is signaled by
 * ompi_osc_pt2pt_sync_expected() as messages come in. */
static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *sync)
{
    OPAL_THREAD_LOCK(&sync->lock);
    while (sync->sync_expected) {
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                             "waiting for %d syncronization messages",
                             sync->sync_expected));
        opal_condition_wait(&sync->cond, &sync->lock);
    }
    OPAL_THREAD_UNLOCK(&sync->lock);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "all synchronization messages received"));
}
/* Record the arrival of one expected synchronization message.  When the
 * count drops to zero, wake all waiters; eager sends are enabled unless
 * this is a multi-peer lock epoch (that case keeps them disabled — see
 * the LOCK/num_peers test below). */
static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync)
{
    /* atomically decrement outside the lock; only the thread that sees 0
       takes the lock and broadcasts */
    int32_t new_value = OPAL_THREAD_ADD_FETCH32 (&sync->sync_expected, -1);

    if (0 == new_value) {
        OPAL_THREAD_LOCK(&sync->lock);
        if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) {
            sync->eager_send_active = true;
        }
        opal_condition_broadcast (&sync->cond);
        OPAL_THREAD_UNLOCK(&sync->lock);
    }
}
/* Reset a synchronization object to its default (inactive) state so it can
 * be reused for the next access epoch.  Does not touch the lock/condvar or
 * the owning module pointer. */
static inline void ompi_osc_pt2pt_sync_reset (ompi_osc_pt2pt_sync_t *sync)
{
    sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
    sync->eager_send_active = false;
    /* epoch_active is a bool: use false (was the int literal 0) for
       consistency with the constructor */
    sync->epoch_active = false;
    sync->peer_list.peers = NULL;
    sync->sync.pscw.group = NULL;
}
#endif /* OMPI_OSC_PT2PT_SYNC_H */

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: LANL
status: active

Просмотреть файл

@ -257,7 +257,7 @@ static int ompi_osc_rdma_component_register (void)
ompi_osc_rdma_mtl_names = "psm2";
opal_asprintf(&description_str, "Comma-delimited list of MTL component names to lower the priority of rdma "
"osc component favoring pt2pt osc (default: %s)", ompi_osc_rdma_mtl_names);
"osc component (default: %s)", ompi_osc_rdma_mtl_names);
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "mtls", description_str,
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
@ -374,7 +374,7 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
#endif /* OPAL_CUDA_SUPPORT */
if (OMPI_SUCCESS == ompi_osc_rdma_query_mtls ()) {
return 5; /* this has to be lower that osc pt2pt default priority */
return 5;
}
if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) {