1
1
openmpi/ompi/mca/osc/rdma/osc_rdma.h

327 строки
11 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_RDMA_H
#define OMPI_OSC_RDMA_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/threads/threads.h"
#include "ompi/win/win.h"
#include "ompi/communicator/communicator.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/bml/bml.h"
BEGIN_C_DECLS
struct ompi_osc_rdma_buffer_t {
mca_btl_base_descriptor_t* descriptor;
size_t remain_len;
mca_bml_base_btl_t *bml_btl;
};
typedef struct ompi_osc_rdma_buffer_t ompi_osc_rdma_buffer_t;
struct ompi_osc_rdma_component_t {
/** Extend the basic osc component interface */
ompi_osc_base_component_t super;
/** lock access to datastructures in the component structure */
opal_mutex_t c_lock;
/** List of ompi_osc_rdma_module_ts currently in existance.
Needed so that received fragments can be dispatched to the
correct module */
opal_hash_table_t c_modules;
/** Lock for request management */
opal_mutex_t c_request_lock;
/** Condition variable for request management */
opal_condition_t c_request_cond;
/** free list of ompi_osc_rdma_sendreq_t structures */
opal_free_list_t c_sendreqs;
/** free list of ompi_osc_rdma_replyreq_t structures */
opal_free_list_t c_replyreqs;
/** free list of ompi_osc_rdma_longreq_t structures */
opal_free_list_t c_longreqs;
bool c_btl_registered;
uint32_t c_sequence_number;
};
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
struct ompi_osc_rdma_btl_t {
uint8_t peer_seg[MCA_BTL_SEG_MAX_SIZE];
mca_bml_base_btl_t *bml_btl;
int rdma_order;
int32_t num_sent;
};
typedef struct ompi_osc_rdma_btl_t ompi_osc_rdma_btl_t;
struct ompi_osc_rdma_peer_info_t {
uint64_t peer_base;
uint64_t peer_len;
int peer_num_btls;
volatile int peer_index_btls;
ompi_osc_rdma_btl_t *peer_btls;
int local_num_btls;
mca_bml_base_btl_t **local_btls;
mca_mpool_base_registration_t **local_registrations;
mca_btl_base_descriptor_t **local_descriptors;
};
typedef struct ompi_osc_rdma_peer_info_t ompi_osc_rdma_peer_info_t;
struct ompi_osc_rdma_setup_info_t {
volatile int32_t num_btls_callin;
int32_t num_btls_expected;
volatile int32_t num_btls_outgoing;
opal_list_t *outstanding_btl_requests;
};
typedef struct ompi_osc_rdma_setup_info_t ompi_osc_rdma_setup_info_t;
struct ompi_osc_rdma_module_t {
/** Extend the basic osc module interface */
ompi_osc_base_module_t super;
uint32_t m_sequence_number;
/** lock access to data structures in the current module */
opal_mutex_t m_lock;
/** condition variable for access to current module */
opal_condition_t m_cond;
/** lock for "atomic" window updates from reductions */
opal_mutex_t m_acc_lock;
/** pointer back to window */
ompi_win_t *m_win;
/** communicator created with this window */
ompi_communicator_t *m_comm;
/** list of ompi_osc_rdma_sendreq_t structures, and includes all
requests for this access epoch that have not already been
started. m_lock must be held when modifying this field. */
opal_list_t m_pending_sendreqs;
/** list of unsigned int counters for the number of requests to a
particular rank in m_comm for this access epoc. m_lock
must be held when modifying this field */
unsigned int *m_num_pending_sendreqs;
/** For MPI_Fence synchronization, the number of messages to send
in epoch. For Start/Complete, the number of updates for this
Complete. For lock, the number of
messages waiting for completion on on the origin side. Not
protected by m_lock - must use atomic counter operations. */
int32_t m_num_pending_out;
/** For MPI_Fence synchronization, the number of expected incoming
messages. For Post/Wait, the number of expected updates from
complete. For lock, the number of messages on the passive side
we are waiting for. Not protected by m_lock - must use
atomic counter operations. */
int32_t m_num_pending_in;
/** Number of "ping" messages from the remote post group we've
received */
int32_t m_num_post_msgs;
/** Number of "count" messages from the remote complete group
we've received */
int32_t m_num_complete_msgs;
/** cyclic counter for a unique tage for long messages. Not
protected by the m_lock - must use create_send_tag() to
create a send tag */
volatile int32_t m_tag_counter;
opal_list_t m_copy_pending_sendreqs;
unsigned int *m_copy_num_pending_sendreqs;
opal_list_t m_queued_sendreqs;
/** start sending data eagerly */
bool m_eager_send_active;
bool m_eager_send_ok;
/* RDMA data */
bool m_use_rdma;
bool m_rdma_wait_completion;
ompi_osc_rdma_setup_info_t *m_setup_info;
ompi_osc_rdma_peer_info_t *m_peer_info;
int32_t m_rdma_num_pending;
/*** buffering ***/
bool m_use_buffers;
ompi_osc_rdma_buffer_t *m_pending_buffers;
/* ********************* FENCE data ************************ */
/* an array of <sizeof(m_comm)> ints, each containing the value
1. */
int *m_fence_coll_counts;
/* ********************* PWSC data ************************ */
struct ompi_group_t *m_pw_group;
struct ompi_group_t *m_sc_group;
bool *m_sc_remote_active_ranks;
int *m_sc_remote_ranks;
/* ********************* LOCK data ************************ */
int32_t m_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
int32_t m_shared_count;
opal_list_t m_locks_pending;
opal_list_t m_unlocks_pending;
int32_t m_lock_received_ack;
};
typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t;
OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component;
#define GET_MODULE(win) ((ompi_osc_rdma_module_t*) win->w_osc_module)
/*
* Component functions
*/
int ompi_osc_rdma_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
int ompi_osc_rdma_component_finalize(void);
int ompi_osc_rdma_component_query(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
int ompi_osc_rdma_component_select(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
/* helper function that properly sets up request handling */
int ompi_osc_rdma_component_irecv(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
int ompi_osc_rdma_component_isend(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int dest,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
int ompi_osc_rdma_peer_info_free(ompi_osc_rdma_peer_info_t *peer_info);
/*
* Module interface function types
*/
int ompi_osc_rdma_module_free(struct ompi_win_t *win);
int ompi_osc_rdma_module_put(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_rdma_module_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_rdma_module_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_rdma_module_fence(int assert, struct ompi_win_t *win);
int ompi_osc_rdma_module_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_module_complete(struct ompi_win_t *win);
int ompi_osc_rdma_module_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_module_wait(struct ompi_win_t *win);
int ompi_osc_rdma_module_test(struct ompi_win_t *win,
int *flag);
int ompi_osc_rdma_module_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_module_unlock(int target,
struct ompi_win_t *win);
/*
* passive side sync interface functions
*/
int ompi_osc_rdma_passive_lock(ompi_osc_rdma_module_t *module,
int32_t origin,
int32_t lock_type);
int ompi_osc_rdma_passive_unlock(ompi_osc_rdma_module_t *module,
int32_t origin,
int32_t count);
int ompi_osc_rdma_passive_unlock_complete(ompi_osc_rdma_module_t *module);
END_C_DECLS
#endif /* OMPI_OSC_RDMA_H */