2006-07-18 02:08:55 +04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
|
|
* All rights reserved.
|
2006-08-24 20:38:08 +04:00
|
|
|
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
2006-07-18 02:08:55 +04:00
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2007-05-24 19:41:24 +04:00
|
|
|
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
2006-07-18 02:08:55 +04:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
#ifndef OMPI_OSC_RDMA_H
|
|
|
|
#define OMPI_OSC_RDMA_H
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2009-03-04 18:35:54 +03:00
|
|
|
#include "ompi_config.h"
|
2006-07-18 02:08:55 +04:00
|
|
|
#include "opal/class/opal_list.h"
|
|
|
|
#include "opal/class/opal_free_list.h"
|
|
|
|
#include "opal/class/opal_hash_table.h"
|
|
|
|
|
|
|
|
#include "ompi/win/win.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
2007-05-24 19:41:24 +04:00
|
|
|
#include "ompi/mca/osc/osc.h"
|
|
|
|
#include "ompi/mca/btl/btl.h"
|
2007-07-03 02:22:59 +04:00
|
|
|
#include "ompi/mca/bml/bml.h"
|
2007-05-24 19:41:24 +04:00
|
|
|
|
|
|
|
BEGIN_C_DECLS
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-07-06 01:40:06 +04:00
|
|
|
struct ompi_osc_rdma_buffer_t {
|
|
|
|
mca_btl_base_descriptor_t* descriptor;
|
|
|
|
size_t remain_len;
|
|
|
|
mca_bml_base_btl_t *bml_btl;
|
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_buffer_t ompi_osc_rdma_buffer_t;
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
struct ompi_osc_rdma_component_t {
|
|
|
|
/** Extend the basic osc component interface */
|
|
|
|
ompi_osc_base_component_t super;
|
|
|
|
|
|
|
|
/** store the state of progress threads for this instance of OMPI */
|
2007-05-24 19:41:24 +04:00
|
|
|
bool c_have_progress_threads;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** lock access to datastructures in the component structure */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_mutex_t c_lock;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** List of ompi_osc_rdma_module_ts currently in existance.
|
|
|
|
Needed so that received fragments can be dispatched to the
|
|
|
|
correct module */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_hash_table_t c_modules;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
/** Lock for request management */
|
|
|
|
opal_mutex_t c_request_lock;
|
|
|
|
|
|
|
|
/** Condition variable for request management */
|
|
|
|
opal_condition_t c_request_cond;
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
/** free list of ompi_osc_rdma_sendreq_t structures */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_free_list_t c_sendreqs;
|
2006-07-18 02:08:55 +04:00
|
|
|
/** free list of ompi_osc_rdma_replyreq_t structures */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_free_list_t c_replyreqs;
|
2006-07-18 02:08:55 +04:00
|
|
|
/** free list of ompi_osc_rdma_longreq_t structures */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_free_list_t c_longreqs;
|
|
|
|
|
|
|
|
/** list of outstanding requests, of type ompi_osc_pt2pt_longreq_t */
|
|
|
|
opal_list_t c_pending_requests;
|
|
|
|
|
|
|
|
#if OMPI_ENABLE_PROGRESS_THREADS
|
|
|
|
opal_thread_t c_thread;
|
|
|
|
bool c_thread_run;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
bool c_btl_registered;
|
2007-07-03 02:22:59 +04:00
|
|
|
|
|
|
|
uint32_t c_sequence_number;
|
2006-07-18 02:08:55 +04:00
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
|
|
|
|
|
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
struct ompi_osc_rdma_btl_t {
|
|
|
|
uint64_t peer_seg_key;
|
|
|
|
mca_bml_base_btl_t *bml_btl;
|
|
|
|
int rdma_order;
|
2007-07-05 07:32:32 +04:00
|
|
|
int32_t num_sent;
|
2007-07-03 02:22:59 +04:00
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_btl_t ompi_osc_rdma_btl_t;
|
|
|
|
|
|
|
|
|
|
|
|
struct ompi_osc_rdma_peer_info_t {
|
|
|
|
uint64_t peer_base;
|
|
|
|
uint64_t peer_len;
|
|
|
|
|
|
|
|
int peer_num_btls;
|
|
|
|
volatile int peer_index_btls;
|
|
|
|
ompi_osc_rdma_btl_t *peer_btls;
|
|
|
|
|
|
|
|
int local_num_btls;
|
|
|
|
mca_bml_base_btl_t **local_btls;
|
|
|
|
mca_mpool_base_registration_t **local_registrations;
|
|
|
|
mca_btl_base_descriptor_t **local_descriptors;
|
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_peer_info_t ompi_osc_rdma_peer_info_t;
|
|
|
|
|
|
|
|
|
|
|
|
struct ompi_osc_rdma_setup_info_t {
|
|
|
|
volatile int32_t num_btls_callin;
|
|
|
|
int32_t num_btls_expected;
|
|
|
|
volatile int32_t num_btls_outgoing;
|
|
|
|
opal_list_t *outstanding_btl_requests;
|
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_setup_info_t ompi_osc_rdma_setup_info_t;
|
|
|
|
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
struct ompi_osc_rdma_module_t {
|
|
|
|
/** Extend the basic osc module interface */
|
|
|
|
ompi_osc_base_module_t super;
|
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
uint32_t m_sequence_number;
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
/** lock access to data structures in the current module */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_mutex_t m_lock;
|
|
|
|
|
|
|
|
/** condition variable for access to current module */
|
|
|
|
opal_condition_t m_cond;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** lock for "atomic" window updates from reductions */
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_mutex_t m_acc_lock;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** pointer back to window */
|
2007-05-24 19:41:24 +04:00
|
|
|
ompi_win_t *m_win;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** communicator created with this window */
|
2007-05-24 19:41:24 +04:00
|
|
|
ompi_communicator_t *m_comm;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** list of ompi_osc_rdma_sendreq_t structures, and includes all
|
|
|
|
requests for this access epoch that have not already been
|
2007-05-24 19:41:24 +04:00
|
|
|
started. m_lock must be held when modifying this field. */
|
|
|
|
opal_list_t m_pending_sendreqs;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2006-11-28 00:41:29 +03:00
|
|
|
/** list of unsigned int counters for the number of requests to a
|
2007-05-24 19:41:24 +04:00
|
|
|
particular rank in m_comm for this access epoc. m_lock
|
2006-07-18 02:08:55 +04:00
|
|
|
must be held when modifying this field */
|
2007-05-24 19:41:24 +04:00
|
|
|
unsigned int *m_num_pending_sendreqs;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** For MPI_Fence synchronization, the number of messages to send
|
|
|
|
in epoch. For Start/Complete, the number of updates for this
|
2006-09-21 23:57:57 +04:00
|
|
|
Complete. For lock, the number of
|
2006-07-18 02:08:55 +04:00
|
|
|
messages waiting for completion on on the origin side. Not
|
2007-05-24 19:41:24 +04:00
|
|
|
protected by m_lock - must use atomic counter operations. */
|
2007-05-24 19:42:06 +04:00
|
|
|
int32_t m_num_pending_out;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/** For MPI_Fence synchronization, the number of expected incoming
|
|
|
|
messages. For Post/Wait, the number of expected updates from
|
|
|
|
complete. For lock, the number of messages on the passive side
|
2007-05-24 19:41:24 +04:00
|
|
|
we are waiting for. Not protected by m_lock - must use
|
2006-07-18 02:08:55 +04:00
|
|
|
atomic counter operations. */
|
2007-05-24 19:42:06 +04:00
|
|
|
int32_t m_num_pending_in;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2006-09-21 23:57:57 +04:00
|
|
|
/** Number of "ping" messages from the remote post group we've
|
|
|
|
received */
|
2007-05-24 19:42:06 +04:00
|
|
|
int32_t m_num_post_msgs;
|
2006-09-21 23:57:57 +04:00
|
|
|
|
|
|
|
/** Number of "count" messages from the remote complete group
|
|
|
|
we've received */
|
2007-05-24 19:42:06 +04:00
|
|
|
int32_t m_num_complete_msgs;
|
2006-09-21 23:57:57 +04:00
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
/** cyclic counter for a unique tage for long messages. Not
|
2007-05-24 19:41:24 +04:00
|
|
|
protected by the m_lock - must use create_send_tag() to
|
2006-07-18 02:08:55 +04:00
|
|
|
create a send tag */
|
2007-05-24 19:41:24 +04:00
|
|
|
volatile int32_t m_tag_counter;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
opal_list_t m_copy_pending_sendreqs;
|
|
|
|
unsigned int *m_copy_num_pending_sendreqs;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-05-24 21:21:56 +04:00
|
|
|
opal_list_t m_queued_sendreqs;
|
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
/** start sending data eagerly */
|
2007-05-30 21:06:19 +04:00
|
|
|
bool m_eager_send_active;
|
|
|
|
bool m_eager_send_ok;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
/* RDMA data */
|
|
|
|
bool m_use_rdma;
|
2007-07-05 20:50:05 +04:00
|
|
|
bool m_rdma_wait_completion;
|
2007-07-03 02:22:59 +04:00
|
|
|
ompi_osc_rdma_setup_info_t *m_setup_info;
|
|
|
|
ompi_osc_rdma_peer_info_t *m_peer_info;
|
2007-07-05 07:32:32 +04:00
|
|
|
int32_t m_rdma_num_pending;
|
2007-07-03 02:22:59 +04:00
|
|
|
|
2007-07-06 01:40:06 +04:00
|
|
|
/*** buffering ***/
|
|
|
|
bool m_use_buffers;
|
|
|
|
ompi_osc_rdma_buffer_t *m_pending_buffers;
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
/* ********************* FENCE data ************************ */
|
2007-05-24 19:41:24 +04:00
|
|
|
/* an array of <sizeof(m_comm)> ints, each containing the value
|
2006-07-18 02:08:55 +04:00
|
|
|
1. */
|
2007-05-24 19:41:24 +04:00
|
|
|
int *m_fence_coll_counts;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/* ********************* PWSC data ************************ */
|
2007-05-24 19:41:24 +04:00
|
|
|
struct ompi_group_t *m_pw_group;
|
|
|
|
struct ompi_group_t *m_sc_group;
|
|
|
|
bool *m_sc_remote_active_ranks;
|
|
|
|
int *m_sc_remote_ranks;
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/* ********************* LOCK data ************************ */
|
2007-05-24 19:41:24 +04:00
|
|
|
int32_t m_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
|
|
|
|
int32_t m_shared_count;
|
|
|
|
opal_list_t m_locks_pending;
|
|
|
|
opal_list_t m_unlocks_pending;
|
|
|
|
int32_t m_lock_received_ack;
|
2006-07-18 02:08:55 +04:00
|
|
|
};
|
|
|
|
typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t;
|
2006-08-25 01:17:31 +04:00
|
|
|
/** The osc/rdma component instance; declared here, defined elsewhere. */
OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component;
|
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
/**
 * Fetch the osc/rdma module stored in a window's w_osc_module field.
 *
 * The argument is parenthesized so that expression arguments such as
 * GET_MODULE(wins + 1) or GET_MODULE(&win_obj) expand correctly.
 */
#define GET_MODULE(win) ((ompi_osc_rdma_module_t*) (win)->w_osc_module)
|
2006-07-18 02:08:55 +04:00
|
|
|
|
|
|
|
/*
 * Component functions
 *
 * NOTE(review): the per-function notes below are inferred from the names
 * and signatures only; confirm against the implementations.
 */

/**
 * Component initialization entry point.
 *
 * @param enable_progress_threads  whether progress threads are enabled
 * @param enable_mpi_threads       whether MPI threads are enabled
 * @return int status code (presumably an OMPI_* constant)
 */
int ompi_osc_rdma_component_init(bool enable_progress_threads,
                                 bool enable_mpi_threads);

/**
 * Component finalization entry point.
 *
 * @return int status code (presumably an OMPI_* constant)
 */
int ompi_osc_rdma_component_finalize(void);

/**
 * Query entry point; takes the same (win, info, comm) triple as
 * ompi_osc_rdma_component_select().
 *
 * @return int -- presumably a priority or status; TODO confirm
 */
int ompi_osc_rdma_component_query(struct ompi_win_t *win,
                                  struct ompi_info_t *info,
                                  struct ompi_communicator_t *comm);

/**
 * Select entry point for a window.
 *
 * @return int status code (presumably an OMPI_* constant)
 */
int ompi_osc_rdma_component_select(struct ompi_win_t *win,
                                   struct ompi_info_t *info,
                                   struct ompi_communicator_t *comm);

/**
 * Component progress function.
 *
 * @return int -- presumably the number of events progressed; TODO confirm
 */
int ompi_osc_rdma_component_progress(void);
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
/**
 * Free a peer-info structure.
 * NOTE(review): inferred from the name -- confirm whether this releases
 * only the arrays referenced by peer_info or the structure itself.
 *
 * @param peer_info  peer information to release
 * @return int status code (presumably an OMPI_* constant)
 */
int ompi_osc_rdma_peer_info_free(ompi_osc_rdma_peer_info_t *peer_info);
|
|
|
|
|
2006-07-18 02:08:55 +04:00
|
|
|
/*
 * Module interface functions
 *
 * NOTE(review): names and parameter lists mirror the MPI one-sided calls
 * (MPI_Win_free, MPI_Put, MPI_Accumulate, MPI_Get, MPI_Win_fence,
 * MPI_Win_start/complete/post/wait/test, MPI_Win_lock/unlock); these are
 * presumably the osc/rdma implementations of those operations -- confirm
 * against the module function table.  All return an int status code.
 */

/** Free the module attached to a window. */
int ompi_osc_rdma_module_free(struct ompi_win_t *win);

/** Put: origin buffer/count/datatype to target rank/displacement/count/datatype. */
int ompi_osc_rdma_module_put(void *origin_addr,
                             int origin_count,
                             struct ompi_datatype_t *origin_dt,
                             int target,
                             int target_disp,
                             int target_count,
                             struct ompi_datatype_t *target_dt,
                             struct ompi_win_t *win);

/** Accumulate: same arguments as put, plus the reduction operation op. */
int ompi_osc_rdma_module_accumulate(void *origin_addr,
                                    int origin_count,
                                    struct ompi_datatype_t *origin_dt,
                                    int target,
                                    int target_disp,
                                    int target_count,
                                    struct ompi_datatype_t *target_dt,
                                    struct ompi_op_t *op,
                                    struct ompi_win_t *win);

/** Get: same argument list as put. */
int ompi_osc_rdma_module_get(void *origin_addr,
                             int origin_count,
                             struct ompi_datatype_t *origin_dt,
                             int target,
                             int target_disp,
                             int target_count,
                             struct ompi_datatype_t *target_dt,
                             struct ompi_win_t *win);

/** Fence synchronization on a window. */
int ompi_osc_rdma_module_fence(int assert, struct ompi_win_t *win);

/** Start an access epoch to the given group. */
int ompi_osc_rdma_module_start(struct ompi_group_t *group,
                               int assert,
                               struct ompi_win_t *win);

/** Complete the access epoch begun with start. */
int ompi_osc_rdma_module_complete(struct ompi_win_t *win);

/** Post an exposure epoch to the given group. */
int ompi_osc_rdma_module_post(struct ompi_group_t *group,
                              int assert,
                              struct ompi_win_t *win);

/** Wait for the exposure epoch begun with post to finish. */
int ompi_osc_rdma_module_wait(struct ompi_win_t *win);

/** Test variant of wait; the result is reported through *flag. */
int ompi_osc_rdma_module_test(struct ompi_win_t *win,
                              int *flag);

/** Lock the window at the target rank with the given lock type. */
int ompi_osc_rdma_module_lock(int lock_type,
                              int target,
                              int assert,
                              struct ompi_win_t *win);

/** Unlock the window at the target rank. */
int ompi_osc_rdma_module_unlock(int target,
                                struct ompi_win_t *win);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* passive side sync interface functions
|
|
|
|
*/
|
|
|
|
int ompi_osc_rdma_passive_lock(ompi_osc_rdma_module_t *module,
|
|
|
|
int32_t origin,
|
|
|
|
int32_t lock_type);
|
|
|
|
|
|
|
|
int ompi_osc_rdma_passive_unlock(ompi_osc_rdma_module_t *module,
|
|
|
|
int32_t origin,
|
|
|
|
int32_t count);
|
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
int ompi_osc_rdma_passive_unlock_complete(ompi_osc_rdma_module_t *module);
|
|
|
|
|
2007-07-03 02:22:59 +04:00
|
|
|
|
2007-05-24 19:41:24 +04:00
|
|
|
END_C_DECLS
|
2006-07-18 02:08:55 +04:00
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
#endif /* OMPI_OSC_RDMA_H */
|