/* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. * All rights reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef OMPI_OSC_RDMA_H #define OMPI_OSC_RDMA_H #include "opal/class/opal_list.h" #include "opal/class/opal_free_list.h" #include "opal/class/opal_hash_table.h" #include "ompi/win/win.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/osc/osc.h" #include "ompi/mca/btl/btl.h" BEGIN_C_DECLS struct ompi_osc_rdma_component_t { /** Extend the basic osc component interface */ ompi_osc_base_component_t super; /** store the state of progress threads for this instance of OMPI */ bool c_have_progress_threads; /** lock access to datastructures in the component structure */ opal_mutex_t c_lock; /** List of ompi_osc_rdma_module_ts currently in existance. Needed so that received fragments can be dispatched to the correct module */ opal_hash_table_t c_modules; /** Lock for request management */ opal_mutex_t c_request_lock; /** Condition variable for request management */ opal_condition_t c_request_cond; /** free list of ompi_osc_rdma_sendreq_t structures */ opal_free_list_t c_sendreqs; /** free list of ompi_osc_rdma_replyreq_t structures */ opal_free_list_t c_replyreqs; /** free list of ompi_osc_rdma_longreq_t structures */ opal_free_list_t c_longreqs; /** list of outstanding requests, of type ompi_osc_pt2pt_longreq_t */ opal_list_t c_pending_requests; #if OMPI_ENABLE_PROGRESS_THREADS opal_thread_t c_thread; bool c_thread_run; #endif bool c_btl_registered; }; typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t; struct ompi_osc_rdma_module_t { /** Extend the basic osc module interface */ ompi_osc_base_module_t super; /** lock access to data structures in the current module */ opal_mutex_t m_lock; /** condition variable for access to current module */ opal_condition_t m_cond; /** lock for "atomic" window updates from reductions */ opal_mutex_t m_acc_lock; /** pointer back to window */ ompi_win_t *m_win; /** communicator created with this window */ ompi_communicator_t *m_comm; /** list of ompi_osc_rdma_sendreq_t structures, and includes all requests for this access epoch that have not already been started. m_lock must be held when modifying this field. */ opal_list_t m_pending_sendreqs; /** list of unsigned int counters for the number of requests to a particular rank in m_comm for this access epoc. m_lock must be held when modifying this field */ unsigned int *m_num_pending_sendreqs; /** For MPI_Fence synchronization, the number of messages to send in epoch. For Start/Complete, the number of updates for this Complete. For lock, the number of messages waiting for completion on on the origin side. Not protected by m_lock - must use atomic counter operations. */ int32_t m_num_pending_out; /** For MPI_Fence synchronization, the number of expected incoming messages. For Post/Wait, the number of expected updates from complete. For lock, the number of messages on the passive side we are waiting for. Not protected by m_lock - must use atomic counter operations. */ int32_t m_num_pending_in; /** Number of "ping" messages from the remote post group we've received */ int32_t m_num_post_msgs; /** Number of "count" messages from the remote complete group we've received */ int32_t m_num_complete_msgs; /** cyclic counter for a unique tage for long messages. Not protected by the m_lock - must use create_send_tag() to create a send tag */ volatile int32_t m_tag_counter; opal_list_t m_copy_pending_sendreqs; unsigned int *m_copy_num_pending_sendreqs; opal_list_t m_queued_sendreqs; /** start sending data eagerly */ bool m_eager_send_active; bool m_eager_send_ok; /* ********************* FENCE data ************************ */ /* an array of ints, each containing the value 1. */ int *m_fence_coll_counts; /* ********************* PWSC data ************************ */ struct ompi_group_t *m_pw_group; struct ompi_group_t *m_sc_group; bool *m_sc_remote_active_ranks; int *m_sc_remote_ranks; /* ********************* LOCK data ************************ */ int32_t m_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */ int32_t m_shared_count; opal_list_t m_locks_pending; opal_list_t m_unlocks_pending; int32_t m_lock_received_ack; }; typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t; OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component; #define GET_MODULE(win) ((ompi_osc_rdma_module_t*) win->w_osc_module) /* * Component functions */ int ompi_osc_rdma_component_init(bool enable_progress_threads, bool enable_mpi_threads); int ompi_osc_rdma_component_finalize(void); int ompi_osc_rdma_component_query(struct ompi_win_t *win, struct ompi_info_t *info, struct ompi_communicator_t *comm); int ompi_osc_rdma_component_select(struct ompi_win_t *win, struct ompi_info_t *info, struct ompi_communicator_t *comm); int ompi_osc_rdma_component_progress(void); /* * Module interface function types */ int ompi_osc_rdma_module_free(struct ompi_win_t *win); int ompi_osc_rdma_module_put(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, int target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); int ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, int target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, struct ompi_win_t *win); int ompi_osc_rdma_module_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, int target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); int ompi_osc_rdma_module_fence(int assert, struct ompi_win_t *win); int ompi_osc_rdma_module_start(struct ompi_group_t *group, int assert, struct ompi_win_t *win); int ompi_osc_rdma_module_complete(struct ompi_win_t *win); int ompi_osc_rdma_module_post(struct ompi_group_t *group, int assert, struct ompi_win_t *win); int ompi_osc_rdma_module_wait(struct ompi_win_t *win); int ompi_osc_rdma_module_test(struct ompi_win_t *win, int *flag); int ompi_osc_rdma_module_lock(int lock_type, int target, int assert, struct ompi_win_t *win); int ompi_osc_rdma_module_unlock(int target, struct ompi_win_t *win); /* * passive side sync interface functions */ int ompi_osc_rdma_passive_lock(ompi_osc_rdma_module_t *module, int32_t origin, int32_t lock_type); int ompi_osc_rdma_passive_unlock(ompi_osc_rdma_module_t *module, int32_t origin, int32_t count); int ompi_osc_rdma_passive_unlock_complete(ompi_osc_rdma_module_t *module); END_C_DECLS #endif /* OMPI_OSC_RDMA_H */