/* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file */ #ifndef MCA_PTL_MX_H #define MCA_PTL_MX_H #include "ompi_config.h" #include #include "mca/pml/pml.h" #include "mca/ptl/ptl.h" #include "class/ompi_bitmap.h" #include "class/ompi_free_list.h" #include "class/ompi_proc_table.h" #define MCA_PTL_MX_STATISTICS 0 /** * Myricom MX PTL component. */ struct mca_ptl_mx_component_t { mca_ptl_base_component_1_0_0_t super; /**< base PTL component */ int mx_free_list_num; /**< initial size of free lists */ int mx_free_list_max; /**< maximum size of free lists */ int mx_free_list_inc; /**< number of elements to growing free lists by */ int mx_prepost; /**< number of preposted recvs */ int mx_debug; /**< debug level */ uint32_t mx_filter; /**< filter assigned to application */ uint32_t mx_num_ptls; /**< number of MX NICs available to app */ uint32_t mx_max_ptls; /**< max number of MX NICs to use */ struct mca_ptl_mx_module_t** mx_ptls; /**< array of available PTL moduless */ ompi_free_list_t mx_send_frags; /**< free list of mx send fragments */ ompi_free_list_t mx_recv_frags; /**< free list of mx recv fragments */ ompi_hash_table_t mx_procs; /**< hash table of procs */ ompi_list_t mx_pending_acks; /**< queue of pending sends */ ompi_mutex_t mx_lock; /**< lock for accessing module state */ }; typedef struct mca_ptl_mx_component_t mca_ptl_mx_component_t; struct mca_ptl_mx_recv_frag_t; struct mca_ptl_mx_send_frag_t; extern mca_ptl_mx_component_t mca_ptl_mx_component; /** * Register MX module parameters with the MCA framework */ extern int mca_ptl_mx_component_open(void); /** * Any final cleanup before being unloaded. */ extern int mca_ptl_mx_component_close(void); /** * MCA->PTL Intializes the PTL component and creates specific PTL * module(s). * * @param num_ptls (OUT) Returns the number of ptl instances created, or 0 * if the transport is not available. * * @param allow_multi_user_threads (OUT) Indicated wether this component can * run at MPI_THREAD_MULTIPLE or not. * * @param have_hidden_threads (OUT) Whether this component uses * hidden threads (e.g., progress threads) or not. * * @return Array of pointers to PTL modules, or NULL if the transport * is not available. * * During component initialization, the PTL component should discover * the physical devices that are available for the given transport, * and create a PTL instance to represent each device. Any addressing * information required by peers to reach the device should be published * during this function via the mca_base_modex_send() interface. * */ extern mca_ptl_base_module_t** mca_ptl_mx_component_init( int *num_ptls, bool *allow_multi_user_threads, bool *have_hidden_threads ); /** * MCA->PTL Called to dynamically change a component parameter. * * @param flag (IN) Parameter to change. * @param value (IN) Optional parameter value. * * @return OMPI_SUCCESS or error code on failure. * * The only supported parameter is currently MCA_PTL_ENABLE, * which can be used by the PML to enable/disable forwarding * by the PTL. */ extern int mca_ptl_mx_component_control( int param, void* value, size_t size ); /** * MCA->PTL Called to progress outstanding requests for * non-threaded polling environments. * * @param tstamp Current time. * @return OMPI_SUCCESS or error code on failure. */ extern int mca_ptl_mx_component_progress( mca_ptl_tstamp_t tstamp ); /** * Myricom MX PTL module. */ struct mca_ptl_mx_module_t { mca_ptl_base_module_t super; /**< base PTL module interface */ ompi_list_t mx_peers; /**< list of peers */ uint64_t mx_nic_addr; /**< NIC MAC address */ uint32_t mx_filter; /**< endpoint filter */ uint32_t mx_endpoint_id; /**< endpoint ID */ bool mx_enabled; /**< flag to indicate if endpoint enabled */ mx_endpoint_t mx_endpoint; /**< endpoint */ mx_endpoint_addr_t mx_endpoint_addr; /**< endpoint address */ volatile int32_t mx_recvs_posted; /**< count of posted match fragments */ #if OMPI_ENABLE_PROGRESS_THREADS ompi_thread_t mx_thread; /**< thread for progressing outstanding requests */ #endif }; typedef struct mca_ptl_mx_module_t mca_ptl_mx_module_t; extern mca_ptl_mx_module_t mca_ptl_mx_module; /** * Create/initialize the MX PTL modules. * @return OMPI_SUCCESS or error status on failure. */ extern int mca_ptl_mx_module_init(void); /** * Cleanup any resources held by the PTL. * * @param ptl PTL instance. * @return OMPI_SUCCESS or error status on failure. */ extern int mca_ptl_mx_finalize( struct mca_ptl_base_module_t* ptl ); /** * PML->PTL notification of change in the process list. * * @param ptl (IN) PTL instance * @param nprocs (IN) Number of processes * @param procs (IN) Set of processes * @param peer (OUT) Set of (optional) mca_ptl_base_peer_t instances returned by PTL. * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this PTL. * @return OMPI_SUCCESS or error status on failure. * * The mca_ptl_base_module_add_procs_fn_t() is called by the PML to * determine the set of PTLs that should be used to reach each process. * Any addressing information exported by the peer via the mca_base_modex_send() * function should be available during this call via the corresponding * mca_base_modex_recv() function. The PTL may utilize this information to * determine reachability of each peer process. * * For each process that is reachable by the PTL, the bit corresponding to the index * into the proc array (nprocs) should be set in the reachable bitmask. The PML * provides the PTL the option to return a pointer to a data structure defined * by the PTL that is returned to the PTL on subsequent calls to the PTL data * transfer functions (e.g ptl_send). This may be used by the PTL to cache any addressing * or connection information (e.g. TCP socket, IP queue pair). */ extern int mca_ptl_mx_add_procs( struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **procs, struct mca_ptl_base_peer_t** peers, ompi_bitmap_t* reachable ); /** * PML->PTL notification of change to the process list. * * @param ptl (IN) PTL instance * @param nprocs (IN) Number of processes * @param proc (IN) Set of processes * @param peer (IN) Set of peer addressing information. * @return Status indicating if cleanup was successful * * When the process list changes, the PML notifies the PTL of the * change, to provide the opportunity to cleanup or release any * resources associated with the peer. */ extern int mca_ptl_mx_del_procs( struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **procs, struct mca_ptl_base_peer_t** peers ); /** * PML->PTL Initialize a send request for use by the PTL. * * @param ptl (IN) PTL instance * @param request (IN) Pointer to allocated request. * * To reduce latency (number of required allocations), the PML allocates up * to ptl_cache_bytes of additional space contigous w/ the base send request. * This space may be used by the PTL for additional control information (e.g. * first fragment descriptor). * * The ptl_request_init() function is called by the PML when requests are * allocated to the PTLs cache. These requests will be cached by the PML * on completion and re-used by the same PTL w/out additional calls to * ptl_request_init(). * * If the cache size is exceeded, the PML may pass requests to ptl_send/ptl_put * that have been taken from the global pool and have not been initialized by the * PTL. These requests will have the req_cached attribute set to false. * */ extern int mca_ptl_mx_request_init( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* ); /** * PML->PTL Cleanup any resources that may have been associated with the * request by the PTL. * * @param ptl (IN) PTL instance * @param request (IN) Pointer to allocated request. * * The ptl_request_fini function is called when the PML removes a request * from the PTLs cache (due to resource constraints). This routine provides * the PTL the chance to cleanup/release any resources cached on the send * descriptor by the PTL. */ extern void mca_ptl_mx_request_fini( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* ); /** * PML->PTL Notification from the PML to the PTL that a receive * has been posted and matched against the indicated fragment. * * @param ptl (IN) PTL instance * @param recv_frag Matched fragment * * The ptl_matched() function is called by the PML when a fragment * is matched to a posted receive. This may occur during a call to * ptl_match() if the receive is matched, or at a later point in time * when a matching receive is posted. * * When this routine is called, the PTL is responsible for generating * an acknowledgment to the peer if the MCA_PTL_FLAGS_ACK * bit is set in the original fragment header. Additionally, the PTL * is responsible for transferring any data associated with the fragment * into the users buffer utilizing the datatype engine, and notifying * the PML that the fragment has completed via the ptl_recv_progress() * function. */ extern void mca_ptl_mx_matched( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_recv_frag_t* frag ); /** * PML->PTL Initiate a send to the peer. * * @param ptl (IN) PTL instance * @param ptl_base_peer (IN) PTL peer addressing * @param request (IN) Send request * @param offset Current offset into packed/contiguous buffer. * @param size (IN) Number of bytes PML is requesting PTL to deliver, * @param flags (IN) Flags that should be passed to the peer via the message header. * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments * * The PML implements a rendevouz protocol, with up to the PTL threshold * (ptl_first_frag_size) bytes of the message sent in eager send mode. The ptl_send() * function is called by the PML to initiate the send of the first message fragment. * * The PTL is responsible for updating the current data offset (req_offset) in the * request to reflect the actual number of bytes fragmented. This may be less than * the requested size, due to resource constraints or datatype alighnment/offset. If * an acknowledgment is required, the MCA_PTL_FLAGS_ACK bit will be set in the * flags parameter. In this case, the PTL should not call ptl_send_progress() function * to indicate completion of the fragment until the ack is received. For all other * fragments ptl_send_progress() may be called based on local completion semantics. */ extern int mca_ptl_mx_send( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_peer_t* ptl_peer, struct mca_ptl_base_send_request_t*, size_t offset, size_t size, int flags ); /** * PML->PTL Continue sending fragments of a large message. * * @param ptl (IN) PTL instance * @param ptl_base_peer (IN) PTL peer addressing * @param request (IN) Send request * @param offset Current offset into packed/contiguous buffer. * @param size (IN) Number of bytes PML is requesting PTL to deliver, * @param flags (IN) Flags that should be passed to the peer via the message header. * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments * */ extern int mca_ptl_mx_send_continue( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_peer_t* ptl_peer, struct mca_ptl_base_send_request_t*, size_t offset, size_t size, int flags ); #define HAVE_MX_ICOMPLETED 0 #if HAVE_MX_ICOMPLETED extern mx_return_t mx_icompleted(mx_endpoint_t endpoint, mx_status_t *status, uint32_t *result); #endif #endif