39dbeeedfb
This commit was SVN r6327.
363 строки
13 KiB
C
363 строки
13 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
/**
|
|
* @file
|
|
*/
|
|
#ifndef MCA_PTL_MX_H
|
|
#define MCA_PTL_MX_H
|
|
|
|
#include "ompi_config.h"
|
|
#include <myriexpress.h>
|
|
#include "mca/pml/pml.h"
|
|
#include "mca/ptl/ptl.h"
|
|
#include "class/ompi_bitmap.h"
|
|
#include "class/ompi_free_list.h"
|
|
#include "class/ompi_proc_table.h"
|
|
|
|
/* Compile-time switch, fixed to 0 here. Presumably enables statistics
 * gathering in the MX PTL when non-zero; it is not referenced anywhere
 * in this header -- TODO confirm against the implementation files. */
#define MCA_PTL_MX_STATISTICS 0
|
|
|
|
/**
|
|
* Myricom MX PTL component.
|
|
*/
|
|
struct mca_ptl_mx_component_t {
|
|
mca_ptl_base_component_1_0_0_t super; /**< base PTL component */
|
|
int mx_free_list_num; /**< initial size of free lists */
|
|
int mx_free_list_max; /**< maximum size of free lists */
|
|
int mx_free_list_inc; /**< number of elements to growing free lists by */
|
|
int mx_prepost; /**< number of preposted recvs */
|
|
int mx_debug; /**< debug level */
|
|
uint32_t mx_filter; /**< filter assigned to application */
|
|
uint32_t mx_num_ptls; /**< number of MX NICs available to app */
|
|
uint32_t mx_max_ptls; /**< max number of MX NICs to use */
|
|
struct mca_ptl_mx_module_t** mx_ptls; /**< array of available PTL moduless */
|
|
ompi_free_list_t mx_send_frags; /**< free list of mx send fragments */
|
|
ompi_free_list_t mx_recv_frags; /**< free list of mx recv fragments */
|
|
opal_hash_table_t mx_procs; /**< hash table of procs */
|
|
opal_list_t mx_pending_acks; /**< queue of pending sends */
|
|
opal_mutex_t mx_lock; /**< lock for accessing module state */
|
|
};
|
|
|
|
typedef struct mca_ptl_mx_component_t mca_ptl_mx_component_t;

/* Fragment types are only forward-declared in this header. */
struct mca_ptl_mx_recv_frag_t;
struct mca_ptl_mx_send_frag_t;

/* The MX PTL component instance; its definition lives outside this header. */
extern mca_ptl_mx_component_t mca_ptl_mx_component;
|
|
|
|
/**
 * Register MX module parameters with the MCA framework.
 */
extern int mca_ptl_mx_component_open(void);
|
|
|
|
/**
 * Any final cleanup before the component is unloaded.
 */
extern int mca_ptl_mx_component_close(void);
|
|
|
|
/**
|
|
* MCA->PTL Intializes the PTL component and creates specific PTL
|
|
* module(s).
|
|
*
|
|
* @param num_ptls (OUT) Returns the number of ptl instances created, or 0
|
|
* if the transport is not available.
|
|
*
|
|
* @param allow_multi_user_threads (OUT) Indicated wether this component can
|
|
* run at MPI_THREAD_MULTIPLE or not.
|
|
*
|
|
* @param have_hidden_threads (OUT) Whether this component uses
|
|
* hidden threads (e.g., progress threads) or not.
|
|
*
|
|
* @return Array of pointers to PTL modules, or NULL if the transport
|
|
* is not available.
|
|
*
|
|
* During component initialization, the PTL component should discover
|
|
* the physical devices that are available for the given transport,
|
|
* and create a PTL instance to represent each device. Any addressing
|
|
* information required by peers to reach the device should be published
|
|
* during this function via the mca_base_modex_send() interface.
|
|
*
|
|
*/
|
|
|
|
extern mca_ptl_base_module_t** mca_ptl_mx_component_init(
|
|
int *num_ptls,
|
|
bool *allow_multi_user_threads,
|
|
bool *have_hidden_threads
|
|
);
|
|
|
|
|
|
/**
 * MCA->PTL Called to dynamically change a component parameter.
 *
 * @param param (IN) Parameter to change.
 * @param value (IN) Optional parameter value.
 * @param size  (IN) Presumably the size in bytes of the data that
 *                   @p value points to -- TODO confirm; the original
 *                   comment did not document this argument.
 *
 * @return OMPI_SUCCESS or error code on failure.
 *
 * The only supported parameter is currently MCA_PTL_ENABLE,
 * which can be used by the PML to enable/disable forwarding
 * by the PTL.
 */
extern int mca_ptl_mx_component_control(
    int param,
    void *value,
    size_t size);
|
|
|
|
/**
|
|
* MCA->PTL Called to progress outstanding requests for
|
|
* non-threaded polling environments.
|
|
*
|
|
* @param tstamp Current time.
|
|
* @return OMPI_SUCCESS or error code on failure.
|
|
*/
|
|
|
|
extern int mca_ptl_mx_component_progress(
|
|
mca_ptl_tstamp_t tstamp
|
|
);
|
|
|
|
/**
|
|
* Myricom MX PTL module.
|
|
*/
|
|
struct mca_ptl_mx_module_t {
|
|
mca_ptl_base_module_t super; /**< base PTL module interface */
|
|
opal_list_t mx_peers; /**< list of peers */
|
|
uint64_t mx_nic_addr; /**< NIC MAC address */
|
|
uint32_t mx_filter; /**< endpoint filter */
|
|
uint32_t mx_endpoint_id; /**< endpoint ID */
|
|
bool mx_enabled; /**< flag to indicate if endpoint enabled */
|
|
mx_endpoint_t mx_endpoint; /**< endpoint */
|
|
mx_endpoint_addr_t mx_endpoint_addr; /**< endpoint address */
|
|
volatile int32_t mx_recvs_posted; /**< count of posted match fragments */
|
|
#if OMPI_ENABLE_PROGRESS_THREADS
|
|
opal_thread_t mx_thread; /**< thread for progressing outstanding requests */
|
|
#endif
|
|
};
|
|
typedef struct mca_ptl_mx_module_t mca_ptl_mx_module_t;
|
|
|
|
|
|
extern mca_ptl_mx_module_t mca_ptl_mx_module;
|
|
|
|
/**
 * Create/initialize the MX PTL modules.
 *
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_ptl_mx_module_init(void);
|
|
|
|
|
|
/**
 * Cleanup any resources held by the PTL.
 *
 * @param ptl (IN) PTL instance.
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_ptl_mx_finalize(
    struct mca_ptl_base_module_t *ptl);
|
|
|
|
|
|
/**
|
|
* PML->PTL notification of change in the process list.
|
|
*
|
|
* @param ptl (IN) PTL instance
|
|
* @param nprocs (IN) Number of processes
|
|
* @param procs (IN) Set of processes
|
|
* @param peer (OUT) Set of (optional) mca_ptl_base_peer_t instances returned by PTL. * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this PTL.
|
|
* @return OMPI_SUCCESS or error status on failure.
|
|
*
|
|
* The mca_ptl_base_module_add_procs_fn_t() is called by the PML to
|
|
* determine the set of PTLs that should be used to reach each process.
|
|
* Any addressing information exported by the peer via the mca_base_modex_send()
|
|
* function should be available during this call via the corresponding
|
|
* mca_base_modex_recv() function. The PTL may utilize this information to
|
|
* determine reachability of each peer process.
|
|
*
|
|
* For each process that is reachable by the PTL, the bit corresponding to the index
|
|
* into the proc array (nprocs) should be set in the reachable bitmask. The PML
|
|
* provides the PTL the option to return a pointer to a data structure defined
|
|
* by the PTL that is returned to the PTL on subsequent calls to the PTL data
|
|
* transfer functions (e.g ptl_send). This may be used by the PTL to cache any addressing
|
|
* or connection information (e.g. TCP socket, IP queue pair).
|
|
*/
|
|
|
|
extern int mca_ptl_mx_add_procs(
|
|
struct mca_ptl_base_module_t* ptl,
|
|
size_t nprocs,
|
|
struct ompi_proc_t **procs,
|
|
struct mca_ptl_base_peer_t** peers,
|
|
ompi_bitmap_t* reachable
|
|
);
|
|
|
|
|
|
/**
 * PML->PTL notification of change to the process list.
 *
 * @param ptl    (IN) PTL instance
 * @param nprocs (IN) Number of processes
 * @param procs  (IN) Set of processes
 * @param peers  (IN) Set of peer addressing information.
 * @return Status indicating if cleanup was successful
 *
 * When the process list changes, the PML notifies the PTL of the
 * change, to provide the opportunity to cleanup or release any
 * resources associated with the peer.
 */
extern int mca_ptl_mx_del_procs(
    struct mca_ptl_base_module_t *ptl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_ptl_base_peer_t **peers);
|
|
|
|
|
|
/**
 * PML->PTL Initialize a send request for use by the PTL.
 *
 * @param ptl     (IN) PTL instance
 * @param request (IN) Pointer to allocated request.
 *
 * To reduce latency (number of required allocations), the PML allocates up
 * to ptl_cache_bytes of additional space contiguous w/ the base send
 * request. This space may be used by the PTL for additional control
 * information (e.g. first fragment descriptor).
 *
 * The ptl_request_init() function is called by the PML when requests are
 * allocated to the PTL's cache. These requests will be cached by the PML
 * on completion and re-used by the same PTL w/out additional calls to
 * ptl_request_init().
 *
 * If the cache size is exceeded, the PML may pass requests to
 * ptl_send/ptl_put that have been taken from the global pool and have not
 * been initialized by the PTL. These requests will have the req_cached
 * attribute set to false.
 */
extern int mca_ptl_mx_request_init(
    struct mca_ptl_base_module_t *ptl,
    struct mca_ptl_base_send_request_t *request);
|
|
|
|
|
|
/**
 * PML->PTL Cleanup any resources that may have been associated with the
 * request by the PTL.
 *
 * @param ptl     (IN) PTL instance
 * @param request (IN) Pointer to allocated request.
 *
 * The ptl_request_fini function is called when the PML removes a request
 * from the PTL's cache (due to resource constraints). This routine provides
 * the PTL the chance to cleanup/release any resources cached on the send
 * descriptor by the PTL.
 */
extern void mca_ptl_mx_request_fini(
    struct mca_ptl_base_module_t *ptl,
    struct mca_ptl_base_send_request_t *request);
|
|
|
|
|
|
/**
 * PML->PTL Notification from the PML to the PTL that a receive
 * has been posted and matched against the indicated fragment.
 *
 * @param ptl  (IN) PTL instance
 * @param frag (IN) Matched fragment
 *
 * The ptl_matched() function is called by the PML when a fragment
 * is matched to a posted receive. This may occur during a call to
 * ptl_match() if the receive is matched, or at a later point in time
 * when a matching receive is posted.
 *
 * When this routine is called, the PTL is responsible for generating
 * an acknowledgment to the peer if the MCA_PTL_FLAGS_ACK
 * bit is set in the original fragment header. Additionally, the PTL
 * is responsible for transferring any data associated with the fragment
 * into the user's buffer utilizing the datatype engine, and notifying
 * the PML that the fragment has completed via the ptl_recv_progress()
 * function.
 */
extern void mca_ptl_mx_matched(
    struct mca_ptl_base_module_t *ptl,
    struct mca_ptl_base_recv_frag_t *frag);
|
|
|
|
/**
 * PML->PTL Initiate a send to the peer.
 *
 * @param ptl      (IN) PTL instance
 * @param ptl_peer (IN) PTL peer addressing
 * @param request  (IN) Send request
 * @param offset   (IN) Current offset into packed/contiguous buffer.
 * @param size     (IN) Number of bytes PML is requesting PTL to deliver.
 * @param flags    (IN) Flags that should be passed to the peer via the
 *                      message header.
 * @return OMPI_SUCCESS if the PTL was able to queue one or more fragments
 *
 * The PML implements a rendezvous protocol, with up to the PTL threshold
 * (ptl_first_frag_size) bytes of the message sent in eager send mode. The
 * ptl_send() function is called by the PML to initiate the send of the
 * first message fragment.
 *
 * The PTL is responsible for updating the current data offset (req_offset)
 * in the request to reflect the actual number of bytes fragmented. This
 * may be less than the requested size, due to resource constraints or
 * datatype alignment/offset. If an acknowledgment is required, the
 * MCA_PTL_FLAGS_ACK bit will be set in the flags parameter. In this case,
 * the PTL should not call the ptl_send_progress() function to indicate
 * completion of the fragment until the ack is received. For all other
 * fragments ptl_send_progress() may be called based on local completion
 * semantics.
 */
extern int mca_ptl_mx_send(
    struct mca_ptl_base_module_t *ptl,
    struct mca_ptl_base_peer_t *ptl_peer,
    struct mca_ptl_base_send_request_t *request,
    size_t offset,
    size_t size,
    int flags);
|
|
|
|
|
|
/**
 * PML->PTL Continue sending fragments of a large message.
 *
 * @param ptl      (IN) PTL instance
 * @param ptl_peer (IN) PTL peer addressing
 * @param request  (IN) Send request
 * @param offset   (IN) Current offset into packed/contiguous buffer.
 * @param size     (IN) Number of bytes PML is requesting PTL to deliver.
 * @param flags    (IN) Flags that should be passed to the peer via the
 *                      message header.
 * @return OMPI_SUCCESS if the PTL was able to queue one or more fragments
 */
extern int mca_ptl_mx_send_continue(
    struct mca_ptl_base_module_t *ptl,
    struct mca_ptl_base_peer_t *ptl_peer,
    struct mca_ptl_base_send_request_t *request,
    size_t offset,
    size_t size,
    int flags);
|
|
|
|
|
|
/*
 * mx_icompleted() is declared here only when HAVE_MX_ICOMPLETED is
 * non-zero. The macro is hard-wired to 0 in this header, so the
 * declaration below is currently compiled out.
 */
#define HAVE_MX_ICOMPLETED 0
#if HAVE_MX_ICOMPLETED
extern mx_return_t mx_icompleted(mx_endpoint_t endpoint,
                                 mx_status_t *status,
                                 uint32_t *result);
#endif
|
|
#endif
|