1
1
Ralph Castain 1e2019ce2a Revert "Update to sync with OMPI master and cleanup to build"
This reverts commit cb55c88a8b7817d5891ff06a447ea190b0e77479.
2016-11-22 15:03:20 -08:00

455 строки
18 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Matching Transport Layer
*
* The Matching Transport Layer (MTL) provides device-layer support
* for transfer of MPI point-to-point messages over devices that
* support hardware / library message matching. This layer is used
* with the MTL PML component to provide lowest latency and highest
* bandwidth on given architectures. Features found in other PML
* interfaces, such as message fragmenting, multi-device support, and
* NIC failover are not provided by the upper layers.
*
* In general, this interface should not be used for transport layer
* support. Instead, the BTL interface should be used. The BTL
* interface allows for multiplexing between multiple users
* (point-to-point, one-sided, etc.) and provides many features not
* found in this interface (RDMA from arbitrary buffers, active
* messaging, reasonable pinned memory caching, etc.)
*/
#ifndef OMPI_MTL_H
#define OMPI_MTL_H
#include "ompi_config.h"
#include "mpi.h" /* needed for MPI_ANY_TAG */
#include "ompi/mca/mca.h"
#include "ompi/mca/pml/pml_constants.h" /* for send_mode enum */
#include "ompi/request/request.h"
BEGIN_C_DECLS
struct ompi_request_t;
struct opal_convertor_t;
struct mca_mtl_base_module_t;
struct mca_mtl_request_t {
/** pointer to associated ompi_request_t */
struct ompi_request_t *ompi_req;
void (*completion_callback)(struct mca_mtl_request_t* mtl_request);
};
typedef struct mca_mtl_request_t mca_mtl_request_t;
/**
* MTL module flags
*/
#define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001
/**
* Initialization routine for MTL component
*
* Initialization routine for MTL component. This function should
* allocate resources for communication and try to do all local setup.
* It should not attempt to contact it's peers, as that should be
* done at add_procs time. Contact information should be published
* during this initialization function. It will be made available
* during add_procs().
*
* @param enable_progress_threads (IN) Progress threads have been
* enabled by the user and the component must be
* capable of making asycnhronous progress (either
* with its own thread, with the kernel, or with
* the event library.
* @param enable_mpi_threads (IN) MPI threads have been enabled by the
* user and the component must be capable of coping
* with threads. If the component can cope with
* MPI_THREAD_MULTIPLE, enable_mpi_thread_multiple
* should be set to true. Otherwise, it is assumed
* that only THREAD_FUNNELLED and THREAD_SERIALIZED
* can be used.
* @param enable_mpi_thread_multiple (OUT) Component does / does not
* support MPI_THREAD_MULTIPLE. This variable only
* needs to be set if enable_mpi_threads is true.
* Otherwise, the return value will be ignored.
*
* @retval NULL component can not operate on the current machine
* @retval non-NULL component interface function
*/
typedef struct mca_mtl_base_module_t*
(*mca_mtl_base_component_init_fn_t)(bool enable_progress_threads,
bool enable_mpi_threads);
struct mca_mtl_base_component_2_0_0_t {
mca_base_component_t mtl_version;
mca_base_component_data_t mtl_data;
mca_mtl_base_component_init_fn_t mtl_init;
};
typedef struct mca_mtl_base_component_2_0_0_t mca_mtl_base_component_2_0_0_t;
typedef struct mca_mtl_base_component_2_0_0_t mca_mtl_base_component_t;
/**
* MCA->MTL Clean up any resources held by MTL module
*
* Opposite of module_init. Called when communication will no longer
* be necessary. ussually this is during MPI_FINALIZE, but it can be
* earlier if the component was not selected to run. Assuming
* module_init was called, finalize will always be called before the
* component_close function is called.
*
* @param mtl (IN) MTL module returned from call to initialize
*
* @retval OMPI_SUCCESS cleanup finished successfully
* @retval other failure during cleanup
*
*/
typedef int (*mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t* mtl);
/**
* PML->MTL notification of change in the process list.
*
* The mca_mtl_base_module_add_procs_fn_t() is used by the PML to
* notify the MTL that new processes are connected to the current
* process. Any addressing information exported by the peer via the
* ompi_modex_send() function should be available during this
* call via the corresponding ompi_modex_recv() function. The
* MTL may utilize this information to determine reachability of each
* peer process.
*
* It is an error for a proc to not be reachable by the given MTL, and
* an error should be returned if that case is detected. If a MTL
* requires per-endpoint data, it must handle storage, either using a
* static endpoint tag (MTL is the default tag that should generally
* be used) or a dynamic endpoint tag (although it should be noted
* that OMPI can be built without dynamic endpoint tag support).
*
* @param mtl (IN) MTL module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
*
* @retval OMPI_SUCCESS successfully connected to processes
* @retval other failure during setup
*/
typedef int (*mca_mtl_base_module_add_procs_fn_t)(
struct mca_mtl_base_module_t* mtl,
size_t nprocs,
struct ompi_proc_t** procs);
/**
* Notification of change to the process list.
*
* When the process list changes, the PML notifies the MTL of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer. The MTL is responsible for
* releasing any memory associated with the endpoint data it may have
* stored during add_procs().
*
* @param mtl (IN) MTL module
* @param nprocs (IN) Number of processes
* @param proc (IN) Set of processes
* @param peer (IN) Set of peer addressing information.
*
* @return Status indicating if cleanup was successful
*/
typedef int (*mca_mtl_base_module_del_procs_fn_t)(
struct mca_mtl_base_module_t* mtl,
size_t nprocs,
struct ompi_proc_t** procs);
/**
* Blocking send to peer
*
* Blocking send (Call should not return until the user buffer may be
* used again). Standard MPI semantics must be met by this call, as
* mandated in the mode argument. There is one special mode argument,
* MCA_PML_BASE_SEND_COMPLETE, which requires local completion before
* the function can return. This is an optimization for coillective
* routines that can otherwise lead to degenerate performance for
* broadcast-based collectives.
*
* @param comm (IN) Communicator used for operation
* @param dest (IN) Destination rank for send (relative to comm)
* @param tag (IN) MPI tag used for sending. See note below.
* @param convertor (IN) Datatype convertor describing send datatype.
* Already prepared for send.
* @param mode (IN) Mode for send operation
*
* @return OMPI_SUCCESS or error value
*
* \note Open MPI is built around non-blocking operations. This
* function is provided for networks where progressing events outside
* of point-to-point (for example, collectives, I/O, one-sided) can
* occur without a progress function regularily being triggered.
*
* \note While MPI does not allow users to specify negative tags, they
* are used internally in Open MPI to provide a unique channel for
* collective operations. Therefore, the MTL can *not* cause an error
* if a negative tag is used.
*/
typedef int (*mca_mtl_base_module_send_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm,
int dest,
int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode);
/**
* Non-blocking send to peer
*
* Non-blocking send to peer. Standard MPI semantics must be met by
* this call, as mandated in the mode argument. There is one special
* mode argument, MCA_PML_BASE_SEND_COMPLETE, which requires local
* completion before the request is marked as complete.
*
* The PML will handle creation of the request, leaving the number of
* bytes requested in the module structure available for the MTL
* directly after the ompi_request_t structure. The PML will handle
* proper destruction of the request once it can safely be destructed
* (it has been completed and freeed by a call to REQUEST_FReE or
* TEST/WAIT). The MTL should remove all resources associated with
* the request when it is marked as completed.
*
* @param comm (IN) Communicator used for operation
* @param dest (IN) Destination rank for send (relative to comm)
* @param tag (IN) MPI tag used for sending. See note below.
* @param convertor (IN) Datatype convertor describing send datatype.
* Already prepared for send.
* @param mode (IN) Mode for send operation (see pml.h)
* @param blocking (IN) True if the call originated from a blocking
* call, but the PML decided to use a
* non-blocking operation, likely for
* internal performance decisions This is an
* optimization flag and is not needed for
* correctness.
* @param mtl_request (IN) Pointer to mtl_request. The ompi_req field
* will be populated with an initialized
* ompi_request_t before calling.
*
* @return OMPI_SUCCESS or error value
*
* \note While MPI does not allow users to specify negative tags, they
* are used internally in Open MPI to provide a unique channel for
* collective operations. Therefore, the MTL can *not* cause an error
* if a negative tag is used.
*/
typedef int (*mca_mtl_base_module_isend_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm,
int dest,
int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode,
bool blocking,
mca_mtl_request_t *mtl_request);
/**
* Non-blocking receive
*
* Non-blocking receive function. Standard MPI semantics for
* MPI_Irecv must be implemented by this call.
*
* The PML will handle creation of the request, leaving the number of
* bytes requested in teh module structure available for the MTL,
* directly after the ompi_request_t structure. The PML will handle
* proper destruction of the request once it can safely be destroyed
* (it has been completed and free'ed by a call to REQUEST_FREE or
* TEST/WAIT). The MTL should remove all resources associated with
* the request when it is marked as completed.
*
* @param comm (IN) Communicator used for operation
* @param src (IN) Source rank for send (relative to comm)
* @param tag (IN) MPI tag used for sending. See note below.
* @param convertor (IN) Datatype convertor describing receive datatype.
* Already prepared for receive.
* @param mtl_request (IN) Pointer to mtl_request. The ompi_req field
* will be populated with an initialized
* ompi_request_t before calling.
*
* @return OMPI_SUCCESS or error value
*
* \note While MPI does not allow users to specify negative tags, they
* are used internally in Open MPI to provide a unique channel for
* collective operations. Therefore, the MTL can *not* cause an error
* if a negative tag is used. Further, MPI_ANY_TAG should *not* match
* against negative tags.
*/
typedef int (*mca_mtl_base_module_irecv_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request);
/**
* Non-blocking probe
*
* Non-blocking probe function. Standard MPI semantics for MPI_IPROBE
* must be implemented by this call.
*
* @param comm (IN) Communicator used for operation
* @param src (IN) Source rank for send (relative to comm)
* @param tag (IN) MPI tag used for sending. See note below.
* @param flag (OUT) true if message available, false otherwise
* @param status (OUT) Status structure for information on
* available message
*
* \note While MPI does not allow users to specify negative tags, they
* are used internally in Open MPI to provide a unique channel for
* collective operations. Therefore, the MTL can *not* cause an error
* if a negative tag is used. Further, MPI_ANY_TAG should *not* match
* against negative tags.
*/
typedef int (*mca_mtl_base_module_iprobe_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *flag,
struct ompi_status_public_t *status);
typedef int (*mca_mtl_base_module_imrecv_fn_t)(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor,
struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request);
typedef int (*mca_mtl_base_module_improbe_fn_t)(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *matched,
struct ompi_message_t **message,
struct ompi_status_public_t *status);
/**
* Cancel an existing request
*
* Attempt to cancel an existing request. The (poorly defined)
* semantics for MPI_CANCEL must be implemented by this call. This,
* of course, allows the MTL module to do nothing at all.
* Implementations of the MTL should make a good faith effort to
* cancel receive requests that have not been started, as the "post a
* receive for control messages" paradigm is a common one in loosely
* coupled MPI applications.
*
* @param request(IN) Request that should be cancelled
* @param flag Unknown exactly what this does.
*
*/
typedef int (*mca_mtl_base_module_cancel_fn_t)(
struct mca_mtl_base_module_t* mtl,
mca_mtl_request_t *mtl_request,
int flag);
/**
* Downcall from PML layer when a new communicator is created.
*
* @param comm Communicator
* @return OMPI_SUCCESS or failure status.
*
* Provides the MTL the opportunity to initialize/cache a data structure
* on the communicator.
*/
typedef int (*mca_mtl_base_module_add_comm_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm);
/**
* Downcall from PML layer when a communicator is destroyed.
*
* @param comm Communicator
* @return OMPI_SUCCESS or failure status.
*
* Provides the MTL the opportunity to cleanup any datastructures
* associated with the communicator.
*/
typedef int (*mca_mtl_base_module_del_comm_fn_t)(
struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm);
/**
* MTL module interface functions and attributes.
*/
struct mca_mtl_base_module_t {
int mtl_max_contextid; /**< maximum allowable contextid */
int mtl_max_tag; /**< maximum tag value. note that negative tags must be allowed */
size_t mtl_request_size; /**< number of bytes to reserve with request structure */
uint32_t mtl_flags; /**< flags (put/get...) */
/* MTL function table */
mca_mtl_base_module_add_procs_fn_t mtl_add_procs;
mca_mtl_base_module_del_procs_fn_t mtl_del_procs;
mca_mtl_base_module_finalize_fn_t mtl_finalize;
mca_mtl_base_module_send_fn_t mtl_send;
mca_mtl_base_module_isend_fn_t mtl_isend;
mca_mtl_base_module_irecv_fn_t mtl_irecv;
mca_mtl_base_module_iprobe_fn_t mtl_iprobe;
mca_mtl_base_module_imrecv_fn_t mtl_imrecv;
mca_mtl_base_module_improbe_fn_t mtl_improbe;
/* Optional MTL functions */
mca_mtl_base_module_cancel_fn_t mtl_cancel;
mca_mtl_base_module_add_comm_fn_t mtl_add_comm;
mca_mtl_base_module_del_comm_fn_t mtl_del_comm;
};
typedef struct mca_mtl_base_module_t mca_mtl_base_module_t;
/*
* Macro for use in modules that are of type mtl
*/
#define MCA_MTL_BASE_VERSION_2_0_0 \
OMPI_MCA_BASE_VERSION_2_1_0("mtl", 2, 0, 0)
OMPI_DECLSPEC extern mca_mtl_base_module_t *ompi_mtl;
/*
* macro for doing direct call / call through struct
*/
#if MCA_ompi_mtl_DIRECT_CALL
#define OMPI_MTL_CALL_STAMP(a, b) ompi_mtl_ ## a ## _ ## b
#define OMPI_MTL_CALL_EXPANDER(a, b) OMPI_MTL_CALL_STAMP(a,b)
#define OMPI_MTL_CALL(a) OMPI_MTL_CALL_EXPANDER(MCA_ompi_mtl_DIRECT_CALL_COMPONENT, a)
#include MCA_ompi_mtl_DIRECT_CALL_HEADER
#else
#define OMPI_MTL_CALL(a) ompi_mtl->mtl_ ## a
#endif
END_C_DECLS
#endif