1
1
2009-07-19 18:07:04 +00:00

655 строки
26 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Runtime Messaging Layer (RML) Communication Interface
*
* The Runtime Messaging Layer (RML) provices basic point-to-point
* communication between ORTE processes. The system is available for
* most architectures, with some exceptions (the Cray XT3/XT4, for example).
*/
#ifndef ORTE_MCA_RML_RML_H_
#define ORTE_MCA_RML_RML_H_
#include "orte_config.h"
#include "orte/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "orte/mca/rml/rml_types.h"
BEGIN_C_DECLS
/* ******************************************************************** */
struct opal_buffer_t;
struct orte_process_name_t;
struct orte_rml_module_t;
/* ******************************************************************** */
/**
* RML component initialization
*
* Create an instance (module) of the given RML component. Upon
* returning, the module data structure should be fully populated and
* all functions should be usable. Non-blocking receive calls may be
* posted upon return from this function, although communication need
* not be enabled until enable_comm() call is called on the module.
*
* @return Exactly one module created by the call to the component's
* initialization function should be returned. The module structure
* should be fully populated, and the priority should be set to a
* reasonable value.
*
* @param[out] priority Selection priority for the given component
*
* @retval NULL An error occurred and initialization did not occur
* @retval non-NULL The module was successfully initialized
*/
typedef struct orte_rml_module_t* (*orte_rml_component_init_fn_t)(int *priority);
/**
* RML component interface
*
* Component interface for the RML framework. A public instance of
* this structure, called mca_rml_[component name]_component, must
* exist in any RML component.
*/
struct orte_rml_component_2_0_0_t {
/* Base component description */
mca_base_component_t rml_version;
/* Base component data block */
mca_base_component_data_t rml_data;
/* Component intialization function */
orte_rml_component_init_fn_t rml_init;
};
/** Convienence typedef */
typedef struct orte_rml_component_2_0_0_t orte_rml_component_t;
/* ******************************************************************** */
/**
* Funtion prototype for callback from non-blocking iovec send and receive
*
* Funtion prototype for callback from non-blocking iovec send and
* receive. The iovec pointer will be the same pointer passed to
* either send_nb and recv_nb. The peer memory location may not be
* the same, and is owned by the RML, not the calling process.
*
* @note The parameter in/out parameters are relative to the user's callback
* function.
*
* @param[in] status Completion status - equivalent to the return value
* from blocking send/recv
* @param[in] peer Opaque name of peer process
* @param[in] msg Array of iovecs describing user buffers and lengths
* @param[in] count Number of elements in iovec array
* @param[in] tag User defined tag for matching send/recv
* @param[in] cbdata User data passed to send_nb() or recv_nb()
*/
typedef void (*orte_rml_callback_fn_t)(int status,
struct orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
void* cbdata);
/**
* Funtion prototype for callback from non-blocking buffer send and receive
*
* Funtion prototype for callback from non-blocking buffer send and
* receive. The buffer may not be the same pointer passed to either
* send_buffer_nb and recv_buffer_nb. The peer memory location may
* not be the same, and is owned by the RML, not the calling process.
*
* @note The parameter in/out parameters are relative to the user's callback
* function.
*
* @param[in] status Completion status - equivalent to the return value
* from blocking send/recv
* @param[in] peer Name of peer process
* @param[in] buffer Message buffer
* @param[in] tag User defined tag for matching send/recv
* @param[in] cbdata User data passed to send_nb() or recv_nb()
*/
typedef void (*orte_rml_buffer_callback_fn_t)(int status,
struct orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata);
/**
* Function prototype for exception callback
*
* Function prototype for callback triggered when a communication error is detected.
*
* @note The parameter in/out parameters are relative to the user's callback
* function.
*
* @param[in] peer Name of peer process
* @param[in] exception Description of the error causing the exception
*/
typedef void (*orte_rml_exception_callback_t)(const orte_process_name_t* peer,
orte_rml_exception_t exception);
/* ******************************************************************** */
/**
* Enable communication using the RML module
*
* Enable communication using the RML module. Before this call, only
* the non-blocking receive and ping interfaces may be used. After
* this call returns, the module must be fully functional, capable of
* sending and receiving data. This function will be called after the
* process has been assigned a proces identifier.
*
* @note While the ping interface may be used between the call to the
* component's initialization function and this call, care must be
* taken when doing so. The remote process must have already called
* enable_comm() or the remote process will not reply to the ping.
* As the ping interface is generally used by MPI processes to find a
* daemon to contact, this should not be a major limitation.
*
* @retval ORTE_SUCCESS Communications successfully enabled
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_enable_comm_fn_t)(void);
/**
* Finalize the RML module
*
* Finalize the RML module, ending all communication and cleaning up
* all resources associated with the module. After the finalize
* function is called, all interface functions (and the module
* structure itself) are not available for use.
*
* @note Whether or not the finalize function returns successfully,
* the module should not be used once this function is called.
*
* @retval ORTE_SUCCESS Success
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_finalize_fn_t)(void);
/**
* Get a "contact info" string for the local process
*
* Get a "contact info" string that can be used by other processes to
* share the contact information for the given process. The "contact
* info" string includes the process identifier for the given process
* and uses only basic ascii characters. It should be quoted when
* evaluated by a shell, although no special escaping is necessary.
*
* @note The function may return a contact info string which contains
* multiple addresses.
*
* @retval non-NULL The contact information for this process
* @retval NULL An error occurred when trying to get the current
* process contact info
*/
typedef char* (*orte_rml_module_get_contact_info_fn_t)(void);
/**
* Update the RML with a remote process's contact info
*
* Update the RML with a remote process's contact information, as
* returned from the get_contact_info() function on the remote
* process. Before a send can be initiated to a remote process,
* either this function must be called for that process or that
* process must have already established a connection to the local
* process.
*
* @note The user may not always explicitly call this function
* directly, but may instead cause it to be called through one of the
* contact setup functions available in
* orte/mca/rml/base/rml_contact.h.
*
* @param[in] contact_info The contact information string of a peer
*
* @retval ORTE_SUCCESS The contact information was successfully updated
* @retval ORTE_ERROR An unspecified error occurred during the update
*/
typedef int (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info);
/**
* Request a new name from the HNP, if one is not already assigned
*
* Request a new name from the HNP, if one is not already assigned.
* This function should be avoided at all costs, but is unavoidable in
* some instances (like when trying to get a name from a singleton or
* when trying to contact a persistent daemon from an orte tool.
*
* @param[out] name The new name of the process
*
* @retval ORTE_SUCCESS A name was successfully acquired
* @retcal ORTE_ERR_NOT_SUPPORTED A name is already assigned to the
* current process
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_get_new_name_fn_t)(orte_process_name_t *name);
/**
* "Ping" another process to determine availability
*
* Ping another process to determine if it is available. This
* function only verifies that the process is alive and will allow a
* connection to the local process. It does *not* qualify as
* establishing communication with the remote process, as required by
* the note for set_contact_info().
*
* @param[in] contact_info The contact info string for the remote process
* @param[in] tv Timeout after which the ping should be failed
*
* @retval ORTE_SUCESS The process is available and will allow connections
* from the local process
* @retval ORTE_ERROR An unspecified error occurred during the update
*/
typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
const struct timeval* tv);
/**
* Send an iovec blocking message
*
* Send an iovec blocking message to the specified peer. The call
* will return when the buffer may be modified. The return of a call
* to send() does not give any indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] msg iovec array describing send buffer
* @param[in] count Length of iovec array
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags Currently unused.
*
* @retval >0 Number of bytes successfully sent (will be
* length of all iovecs)
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_fn_t)(struct orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/**
* Send a buffer blocking message
*
* Send a buffer blocking message to the specified peer. The call
* will return when the buffer may be modified. The return of a call
* to send() does not give any indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] buffer send buffer
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags Currently unused.
*
* @retval >0 Number of bytes successfully sent (will be
* length of buffer)
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_buffer_fn_t)(struct orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
int flags);
/**
* Send an iovec non-blocking message
*
* Send an iovec blocking message to the specified peer. The call
* will return immediately, although the iovec may not be modified
* until the completion callback is triggered. The iovec *may* be
* passed to another call to send_nb before the completion callback is
* triggered. The callback being triggered does not give any
* indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] msg iovec array describing send buffer
* @param[in] count Length of iovec array
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags Currently unused
* @param[in] cbfunc Callback function on message comlpetion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
* Send an buffer non-blocking message
*
* Send an buffer blocking message to the specified peer. The call
* will return immediately, although the buffer may not be modified
* until the completion callback is triggered. The buffer *may* be
* passed to another call to send_nb before the completion callback is
* triggered. The callback being triggered does not give any
* indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] buffer Buffer array describing send buffer
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags Currently unused
* @param[in] cbfunc Callback function on message comlpetion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/**
* Receive an iovec blocking message
*
* Receive a message into a user-provided iovec. The call will not
* return until the buffer has been received. The remote process does
* not need to be connected to the current process to post the
* receive, so it is possible to post a receive with no sending
* process (ie, it will never complete). The buffer must not be
* currently in use with any other RML communication function during a
* receive call.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[out] msg iovec array of receive buffer space
* @param[in] count Number of iovec entries in the array
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags May be ORTE_RML_PEEK to return up to the number
* of bytes provided in the iovec array without
* removing the message from the queue.
*
* @retval >0 Number of bytes successfully received (may be smaller
* than the posted buffer size
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_recv_fn_t)(struct orte_process_name_t* peer,
struct iovec *msg,
int count,
orte_rml_tag_t tag,
int flags);
/**
* Receive a buffer blocking message
*
* Receive a message into a user-provided buffer. The call will not
* return until the buffer has been received. The remote process does
* not need to be connected to the current process to post the
* receive, so it is possible to post a receive with no sending
* process (ie, it will never complete). The buffer must not be
* currently in use with any other RML communication function during a
* receive call.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[out] buffer A dss buffer to update with the received data
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags Flags modifying receive behavior
*
* @retval >0 Number of bytes successfully received (may be smaller
* than the posted buffer size
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_recv_buffer_fn_t) (struct orte_process_name_t* peer,
struct opal_buffer_t *buf,
orte_rml_tag_t tag,
int flags);
/**
* Receive an iovec non-blocking message
*
* Receive a message into a user-provided iovec. The call will not
* return until the buffer has been received. The remote process does
* not need to be connected to the current process to post the
* receive, so it is possible to post a receive with no sending
* process (ie, it will never complete). The buffer must not be
* currently in use with any other RML communication function during a
* receive call.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[out] msg iovec array of receive buffer space
* @param[in] count Number of iovec entries in the array
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags May be ORTE_RML_PEEK to return up to the number
* of bytes provided in the iovec array without
* removing the message from the queue.
* @param[in] cbfunc Callback function on message comlpetion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_recv_nb_fn_t)(struct orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
* Receive a buffer non-blocking message
*
* Receive a message into a user-provided buffer. The call will not
* return until the buffer has been received. The remote process does
* not need to be connected to the current process to post the
* receive, so it is possible to post a receive with no sending
* process (ie, it will never complete). The buffer must not be
* currently in use with any other RML communication function during a
* receive call.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[in] tag User defined tag for matching send/recv
* @param[in] flags May be ORTE_RML_PEEK to return up to the number
* of bytes provided in the iovec array without
* removing the message from the queue.
* @param[in] cbfunc Callback function on message comlpetion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_recv_buffer_nb_fn_t)(struct orte_process_name_t* peer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/**
* Cancel a posted non-blocking receive
*
* Attempt to cancel a posten non-blocking receive.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
* to the non-blocking receive call
* @param[in] tag Posted receive tag
*
* @retval ORTE_SUCCESS The receive was successfully cancelled
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_NOT_FOUND A matching receive was not found
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag);
/**
* Register or deregister an exception callback function
*
* Register or deregister a callback when an asynchronous
* communication exception occurs.
*
* @param[in] cbfunc User callback
*
* @retval ORTE_SUCCESS The operation completed successfully
* @retval ORTE_ERROR An unspecifed error occurred
*/
typedef int (*orte_rml_module_exception_fn_t)(orte_rml_exception_callback_t cbfunc);
/**
* Handle fault tolerance updates
*
* Handle fault tolerance updates
*
* @param[in] state Fault tolerance state update
*
* @retval ORTE_SUCCESS The operation completed successfully
* @retval ORTE_ERROR An unspecifed error occurred
*/
typedef int (*orte_rml_module_ft_event_fn_t)(int state);
/* ******************************************************************** */
/**
* RML module interface
*
* Module interface to the RML communication system. A global
* instance of this module, orte_rml, provices an interface into the
* active RML interface.
*/
struct orte_rml_module_t {
/** Enable communication once a process name has been assigned */
orte_rml_module_enable_comm_fn_t enable_comm;
/** Shutdown the communication system and clean up resources */
orte_rml_module_finalize_fn_t finalize;
/** Get contact information for local process */
orte_rml_module_get_contact_info_fn_t get_contact_info;
/** Set contact information for remote process */
orte_rml_module_set_contact_info_fn_t set_contact_info;
/** Get a name from the remote process */
orte_rml_module_get_new_name_fn_t get_new_name;
/** Ping process for connectivity check */
orte_rml_module_ping_fn_t ping;
/** Send blocking iovec message */
orte_rml_module_send_fn_t send;
/** Send blocking buffer message */
orte_rml_module_send_nb_fn_t send_nb;
/** Send non-blocking iovec message */
orte_rml_module_send_buffer_fn_t send_buffer;
/** Send non-blocking buffer message */
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
/** Receive blocking iovec message */
orte_rml_module_recv_fn_t recv;
/** Receive blocking buffer message */
orte_rml_module_recv_nb_fn_t recv_nb;
/** Receive non-blocking iovec message */
orte_rml_module_recv_buffer_fn_t recv_buffer;
/** Receive non-blocking buffer message */
orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
/** Cancel posted non-blocking receive */
orte_rml_module_recv_cancel_fn_t recv_cancel;
/** Add callback for communication exception */
orte_rml_module_exception_fn_t add_exception_handler;
/** Delete callback for communication exception */
orte_rml_module_exception_fn_t del_exception_handler;
/** Fault tolerance handler */
orte_rml_module_ft_event_fn_t ft_event;
};
/** Convienence typedef */
typedef struct orte_rml_module_t orte_rml_module_t;
/** Interface for RML communication */
ORTE_DECLSPEC extern orte_rml_module_t orte_rml;
/* ******************************************************************** */
/** Macro for use in components that are of type rml */
#define ORTE_RML_BASE_VERSION_2_0_0 \
MCA_BASE_VERSION_2_0_0, \
"rml", 2, 0, 0
/* ******************************************************************** */
END_C_DECLS
#endif