2004-07-13 02:46:57 +04:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-07-13 02:46:57 +04:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
/** @file:
|
|
|
|
*
|
|
|
|
* contains the data structure we will use to describe a message
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _MCA_OOB_TCP_MESSAGE_H_
|
|
|
|
#define _MCA_OOB_TCP_MESSAGE_H_
|
|
|
|
|
2008-02-28 04:57:57 +03:00
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/types.h"
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
2005-07-03 20:22:16 +04:00
|
|
|
#include "opal/class/opal_list.h"
|
2008-06-09 18:53:58 +04:00
|
|
|
#include "orte/util/show_help.h"
|
2008-02-28 04:57:57 +03:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/mca/oob/oob.h"
|
2004-08-03 01:24:00 +04:00
|
|
|
#include "oob_tcp_hdr.h"
|
|
|
|
|
2008-02-28 04:57:57 +03:00
|
|
|
BEGIN_C_DECLS
|
|
|
|
|
2004-07-15 23:08:54 +04:00
|
|
|
struct mca_oob_tcp_peer_t;
|
2004-08-03 01:24:00 +04:00
|
|
|
|
|
|
|
/* Size of the iovec array embedded in every mca_oob_tcp_msg_t (msg_iov).
 * mca_oob_tcp_msg_iov_alloc() hands out that embedded array for requests of
 * up to this many entries and only calls malloc() for larger ones. */
#define MCA_OOB_TCP_IOV_MAX 16
|
|
|
|
|
|
|
|
/**
 * Kind of message tracked by a mca_oob_tcp_msg_t (stored in its msg_type field).
 */
typedef enum {
    MCA_OOB_TCP_POSTED,     /**< posted receive */
    MCA_OOB_TCP_UNEXPECTED  /**< unexpected message */
} mca_oob_tcp_type_t;
|
|
|
|
|
|
|
|
|
2004-07-13 02:46:57 +04:00
|
|
|
/**
|
|
|
|
* describes each message being progressed.
|
|
|
|
*/
|
|
|
|
struct mca_oob_tcp_msg_t {
|
2006-07-18 01:51:50 +04:00
|
|
|
opal_free_list_item_t super; /**< allow this item to be put on a list */
|
2004-08-03 01:24:00 +04:00
|
|
|
mca_oob_tcp_type_t msg_type; /**< posted receive or unexpected */
|
|
|
|
int msg_flags; /**< flags to send/recv */
|
|
|
|
int msg_rc; /**< the return code for the send/recv (amount sent/recvd or errno) */
|
|
|
|
mca_oob_tcp_hdr_t msg_hdr; /**< header used to convey message properties to peer */
|
2004-08-13 02:41:42 +04:00
|
|
|
struct iovec* msg_uiov; /**< the user supplied iovec array */
|
2004-08-03 01:24:00 +04:00
|
|
|
int msg_ucnt; /**< the number of items in the user iovec array */
|
|
|
|
struct iovec * msg_rwiov; /**< copy of iovec array - not data */
|
|
|
|
struct iovec * msg_rwptr; /**< current read/write pointer into msg_iov */
|
|
|
|
int msg_rwnum; /**< number of iovecs left for read/write */
|
|
|
|
int msg_rwcnt; /**< total number of iovecs for read/write */
|
|
|
|
void* msg_rwbuf; /**< optional buffer for send/recv */
|
2007-07-20 05:34:02 +04:00
|
|
|
orte_rml_callback_fn_t msg_cbfunc; /**< the callback function for the send/receive */
|
2004-08-03 01:24:00 +04:00
|
|
|
void * msg_cbdata; /**< the data for the callback fnuction */
|
|
|
|
bool msg_complete; /**< whether the message is done sending or not */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_process_name_t msg_peer; /**< the name of the peer */
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_mutex_t msg_lock; /**< lock for the condition variable */
|
|
|
|
opal_condition_t msg_condition; /**< condition variable for completion */
|
2004-08-03 01:24:00 +04:00
|
|
|
struct iovec msg_iov[MCA_OOB_TCP_IOV_MAX]; /** preallocate space for iovec array */
|
2004-07-13 02:46:57 +04:00
|
|
|
};
|
2004-08-03 01:24:00 +04:00
|
|
|
|
2004-07-15 17:51:40 +04:00
|
|
|
/**
|
|
|
|
* Convenience typedef
|
|
|
|
*/
|
2004-07-13 02:46:57 +04:00
|
|
|
typedef struct mca_oob_tcp_msg_t mca_oob_tcp_msg_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_t);
|
|
|
|
|
2004-07-14 01:03:03 +04:00
|
|
|
/**
 * Get a new structure for use with a message.
 *
 * Takes an item from the mca_oob_tcp_component.tcp_msgs free list via
 * OPAL_FREE_LIST_GET and stores it, cast to mca_oob_tcp_msg_t*, in msg.
 *
 * @param msg (OUT) Lvalue of type mca_oob_tcp_msg_t* that receives the item.
 * @param rc  (OUT) Passed through unchanged as the third argument of
 *                  OPAL_FREE_LIST_GET.
 *
 * The expansion is a single statement (do { } while (0)), so the macro can be
 * used in an unbraced if/else; invoke it with a trailing semicolon.
 */
#define MCA_OOB_TCP_MSG_ALLOC(msg, rc)                                        \
    do {                                                                      \
        /* distinctive name so it cannot capture a caller variable */         \
        opal_free_list_item_t* oob_tcp_msg_alloc_item_;                       \
        OPAL_FREE_LIST_GET(&mca_oob_tcp_component.tcp_msgs,                   \
                           oob_tcp_msg_alloc_item_, rc);                      \
        (msg) = (mca_oob_tcp_msg_t*)oob_tcp_msg_alloc_item_;                  \
    } while (0)
|
2004-07-13 02:46:57 +04:00
|
|
|
|
2004-07-14 01:03:03 +04:00
|
|
|
/**
 * Return a message structure that is no longer needed.
 *
 * Releases msg_rwiov through mca_oob_tcp_msg_iov_return() and free()s
 * msg_rwbuf when they are non-NULL, then gives the item back to the
 * mca_oob_tcp_component.tcp_msgs free list.
 *
 * @param msg (IN) Pointer to the mca_oob_tcp_msg_t being returned.  The
 *                 argument is evaluated more than once, so it must not have
 *                 side effects.
 *
 * The expansion is a single statement (do { } while (0)), so the macro can be
 * used in an unbraced if/else; invoke it with a trailing semicolon.
 *
 * NOTE(review): msg_rwiov and msg_rwbuf are not reset after being released;
 * presumably they are reinitialised before the item is reused - confirm.
 */
#define MCA_OOB_TCP_MSG_RETURN(msg)                                           \
    do {                                                                      \
        /* frees the iovec allocated during the send/receive */               \
        if (NULL != (msg)->msg_rwiov) {                                       \
            mca_oob_tcp_msg_iov_return((msg), (msg)->msg_rwiov);              \
        }                                                                     \
        if (NULL != (msg)->msg_rwbuf) {                                       \
            free((msg)->msg_rwbuf);                                           \
        }                                                                     \
        OPAL_FREE_LIST_RETURN(&mca_oob_tcp_component.tcp_msgs,                \
                              &(msg)->super);                                 \
    } while (0)
|
2004-07-13 02:46:57 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Wait for a msg to complete.
|
2004-08-03 01:24:00 +04:00
|
|
|
* @param msg (IN) Message to wait on.
|
|
|
|
* @param size (OUT) Number of bytes delivered.
|
2006-02-12 04:33:29 +03:00
|
|
|
* @retval ORTE_SUCCESS or error code on failure.
|
2004-07-13 02:46:57 +04:00
|
|
|
*/
|
|
|
|
int mca_oob_tcp_msg_wait(mca_oob_tcp_msg_t* msg, int* size);
|
|
|
|
|
2004-09-08 21:02:24 +04:00
|
|
|
/**
|
|
|
|
* Wait - up to a timeout - for a msg to complete.
|
|
|
|
* @param msg (IN) Message to wait on.
|
|
|
|
* @param size (OUT) Number of bytes delivered.
|
2006-02-12 04:33:29 +03:00
|
|
|
* @retval ORTE_SUCCESS or error code on failure.
|
2004-09-08 21:02:24 +04:00
|
|
|
*/
|
|
|
|
int mca_oob_tcp_msg_timedwait(mca_oob_tcp_msg_t* msg, int* size, struct timespec* ts);
|
|
|
|
|
2004-07-13 02:46:57 +04:00
|
|
|
/**
|
|
|
|
* Signal that a message has completed. Wakes up any pending threads (for blocking send)
|
|
|
|
* or invokes callbacks for non-blocking case.
|
|
|
|
* @param msg (IN) Message send/recv that has completed.
|
2004-08-03 01:24:00 +04:00
|
|
|
* @param peer (IN) The peer the send/receive was from
|
2006-02-12 04:33:29 +03:00
|
|
|
* @retval ORTE_SUCCESS or error code on failure.
|
2004-07-13 02:46:57 +04:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
int mca_oob_tcp_msg_complete(mca_oob_tcp_msg_t* msg, orte_process_name_t * peer);
|
2004-07-13 02:46:57 +04:00
|
|
|
|
2004-08-03 01:24:00 +04:00
|
|
|
/**
|
|
|
|
* Called to copy the results of a message into user supplied iovec array.
|
|
|
|
* @param msg (IN) Message send that is in progress.
|
|
|
|
* @param iov (IN) Iovec array of user supplied buffers.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
*/
|
|
|
|
|
2004-08-13 02:41:42 +04:00
|
|
|
int mca_oob_tcp_msg_copy(mca_oob_tcp_msg_t* msg, struct iovec* iov, int count);
|
2004-08-03 01:24:00 +04:00
|
|
|
|
2004-07-13 02:46:57 +04:00
|
|
|
/**
|
|
|
|
* Called asynchronously to progress sending a message from the event library thread.
|
|
|
|
* @param msg (IN) Message send that is in progress.
|
2004-08-10 03:07:53 +04:00
|
|
|
* @param peer (IN) Peer we are sending to.
|
2004-08-03 01:24:00 +04:00
|
|
|
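* @retval bool Flag returned to the caller (the function returns bool, not a byte count); presumably whether the send has completed - confirm against the implementation.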
|
2004-07-13 02:46:57 +04:00
|
|
|
*/
|
2004-07-15 17:51:40 +04:00
|
|
|
bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t * peer);
|
2004-07-13 02:46:57 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Called asynchronously to progress receiving a message from the event library thread.
|
|
|
|
* @param msg (IN) Message receive that is in progress.
|
2004-08-10 03:07:53 +04:00
|
|
|
* @param peer (IN) Peer that we are receiving from.
|
2004-07-13 02:46:57 +04:00
|
|
|
* @retval bool Bool flag indicating whether operation has completed.
|
|
|
|
*/
|
|
|
|
|
2004-07-15 17:51:40 +04:00
|
|
|
bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t * peer);
|
2004-07-13 02:46:57 +04:00
|
|
|
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
/**
|
|
|
|
* The message has been completely received - so attempt to match
|
|
|
|
* against posted recvs.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void mca_oob_tcp_msg_recv_complete(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t* peer);
|
|
|
|
|
2004-08-03 01:24:00 +04:00
|
|
|
/**
|
|
|
|
* Match name to a message that has been received asynchronously (unexpected).
|
|
|
|
*
|
|
|
|
* @param name (IN) Name associated with peer or wildcard to match first posted recv.
|
|
|
|
* @param tag (IN) Message tag.
|
|
|
|
* @return msg Matched message or NULL.
|
|
|
|
*
|
|
|
|
* Note - this routine requires the caller to be holding the module lock.
|
|
|
|
*/
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_recv(orte_process_name_t* name, int tag);
|
2004-08-03 01:24:00 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Match name to a posted recv request.
|
|
|
|
*
|
|
|
|
* @param name (IN) Name associated with peer or wildcard to match first posted recv.
|
|
|
|
* @param tag (IN) Message tag.
|
|
|
|
* @return msg Matched message or NULL.
|
|
|
|
*
|
|
|
|
* Note - this routine requires the caller to be holding the module lock.
|
|
|
|
*/
|
|
|
|
|
2005-03-22 03:31:17 +03:00
|
|
|
mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_post(orte_process_name_t* name, int tag);
|
2004-08-03 01:24:00 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Allocate space for iovec array - if the request number of elements is less than
|
|
|
|
* MCA_OOB_TCP_IOV_MAX then use the array allocated along w/ the message - otherwise
|
|
|
|
* allocate count elements.
|
|
|
|
*
|
|
|
|
* @param msg (IN) Message to allocate array.
|
2004-08-10 03:07:53 +04:00
|
|
|
* @param count (IN) the number of iovecs
|
2004-08-03 01:24:00 +04:00
|
|
|
* @return Array of iovec elements.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static inline struct iovec* mca_oob_tcp_msg_iov_alloc(mca_oob_tcp_msg_t* msg, int count)
|
|
|
|
{
|
|
|
|
if(count <= MCA_OOB_TCP_IOV_MAX)
|
|
|
|
return msg->msg_iov;
|
2004-10-28 22:13:43 +04:00
|
|
|
return (struct iovec *)malloc(sizeof(struct iovec) * count);
|
2004-08-03 01:24:00 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Release resource held by iovec array if this is not part of the message.
|
|
|
|
*
|
|
|
|
* @param msg (IN) Message to allocate array.
|
|
|
|
* @param iov (IN) Iovec array to return.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static inline void mca_oob_tcp_msg_iov_return(mca_oob_tcp_msg_t* msg, struct iovec* iov)
|
|
|
|
{
|
|
|
|
if(iov != msg->msg_iov)
|
|
|
|
free(iov);
|
|
|
|
}
|
|
|
|
|
2008-02-28 04:57:57 +03:00
|
|
|
END_C_DECLS
|
|
|
|
|
2004-07-13 02:46:57 +04:00
|
|
|
#endif /* _MCA_OOB_TCP_MESSAGE_H_ */
|
|
|
|
|