2004-07-01 18:49:54 +04:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2006-02-07 06:32:36 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
2004-11-28 23:09:25 +03:00
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
2004-11-22 04:38:40 +03:00
|
|
|
* Additional copyrights may follow
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
2004-02-01 00:43:26 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
2004-07-01 18:49:54 +04:00
|
|
|
/** @file:
|
|
|
|
*
|
2004-08-05 03:42:51 +04:00
|
|
|
* the oob framework
|
2004-07-01 18:49:54 +04:00
|
|
|
*/
|
2004-02-01 00:43:26 +03:00
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
#ifndef _MCA_OOB_BASE_H_
|
|
|
|
#define _MCA_OOB_BASE_H_
|
2005-03-14 23:57:21 +03:00
|
|
|
|
|
|
|
#include "orte_config.h"
|
|
|
|
|
2005-12-01 21:28:20 +03:00
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
2004-10-22 20:06:05 +04:00
|
|
|
#ifdef HAVE_SYS_UIO_H
|
2006-02-07 06:32:36 +03:00
|
|
|
#include <sys/uio.h>
|
2004-10-22 20:06:05 +04:00
|
|
|
#endif
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2006-02-07 06:32:36 +03:00
|
|
|
#include "opal/mca/mca.h"
|
|
|
|
|
|
|
|
#include "orte/dss/dss_types.h"
|
|
|
|
#include "orte/mca/ns/ns_types.h"
|
|
|
|
#include "orte/mca/gpr/gpr_types.h"
|
|
|
|
#include "orte/mca/oob/oob_types.h"
|
2004-02-01 00:43:26 +03:00
|
|
|
|
2004-10-26 23:15:19 +04:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
2004-02-01 00:43:26 +03:00
|
|
|
|
2006-11-03 19:04:40 +03:00
|
|
|
/*
|
|
|
|
* global flag for use in timing tests
|
|
|
|
*/
|
|
|
|
ORTE_DECLSPEC extern bool orte_oob_base_timing;
|
2006-12-02 01:30:39 +03:00
|
|
|
ORTE_DECLSPEC extern bool orte_oob_xcast_timing;
|
|
|
|
ORTE_DECLSPEC extern int orte_oob_xcast_mode;
|
2006-11-03 19:04:40 +03:00
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
/*
|
2006-02-07 06:32:36 +03:00
|
|
|
* OOB API
|
2004-06-30 00:36:34 +04:00
|
|
|
*/
|
2004-07-01 18:49:54 +04:00
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
/**
|
|
|
|
* General flags for send/recv
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
2004-08-05 03:42:51 +04:00
|
|
|
* An example of usage - to determine the size of the next available message w/out receiving it:
|
|
|
|
*
|
2004-08-12 01:07:16 +04:00
|
|
|
* int size = mca_oob_recv(name, 0, 0, tag, MCA_OOB_TRUNC|MCA_OOB_PEEK);
|
2004-08-05 03:42:51 +04:00
|
|
|
*/
|
2004-07-01 18:49:54 +04:00
|
|
|
|
2004-08-10 03:07:53 +04:00
|
|
|
/** Flag to oob_recv: allows the caller to peek a portion of the next
 *  available message without removing the message from the queue. */
#define MCA_OOB_PEEK 0x01
|
2006-02-07 06:32:36 +03:00
|
|
|
/** Flag to oob_recv: return the actual size of the message even if the
 *  receive buffer is smaller than the number of bytes available. */
#define MCA_OOB_TRUNC 0x02
|
|
|
|
/** Flag to oob_recv: request that the oob allocate a buffer of the
 *  appropriate size for the receive, returning the allocated buffer and
 *  its size in the first element of the iovec array. */
#define MCA_OOB_ALLOC 0x04
|
2005-10-21 02:06:11 +04:00
|
|
|
/** Flag to post a receive request persistently: the request is not
 *  removed on match. */
#define MCA_OOB_PERSISTENT 0x08
|
2004-06-30 00:36:34 +04:00
|
|
|
|
2006-02-07 06:32:36 +03:00
|
|
|
|
2004-08-24 20:54:45 +04:00
|
|
|
/**
|
|
|
|
* Obtain a string representation of the OOB contact information for
|
2006-02-07 06:32:36 +03:00
|
|
|
* the selected OOB channels. This string may be passed to another
|
2004-08-24 20:54:45 +04:00
|
|
|
* application via an MCA parameter (OMPI_MCA_oob_base_seed) to bootstrap
|
|
|
|
* communications.
|
|
|
|
*
|
|
|
|
* @return A null terminated string that should be freed by the caller.
|
|
|
|
*
|
|
|
|
* Note that mca_oob_base_init() must be called to load and select
|
|
|
|
* an OOB module prior to calling this routine.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC char* mca_oob_get_contact_info(void);
|
2004-08-24 20:54:45 +04:00
|
|
|
|
|
|
|
/**
|
2006-02-07 06:32:36 +03:00
|
|
|
* Pre-populate the cache of contact information required by the OOB
|
2004-09-08 21:02:24 +04:00
|
|
|
* to reach a given destination. This is required to setup a pointer
|
|
|
|
* to initial registry/name server/etc.
|
2004-08-24 20:54:45 +04:00
|
|
|
*
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
* @param uri The contact information of the peer process obtained
|
2004-08-24 20:54:45 +04:00
|
|
|
* via a call to mca_oob_get_contact_info().
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_set_contact_info(const char*);
|
2004-08-24 20:54:45 +04:00
|
|
|
|
2004-09-08 21:02:24 +04:00
|
|
|
/**
|
2006-02-07 06:32:36 +03:00
|
|
|
* A routine to ping a given process name to determine if it is reachable.
|
2004-09-08 21:02:24 +04:00
|
|
|
*
|
|
|
|
* @param name The peer name.
|
|
|
|
* @param tv The length of time to wait on a connection/response.
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
2004-09-08 21:02:24 +04:00
|
|
|
* Note that this routine blocks up to the specified timeout waiting for a
|
|
|
|
* connection / response from the specified peer. If the peer is unavailable
|
|
|
|
* an error status is returned.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_ping(const char*, struct timeval* tv);
|
2004-09-08 21:02:24 +04:00
|
|
|
|
2004-08-11 02:11:31 +04:00
|
|
|
/**
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
* Extract from the contact info the peer process identifier.
|
2004-08-11 02:11:31 +04:00
|
|
|
*
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
* @param cinfo (IN) The contact information of the peer process.
|
|
|
|
* @param name (OUT) The peer process identifier.
|
|
|
|
* @param uris (OUT) Will return an array of uri strings corresponding
|
|
|
|
* to the peers exported protocols.
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
* Note the caller may pass NULL for the uris if they only wish to extact
|
|
|
|
* the process name.
|
2004-08-11 02:11:31 +04:00
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_parse_contact_info(const char* uri, orte_process_name_t* peer, char*** uris);
|
2004-08-28 05:15:19 +04:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the contact info for the seed daemon.
|
|
|
|
*
|
|
|
|
* Note that this can also be passed to the application as an
|
|
|
|
* MCA parameter (OMPI_MCA_oob_base_seed). The contact info (of the seed)
|
|
|
|
* must currently be set before calling mca_oob_base_init().
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_set_contact_info(const char*);
|
2004-08-28 05:15:19 +04:00
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
/**
|
|
|
|
* Similiar to unix writev(2).
|
|
|
|
*
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param msg (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param flags (IN) Currently unused.
|
|
|
|
* @return OMPI error code (<0) on error number of bytes actually sent.
|
|
|
|
*
|
|
|
|
* This routine provides semantics similar to unix send/writev with the addition of
|
|
|
|
* a tag parameter that can be used by the application to match the send w/ a specific
|
|
|
|
* receive. In other words - a recv call by the specified peer will only succeed when
|
|
|
|
* the corresponding (or wildcard) tag is used.
|
|
|
|
*
|
|
|
|
* The <i>peer</i> parameter represents an opaque handle to the peer process that
|
|
|
|
* is resolved by the oob layer (using the registry) to an actual physical network
|
|
|
|
* address.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_send(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
struct iovec *msg,
|
|
|
|
int count,
|
2004-08-05 03:42:51 +04:00
|
|
|
int tag,
|
|
|
|
int flags);
|
|
|
|
|
2004-08-12 02:05:02 +04:00
|
|
|
/*
|
|
|
|
* Similiar to unix send(2) and mca_oob_send.
|
|
|
|
*
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param buffer (IN) Prepacked OMPI_BUFFER containing data to send
|
|
|
|
* @param flags (IN) Currently unused.
|
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually sent.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_send_packed(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
orte_buffer_t* buffer,
|
|
|
|
int tag,
|
2004-08-05 03:42:51 +04:00
|
|
|
int flags);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Similiar to unix readv(2)
|
|
|
|
*
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
|
|
|
* @param peer (IN/OUT) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive. In the
|
2004-08-13 02:41:42 +04:00
|
|
|
* case of a wildcard receive, will be modified to return the matched peer name.
|
|
|
|
* @param msg (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN/OUT) User defined tag for matching send/recv. In the case of a wildcard receive, will
|
2006-02-07 06:32:36 +03:00
|
|
|
* be modified to return the matched tag. May be optionally by NULL to specify a
|
2004-08-13 02:41:42 +04:00
|
|
|
* wildcard receive with no return value.
|
|
|
|
* @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
|
|
|
|
* iovec array without removing the message from the queue.
|
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually received.
|
2004-08-05 03:42:51 +04:00
|
|
|
*
|
|
|
|
* The OOB recv call is similar to unix recv/readv in that it requires the caller to manage
|
|
|
|
* memory associated w/ the message. The routine accepts an array of iovecs (<i>msg</i>); however,
|
|
|
|
* the caller must determine the appropriate number of elements (<i>count</i>) and allocate the
|
2006-02-07 06:32:36 +03:00
|
|
|
* buffer space for each entry.
|
2004-08-05 03:42:51 +04:00
|
|
|
*
|
2006-02-07 06:32:36 +03:00
|
|
|
* The <i>tag</i> parameter is provided to facilitate this. The user may define tags based on message
|
2004-08-05 03:42:51 +04:00
|
|
|
* type to determine the message layout and size, as the mca_oob_recv call will block until a message
|
|
|
|
* with the matching tag is received.
|
|
|
|
*
|
2006-02-07 06:32:36 +03:00
|
|
|
* Alternately, the <i>flags</i> parameter may be used to peek (MCA_OOB_PEEK) a portion of the message
|
2004-08-05 03:42:51 +04:00
|
|
|
* (e.g. a standard message header) or determine the overall message size (MCA_OOB_TRUNC|MCA_OOB_PEEK)
|
2006-02-07 06:32:36 +03:00
|
|
|
* without removing the message from the queue.
|
2004-08-05 03:42:51 +04:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_recv(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
struct iovec *msg,
|
|
|
|
int count,
|
|
|
|
int tag,
|
2004-08-05 03:42:51 +04:00
|
|
|
int flags);
|
|
|
|
|
2004-08-12 01:07:16 +04:00
|
|
|
/**
|
|
|
|
* Similiar to unix read(2)
|
|
|
|
*
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
|
|
|
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
|
2004-08-13 02:41:42 +04:00
|
|
|
* @param buf (OUT) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param tag (IN/OUT) User defined tag for matching send/recv.
|
2004-08-12 01:07:16 +04:00
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually received.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* This version of oob_recv is as above except it does NOT take a iovec list
|
2006-02-07 06:32:36 +03:00
|
|
|
* but instead hands back a orte_buffer_t* buffer with the message in it.
|
2005-03-14 23:57:21 +03:00
|
|
|
* The user is responsible for releasing the buffer when finished w/ it.
|
2004-08-12 01:07:16 +04:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_recv_packed (
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
orte_buffer_t *buf,
|
|
|
|
int tag);
|
2004-08-12 01:07:16 +04:00
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
/*
|
|
|
|
* Non-blocking versions of send/recv.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Callback function on send/recv completion.
|
|
|
|
*
|
|
|
|
* @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param msg (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param cbdata (IN) User data.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef void (*mca_oob_callback_fn_t)(
|
|
|
|
int status,
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
struct iovec* msg,
|
2004-08-05 03:42:51 +04:00
|
|
|
int count,
|
2004-08-18 19:51:40 +04:00
|
|
|
int tag,
|
2004-08-13 00:30:03 +04:00
|
|
|
void* cbdata);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Callback function on send/recv completion for buffer PACKED message only.
|
|
|
|
* i.e. only mca_oob_send_packed_nb and mca_oob_recv_packed_nb USE this.
|
|
|
|
*
|
|
|
|
* @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param buffer (IN) For sends, this is a pointer to a prepacked buffer
|
2006-02-07 06:32:36 +03:00
|
|
|
For recvs, OOB creates and returns a buffer
|
2004-08-13 00:30:03 +04:00
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param cbdata (IN) User data.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef void (*mca_oob_callback_packed_fn_t)(
|
|
|
|
int status,
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_buffer_t* buffer,
|
2004-08-18 19:51:40 +04:00
|
|
|
int tag,
|
2004-08-05 03:42:51 +04:00
|
|
|
void* cbdata);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Non-blocking version of mca_oob_send().
|
|
|
|
*
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param msg (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param flags (IN) Currently unused.
|
|
|
|
* @param cbfunc (IN) Callback function on send completion.
|
|
|
|
* @param cbdata (IN) User data that is passed to callback function.
|
|
|
|
* @return OMPI error code (<0) on error number of bytes actually sent.
|
|
|
|
*
|
|
|
|
* The user supplied callback function is called when the send completes. Note that
|
2006-02-07 06:32:36 +03:00
|
|
|
* the callback may occur before the call to mca_oob_send returns to the caller,
|
2004-08-05 03:42:51 +04:00
|
|
|
* if the send completes during the call.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_send_nb(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
struct iovec* msg,
|
|
|
|
int count,
|
2004-08-05 03:42:51 +04:00
|
|
|
int tag,
|
2006-02-07 06:32:36 +03:00
|
|
|
int flags,
|
2004-08-05 03:42:51 +04:00
|
|
|
mca_oob_callback_fn_t cbfunc,
|
|
|
|
void* cbdata);
|
|
|
|
|
2004-08-18 19:51:40 +04:00
|
|
|
/**
|
|
|
|
* Non-blocking version of mca_oob_send_packed().
|
|
|
|
*
|
|
|
|
* @param peer (IN) Opaque name of peer process.
|
|
|
|
* @param buffer (IN) Opaque buffer handle.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param flags (IN) Currently unused.
|
|
|
|
* @param cbfunc (IN) Callback function on send completion.
|
|
|
|
* @param cbdata (IN) User data that is passed to callback function.
|
|
|
|
* @return OMPI error code (<0) on error number of bytes actually sent.
|
|
|
|
*
|
|
|
|
* The user supplied callback function is called when the send completes. Note that
|
2006-02-07 06:32:36 +03:00
|
|
|
* the callback may occur before the call to mca_oob_send returns to the caller,
|
2004-08-18 19:51:40 +04:00
|
|
|
* if the send completes during the call.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_send_packed_nb(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_buffer_t* buffer,
|
2004-08-18 19:51:40 +04:00
|
|
|
int tag,
|
2006-02-07 06:32:36 +03:00
|
|
|
int flags,
|
2004-08-18 19:51:40 +04:00
|
|
|
mca_oob_callback_packed_fn_t cbfunc,
|
|
|
|
void* cbdata);
|
|
|
|
|
2004-08-05 03:42:51 +04:00
|
|
|
/**
|
|
|
|
* Non-blocking version of mca_oob_recv().
|
|
|
|
*
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
|
|
|
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
|
2004-08-05 03:42:51 +04:00
|
|
|
* @param msg (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param flags (IN) May be MCA_OOB_PEEK to return up to size bytes of msg w/out removing it from the queue,
|
|
|
|
* @param cbfunc (IN) Callback function on recv completion.
|
|
|
|
* @param cbdata (IN) User data that is passed to callback function.
|
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually received.
|
|
|
|
*
|
2006-02-07 06:32:36 +03:00
|
|
|
* The user supplied callback function is called asynchronously when a message is received
|
2004-08-05 03:42:51 +04:00
|
|
|
* that matches the call parameters.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_recv_nb(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
|
|
|
struct iovec* msg,
|
|
|
|
int count,
|
2004-08-18 19:51:40 +04:00
|
|
|
int tag,
|
2006-02-07 06:32:36 +03:00
|
|
|
int flags,
|
2004-08-05 03:42:51 +04:00
|
|
|
mca_oob_callback_fn_t cbfunc,
|
|
|
|
void* cbdata);
|
2004-09-30 19:09:29 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Routine to cancel pending non-blocking recvs.
|
|
|
|
*
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
|
|
|
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
|
2004-09-30 19:09:29 +04:00
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually received.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_recv_cancel(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
2004-09-30 19:09:29 +04:00
|
|
|
int tag);
|
2004-08-05 03:42:51 +04:00
|
|
|
|
2004-06-30 00:36:34 +04:00
|
|
|
/**
|
2004-08-18 19:51:40 +04:00
|
|
|
* Non-blocking version of mca_oob_recv_packed().
|
|
|
|
*
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 22:34:59 +03:00
|
|
|
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
|
2004-08-18 19:51:40 +04:00
|
|
|
* @param buffer (IN) Array of iovecs describing user buffers and lengths.
|
|
|
|
* @param count (IN) Number of elements in iovec array.
|
|
|
|
* @param tag (IN) User defined tag for matching send/recv.
|
|
|
|
* @param flags (IN) May be MCA_OOB_PEEK to return up to size bytes of msg w/out removing it from the queue,
|
|
|
|
* @param cbfunc (IN) Callback function on recv completion.
|
|
|
|
* @param cbdata (IN) User data that is passed to callback function.
|
|
|
|
* @return OMPI error code (<0) on error or number of bytes actually received.
|
|
|
|
*
|
2006-02-07 06:32:36 +03:00
|
|
|
* The user supplied callback function is called asynchronously when a message is received
|
2004-08-18 19:51:40 +04:00
|
|
|
* that matches the call parameters.
|
|
|
|
*/
|
2004-06-30 00:36:34 +04:00
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_recv_packed_nb(
|
2006-02-07 06:32:36 +03:00
|
|
|
orte_process_name_t* peer,
|
2004-08-18 19:51:40 +04:00
|
|
|
int tag,
|
2006-02-07 06:32:36 +03:00
|
|
|
int flags,
|
2004-08-18 19:51:40 +04:00
|
|
|
mca_oob_callback_packed_fn_t cbfunc,
|
|
|
|
void* cbdata);
|
2004-02-01 00:43:26 +03:00
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
/**
|
2006-02-07 06:32:36 +03:00
|
|
|
* A "broadcast-like" function over the specified set of peers.
|
Bring in the code for routing xcast stage gate messages via the local orteds. This code is inactive unless you specifically request it via an mca param oob_xcast_mode (can be set to "linear" or "direct"). Direct mode is the old standard method where we send messages directly to each MPI process. Linear mode sends the xcast message via the orteds, with the HNP sending the message to each orted directly.
There is a binomial algorithm in the code (i.e., the HNP would send to a subset of the orteds, which then relay it on according to the typical log-2 algo), but that has a bug in it so the code won't let you select it even if you tried (and the mca param doesn't show, so you'd *really* have to try).
This also involved a slight change to the oob.xcast API, so propagated that as required.
Note: this has *only* been tested on rsh, SLURM, and Bproc environments (now that it has been transferred to the OMPI trunk, I'll need to re-test it [only done rsh so far]). It should work fine on any environment that uses the ORTE daemons - anywhere else, you are on your own... :-)
Also, correct a mistake where the orte_debug_flag was declared an int, but the mca param was set as a bool. Move the storage for that flag to the orte/runtime/params.c and orte/runtime/params.h files appropriately.
This commit was SVN r14475.
2007-04-23 22:41:04 +04:00
|
|
|
* @param job The job whose processes are to receive the message.
|
|
|
|
* @param msg The message to be sent
|
|
|
|
* @param cbfunc Callback function on receipt of data
|
2006-02-07 06:32:36 +03:00
|
|
|
*
|
2004-11-20 22:12:43 +03:00
|
|
|
* Note that the callback function is provided so that the data can be
|
Bring in the code for routing xcast stage gate messages via the local orteds. This code is inactive unless you specifically request it via an mca param oob_xcast_mode (can be set to "linear" or "direct"). Direct mode is the old standard method where we send messages directly to each MPI process. Linear mode sends the xcast message via the orteds, with the HNP sending the message to each orted directly.
There is a binomial algorithm in the code (i.e., the HNP would send to a subset of the orteds, which then relay it on according to the typical log-2 algo), but that has a bug in it so the code won't let you select it even if you tried (and the mca param doesn't show, so you'd *really* have to try).
This also involved a slight change to the oob.xcast API, so propagated that as required.
Note: this has *only* been tested on rsh, SLURM, and Bproc environments (now that it has been transferred to the OMPI trunk, I'll need to re-test it [only done rsh so far]). It should work fine on any environment that uses the ORTE daemons - anywhere else, you are on your own... :-)
Also, correct a mistake where the orte_debug_flag was declared an int, but the mca param was set as a bool. Move the storage for that flag to the orte/runtime/params.c and orte/runtime/params.h files appropriately.
This commit was SVN r14475.
2007-04-23 22:41:04 +04:00
|
|
|
* received and interpreted by the application
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
|
|
|
|
2006-11-28 03:06:25 +03:00
|
|
|
ORTE_DECLSPEC int mca_oob_xcast(orte_jobid_t job,
|
Bring in the code for routing xcast stage gate messages via the local orteds. This code is inactive unless you specifically request it via an mca param oob_xcast_mode (can be set to "linear" or "direct"). Direct mode is the old standard method where we send messages directly to each MPI process. Linear mode sends the xcast message via the orteds, with the HNP sending the message to each orted directly.
There is a binomial algorithm in the code (i.e., the HNP would send to a subset of the orteds, which then relay it on according to the typical log-2 algo), but that has a bug in it so the code won't let you select it even if you tried (and the mca param doesn't show, so you'd *really* have to try).
This also involved a slight change to the oob.xcast API, so propagated that as required.
Note: this has *only* been tested on rsh, SLURM, and Bproc environments (now that it has been transferred to the OMPI trunk, I'll need to re-test it [only done rsh so far]). It should work fine on any environment that uses the ORTE daemons - anywhere else, you are on your own... :-)
Also, correct a mistake where the orte_debug_flag was declared an int, but the mca param was set as a bool. Move the storage for that flag to the orte/runtime/params.c and orte/runtime/params.h files appropriately.
This commit was SVN r14475.
2007-04-23 22:41:04 +04:00
|
|
|
orte_gpr_notify_message_t *msg,
|
2006-11-28 03:06:25 +03:00
|
|
|
orte_gpr_trigger_cb_fn_t cbfunc);
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-10-06 23:39:20 +04:00
|
|
|
/*
|
|
|
|
* Callback on exception condition.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Exception conditions distinguished by the OOB exception-handling interface.
 *
 * No explicit values are assigned, so the enumerators are numbered from 0 in
 * declaration order.
 */
typedef enum {
    MCA_OOB_PEER_UNREACH,       /* value 0; by its name, the peer could not be reached */
    MCA_OOB_PEER_DISCONNECTED   /* value 1; by its name, the connection to the peer was lost */
} mca_oob_base_exception_t;
|
|
|
|
|
|
|
|
/**
 * Signature of an exception callback.
 *
 * @param peer      Name of the process the exception refers to.
 * @param exception Exception code, passed as a plain int.
 *                  NOTE(review): presumably one of the
 *                  mca_oob_base_exception_t values -- confirm with callers.
 * @return          int; its meaning is not visible from this header.
 */
typedef int (*mca_oob_base_exception_fn_t)(
    const orte_process_name_t *peer,
    int exception);
|
2006-02-07 06:32:36 +03:00
|
|
|
|
2005-10-06 23:39:20 +04:00
|
|
|
/**
|
|
|
|
* Register a callback function on loss of a connection.
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_add_exception_handler(
|
2005-10-06 23:39:20 +04:00
|
|
|
mca_oob_base_exception_fn_t cbfunc);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Remove a callback
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC int mca_oob_del_exception_handler(
|
2005-10-06 23:39:20 +04:00
|
|
|
mca_oob_base_exception_fn_t cbfunc);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Invoke exception handlers
|
|
|
|
*/
|
|
|
|
|
2006-08-20 19:54:04 +04:00
|
|
|
ORTE_DECLSPEC void mca_oob_call_exception_handlers(
|
2005-10-06 23:39:20 +04:00
|
|
|
orte_process_name_t* peer, int exception);
|
|
|
|
|
2004-02-01 00:43:26 +03:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|
2004-08-05 03:42:51 +04:00
|
|
|
|