
* clang warning stomp * memory barrier for volatile variable use These can go to 1.7.5 or can slip to v1.8 -- RM decision. Submitted by Jeff Squyres, reviewed by Dave Goodell cmr=v1.7.5:reviewer=ompi-rm1.7 This commit was SVN r30944.
302 строки
11 KiB
C
302 строки
11 KiB
C
/*
|
|
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef OMPI_BTL_USNIC_CONNECTIVITY_H
|
|
#define OMPI_BTL_USNIC_CONNECTIVITY_H
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "ompi/runtime/mpiruntime.h"
|
|
|
|
#include "btl_usnic_util.h"
|
|
|
|
|
|
/**
|
|
* Agent-based service to verify UDP connectivity between two peers.
|
|
*
|
|
* Generally, it is a client-server pattern with three entities
|
|
* involved:
|
|
*
|
|
* 1. Agent thread: running in MPI process local rank 0
|
|
* 2. Client: running in the main application thread in every MPI process
|
|
* 3. RTE thread: running in every MPI process
|
|
*
|
|
* If enabled (via MCA param), the usnic module_init() will setup the
|
|
* client (and server on local rank 0). For each usnic module, each
|
|
* client will RML send a request to the server asking it to listen on
|
|
* its usnic interface. The agent will discard duplicates and setup a
|
|
* single UDP socket listener on the eth interface corresponding to
|
|
* each requested usnic interface. The agent returns the listening
|
|
* UDP port number to the client, and each client puts this UDP port
|
|
* number in their modex information.
|
|
*
|
|
* At the first send to a given MPI process peer, the client will send
|
|
* another request to the server asking it to verify connectivity to
|
|
* the peer (supplying the peer's UDP listener port number from the
|
|
* peer's modex info). Again, the agent will discard duplicates -- it
|
|
* will only verify connectivity to each peer's *server* once. The
|
|
* agent will send a short UDP message and a long UDP message
|
|
* (basically, the MTU-68 bytes -- see comment in btl_usnic_cagent.c
|
|
* for the reasons why) to the listening peer UDP port.
|
|
*
|
|
* When the peer agent gets PING messages, it sends short ACK control
|
|
* messages back to the sending agent. When the sending agent gets
|
|
* all ACKs back from the peer, it rules that connectivity is GOOD and
|
|
* no further action is taken. If the sending agent doesn't get one
|
|
* or both ACKs back in a timely fashion, it re-sends the PING(s) that
|
|
* wasn't(weren't) ACKed. Eventually if the sending agent re-sends
|
|
* too many times and does not get an ACK back, it gives up, displays
|
|
* an error, and aborts the MPI job.
|
|
*
|
|
* Note that the client/server interaction is intentionally quite
|
|
* primitive:
|
|
*
|
|
* 1. Client requests agent to listen on interface X. Server responds
|
|
* with UDP port number of listener.
|
|
*
|
|
* 2. Client requests ping check to peer Y. Client does not wait for
|
|
* the answer; the agent either verifies the connectivity successfully
|
|
* or aborts the job.
|
|
*
|
|
* All client/agent communication is via the RML.
|
|
*
|
|
* As mentioned above, the agent is smart about discarding duplicate
|
|
* ping requests from clients. Since a single agent serves all MPI
|
|
* processes on a given server, this cuts down on a lot of PING
|
|
* traffic.
|
|
*/
|
|
|
|
/*
|
|
* Forward declaration
|
|
*/
|
|
struct ompi_btl_usnic_module_t;
|
|
|
|
/** @internal
 * Shorthand used by the pack/unpack helper macros in this header.
 *
 * Forwards @a msg, together with the file name and line number of the
 * expansion site, to ompi_btl_usnic_util_abort().  The final argument
 * is always passed as 1.
 */
#define ABORT(msg)                                                      \
    ompi_btl_usnic_util_abort((msg), __FILE__, __LINE__, 1)
|
|
|
|
/**
 * Helper macro to pack binary bytes into RML message buffers.
 *
 * @param buffer     Passed through as the first argument of opal_dss.pack().
 * @param value      Lvalue to pack; the macro passes &(value).
 * @param num_bytes  Count of OPAL_BYTE elements to pack.
 *
 * If opal_dss.pack() returns anything other than OPAL_SUCCESS, the
 * error is logged with OMPI_ERROR_LOG() and ABORT() is invoked (i.e.,
 * ompi_btl_usnic_util_abort() is called).
 *
 * The macro-local variable has a deliberately unusual name so that it
 * cannot shadow an identifier used in the caller's arguments (for
 * example, a caller packing its own variable named "ret").
 */
#define PACK_BYTES(buffer, value, num_bytes)                            \
    do {                                                                \
        int usnic_pack_bytes_rc_ =                                      \
            opal_dss.pack((buffer), &(value), (num_bytes), OPAL_BYTE);  \
        if (OPAL_SUCCESS != usnic_pack_bytes_rc_) {                     \
            OMPI_ERROR_LOG(usnic_pack_bytes_rc_);                       \
            ABORT("Could not pack");                                    \
        }                                                               \
    } while(0)
|
|
|
|
/**
 * @internal
 *
 * Back-end macro for PACK_STRING, PACK_INT32, and PACK_UINT32.
 *
 * @param buffer     Passed through as the first argument of opal_dss.pack().
 * @param type       C type of the temporary that @a value is converted to.
 * @param opal_type  Type tag handed to opal_dss.pack().
 * @param value      Expression to pack; evaluated exactly once.
 *
 * The value is first copied into a temporary of type @a type, and the
 * address of that temporary is packed with a count of 1.
 *
 * Note that this macro will call ompi_btl_usnic_util_abort()
 * if an error occurs.
 *
 * @a value is evaluated before any other macro-local is declared, and
 * the locals use deliberately unusual names, so a caller argument
 * named e.g. "ret" or "temp" is not captured by the macro's own
 * variables.
 */
#define PACK_(buffer, type, opal_type, value)                           \
    do {                                                                \
        type usnic_pack_tmp_ = (type) (value);                          \
        int usnic_pack_rc_ =                                            \
            opal_dss.pack((buffer), &usnic_pack_tmp_, 1, opal_type);    \
        if (OPAL_SUCCESS != usnic_pack_rc_) {                           \
            OMPI_ERROR_LOG(usnic_pack_rc_);                             \
            ABORT("Could not pack");                                    \
        }                                                               \
    } while(0)
|
|
|
|
/**
 * Helper macro to pack a string into RML message buffers.
 *
 * Expands to PACK_() with a `char *` temporary and the OPAL_STRING
 * type tag.
 */
#define PACK_STRING(buffer, value)                                      \
    PACK_(buffer, char *, OPAL_STRING, value)
|
|
/**
 * Helper macro to pack an int32_t into RML message buffers.
 *
 * Expands to PACK_() with an `int32_t` temporary and the OPAL_INT32
 * type tag.
 */
#define PACK_INT32(buffer, value)                                       \
    PACK_(buffer, int32_t, OPAL_INT32, value)
|
|
/**
 * Helper macro to pack a uint32_t into RML message buffers.
 *
 * Expands to PACK_() with a `uint32_t` temporary and the OPAL_UINT32
 * type tag.
 */
#define PACK_UINT32(buffer, value)                                      \
    PACK_(buffer, uint32_t, OPAL_UINT32, value)
|
|
|
|
/**
 * Helper macro to unpack binary bytes from RML message buffers.
 *
 * @param buffer     Passed through as the first argument of
 *                   opal_dss.unpack().
 * @param value      Destination; passed as-is (not address-of) as the
 *                   second argument of opal_dss.unpack().
 * @param num_bytes  Initial value of the OPAL_BYTE element count whose
 *                   address is handed to opal_dss.unpack().
 *
 * Note that this macro will call ompi_btl_usnic_util_abort() if an
 * error occurs.
 *
 * The macro-locals use deliberately unusual names: with a local simply
 * called "n", a caller writing UNPACK_BYTES(buf, dst, n) would
 * initialize the macro's count from itself instead of from the
 * caller's variable.
 */
#define UNPACK_BYTES(buffer, value, num_bytes)                          \
    do {                                                                \
        int usnic_unpack_count_ = (num_bytes);                          \
        int usnic_unpack_rc_ =                                          \
            opal_dss.unpack((buffer), (value), &usnic_unpack_count_,    \
                            OPAL_BYTE);                                 \
        if (OPAL_SUCCESS != usnic_unpack_rc_) {                         \
            OMPI_ERROR_LOG(usnic_unpack_rc_);                           \
            ABORT("Could not unpack");                                  \
        }                                                               \
    } while(0)
|
|
|
|
/**
 * @internal
 *
 * Back-end macro for UNPACK_STRING, UNPACK_INT32, and UNPACK_UINT32
 *
 * @param buffer            Passed through as the first argument of
 *                          opal_dss.unpack().
 * @param type              C type of the temporary that receives the
 *                          unpacked item.
 * @param opal_unpack_type  Type tag handed to opal_dss.unpack().
 * @param value             Lvalue that receives the result.
 *
 * @a value is set to (type) 0 before unpacking, and is overwritten
 * with the unpacked item only when opal_dss.unpack() returns
 * OPAL_SUCCESS.  Exactly one item is requested.
 *
 * Note that this macro will call ompi_btl_usnic_util_abort() if an error
 * occurs.
 *
 * Also note that we use a temp variable of the correct type because
 * some of the values passed in to this macro are volatile, and the
 * call to opal_dss.unpack() will discard that volatile qualifier.
 *
 * The macro-locals use deliberately unusual names so that a caller
 * lvalue called e.g. "n" or "temp" is not shadowed (which would leave
 * the caller's variable unmodified).
 */
#define UNPACK(buffer, type, opal_unpack_type, value)                   \
    do {                                                                \
        int usnic_unpack_rc_;                                           \
        int usnic_unpack_count_ = 1;                                    \
        type usnic_unpack_tmp_;                                         \
        (value) = (type) 0;                                             \
        usnic_unpack_rc_ =                                              \
            opal_dss.unpack((buffer), &usnic_unpack_tmp_,               \
                            &usnic_unpack_count_, opal_unpack_type);    \
        if (OPAL_SUCCESS != usnic_unpack_rc_) {                         \
            OMPI_ERROR_LOG(usnic_unpack_rc_);                           \
            ABORT("Could not unpack");                                  \
        } else {                                                        \
            (value) = (type) usnic_unpack_tmp_;                         \
        }                                                               \
    } while(0)
|
|
|
|
/**
 * Helper macro to unpack a string from RML message buffers.
 *
 * Expands to UNPACK() with a `char *` temporary and the OPAL_STRING
 * type tag.  The expansion deliberately carries no trailing semicolon:
 * the caller supplies it, so the macro behaves like a single statement
 * (e.g., as the body of an unbraced if/else).
 */
#define UNPACK_STRING(buffer, value)                                    \
    UNPACK(buffer, char *, OPAL_STRING, value)
|
|
/**
 * Helper macro to unpack an int32_t from RML message buffers.
 *
 * Expands to UNPACK() with an `int32_t` temporary and the OPAL_INT32
 * type tag.  The expansion deliberately carries no trailing semicolon:
 * the caller supplies it, so the macro behaves like a single statement
 * (e.g., as the body of an unbraced if/else).
 */
#define UNPACK_INT32(buffer, value)                                     \
    UNPACK(buffer, int32_t, OPAL_INT32, value)
|
|
/**
 * Helper macro to unpack a uint32_t from RML message buffers.
 *
 * Expands to UNPACK() with a `uint32_t` temporary and the OPAL_UINT32
 * type tag.
 */
#define UNPACK_UINT32(buffer, value) UNPACK(buffer, uint32_t, OPAL_UINT32, value)
|
|
|
|
/**
 * RML message types.
 *
 * The message type is packed as the first field of every RML message
 * so that the message can be identified.  Numbering starts at a
 * non-zero value purely as defensive programming: an uninitialized
 * message type is slightly less likely to match one of these values
 * by accident.
 */
enum {
    /* First command; explicitly non-zero (see above). */
    CONNECTIVITY_AGENT_CMD_LISTEN = 17,

    /* Implicitly CONNECTIVITY_AGENT_CMD_LISTEN + 1. */
    CONNECTIVITY_AGENT_CMD_PING,

    /* One greater than the last command value; presumably an upper
       bound / sentinel rather than a real command -- TODO confirm. */
    CONNECTIVITY_AGENT_CMD_MAX
};
|
|
|
|
/**
|
|
* Startup the connectivity client.
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* It is safe to call this function even if the connectivity check is
|
|
* disabled; it will be a no-op in this case.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_client_init(void);
|
|
|
|
/**
|
|
* Tell the agent to establish a listening port on the given IP
|
|
* address.
|
|
*
|
|
* @param[in] module The module that is requesting the listen.
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* The module contains the local interface addressing information,
|
|
* which tells the agent on which interface to listen.
|
|
*
|
|
* This routine will request the new listen from the agent, and wait
|
|
* for the agent to reply with the UDP port that is being used/was
|
|
* created. The UDP listening port will then be stuffed in
|
|
* module->local_addr.connectivity_udp_port (i.e., data that will be
|
|
* sent in the modex).
|
|
*
|
|
* It is safe to call this function even if the connectivity check is
|
|
* disabled; it will be a no-op in this case.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_listen(struct ompi_btl_usnic_module_t *module);
|
|
|
|
/**
|
|
* Tell the agent to ping a specific IP address and UDP port number
|
|
* with a specific message size.
|
|
*
|
|
* @param[in] src_ipv4_addr The source module IPv4 address
|
|
* @param[in] src_port The source module listening UDP port
|
|
* @param[in] dest_ipv4_addr The destination IPv4 address
|
|
* @param[in] dest_cidrmask The destination CIDR mask
|
|
* @param[in] dest_port The destination UDP port
|
|
* @param[in] dest_mac The destination MAC address
|
|
* @param[in] dest_nodename The destination server name
|
|
* @param[in] mtu The max ping message size to send
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* Note that several of the above parameters are only passed so that
|
|
* they can be used in a complete/helpful error message, if necessary.
|
|
*
|
|
* This function does not wait for a reply from the agent; it assumes
|
|
* the agent will successfully ping the remote peer or will abort the
|
|
* MPI job if the pinging fails.
|
|
*
|
|
* It is safe to call this function even if the connectivity check is
|
|
* disabled; it will be a no-op in this case.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
|
|
uint32_t dest_ipv4_addr,
|
|
uint32_t dest_cidrmask, int dest_port,
|
|
uint8_t *dest_mac, char *dest_nodename,
|
|
size_t mtu);
|
|
|
|
/**
|
|
* Shut down the connectivity service client.
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* It is safe to call this function even if the connectivity check is
|
|
* disabled; it will be a no-op in this case.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_client_finalize(void);
|
|
|
|
/**
|
|
* Startup the connectivity agent.
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* This function will be a no-op if this process is not the local rank
|
|
* 0.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_agent_init(void);
|
|
|
|
/**
|
|
* Shut down the connectivity agent
|
|
*
|
|
* @returns OMPI_SUCCESS or an OMPI error code.
|
|
*
|
|
* This function will be a no-op if this process is not the local rank
|
|
* 0.
|
|
*/
|
|
int ompi_btl_usnic_connectivity_agent_finalize(void);
|
|
|
|
#endif /* OMPI_BTL_USNIC_CONNECTIVITY_H */
|