Bring the RML modifications across. This is the first step in a revamp of the ORTE messaging subsystem to support fabric-based communications during launch and wireup phases. When completed, the grpcomm and plm frameworks will each have their own "conduit" for communication - each conduit corresponds to a particular RML messaging transport. This can be the active OOB-based component, or a provider from within the RML/OFI component. Messages sent down the conduit will flow across the associated transport.
Multiple conduits can exist at the same time, and can even point to the same base transport. Each conduit can have its own characteristics (e.g., flow control) based on the info keys provided to the "open_conduit" call. For ease during the transition period, the "legacy" RML interfaces remain as wrappers over the new conduit-based APIs using a default conduit opened during orte_init - this default conduit is tied to the OOB framework so that current behaviors are preserved. Once the transition has been completed, a one-time cleanup will be done to update all RML calls to the new APIs and the "legacy" interfaces will be deleted. While we are at it: remove the oob/usock component to eliminate the TMPDIR length problem, and get everything working again, including oob_stress.
Этот коммит содержится в:
родитель
432d79046b
Коммит
a2919174d0
@ -154,7 +154,8 @@ enum {
|
||||
ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55),
|
||||
ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56),
|
||||
ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57),
|
||||
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58)
|
||||
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58),
|
||||
ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 59)
|
||||
};
|
||||
|
||||
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -201,12 +201,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_grpcomm_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication via the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
/* setup the routed info */
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -356,6 +356,29 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
/* obviously, we have "reported" */
|
||||
jdata->num_reported = 1;
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
/* the server code already barked, so let's be quiet */
|
||||
ret = ORTE_ERR_SILENT;
|
||||
error = "pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Setup the communication infrastructure */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -380,6 +403,13 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
/* add our contact info */
|
||||
proc->rml_uri = orte_rml.get_contact_info();
|
||||
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* select the errmgr */
|
||||
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -440,49 +470,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
error = "orte_rtc_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication with the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
/* Now provide a chance for the PLM
|
||||
* to perform any module-specific init functions. This
|
||||
* needs to occur AFTER the communications are setup
|
||||
* as it may involve starting a non-blocking recv
|
||||
* Do this only if a specific PLM was given to us - the
|
||||
* orted has no need of the proxy PLM at all
|
||||
*/
|
||||
if (plm_in_use) {
|
||||
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_plm_init";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
/* the server code already barked, so let's be quiet */
|
||||
ret = ORTE_ERR_SILENT;
|
||||
error = "pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
#if ORTE_ENABLE_STATIC_PORTS
|
||||
/* if we are using static ports, then we need to setup
|
||||
* the daemon info so the RML can function properly
|
||||
@ -511,6 +499,21 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
*/
|
||||
orte_routed.update_routing_plan();
|
||||
|
||||
/* Now provide a chance for the PLM
|
||||
* to perform any module-specific init functions. This
|
||||
* needs to occur AFTER the communications are setup
|
||||
* as it may involve starting a non-blocking recv
|
||||
* Do this only if a specific PLM was given to us - the
|
||||
* orted has no need of the proxy PLM at all
|
||||
*/
|
||||
if (plm_in_use) {
|
||||
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_plm_init";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the routed info - the selected routed component
|
||||
* will know what to do.
|
||||
*/
|
||||
|
@ -134,12 +134,7 @@ int orte_ess_base_tool_setup(void)
|
||||
* to which I want to communicate and setting up a route for
|
||||
* that link is my responsibility
|
||||
*/
|
||||
/* enable communication via the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* we -may- need to know the name of the head
|
||||
* of our session directory tree, particularly the
|
||||
* tmp base where any other session directories on
|
||||
|
@ -551,12 +551,7 @@ static int rte_init(void)
|
||||
error = "orte_rtc_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication with the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* we are an hnp, so update the contact info field for later use */
|
||||
orte_process_info.my_hnp_uri = orte_rml.get_contact_info();
|
||||
proc->rml_uri = strdup(orte_process_info.my_hnp_uri);
|
||||
|
@ -56,9 +56,6 @@
|
||||
/* LOCAL FUNCTIONS */
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata);
|
||||
|
||||
static void
|
||||
orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t reason);
|
||||
|
||||
/* API FUNCTIONS */
|
||||
static int init(void);
|
||||
|
||||
@ -94,8 +91,6 @@ orte_iof_base_module_t orte_iof_hnp_module = {
|
||||
/* Initialize the module */
|
||||
static int init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* post non-blocking recv to catch forwarded IO from
|
||||
* the orteds
|
||||
*/
|
||||
@ -105,12 +100,6 @@ static int init(void)
|
||||
orte_iof_hnp_recv,
|
||||
NULL);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.add_exception_handler(orte_iof_hnp_exception_handler))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_iof_hnp_component.procs, opal_list_t);
|
||||
mca_iof_hnp_component.stdinev = NULL;
|
||||
|
||||
@ -610,37 +599,3 @@ CHECK:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback when peer is disconnected
|
||||
*/
|
||||
|
||||
static void
|
||||
orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t reason)
|
||||
{
|
||||
#if 0
|
||||
orte_iof_base_endpoint_t *endpoint;
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output,
|
||||
"iof svc exception handler! %s\n",
|
||||
ORTE_NAME_PRINT((orte_process_name_t*)peer));
|
||||
|
||||
/* If we detect an exception on the RML connection to a peer,
|
||||
delete all of its subscriptions and publications. Note that
|
||||
exceptions can be detected during a normal RML shutdown; they
|
||||
are recoverable events (no need to abort). */
|
||||
orte_iof_hnp_sub_delete_all(peer);
|
||||
orte_iof_hnp_pub_delete_all(peer);
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output, "deleted all pubs and subs\n");
|
||||
|
||||
/* Find any streams on any endpoints for this peer and close them */
|
||||
while (NULL !=
|
||||
(endpoint = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL,
|
||||
ORTE_IOF_ANY))) {
|
||||
orte_iof_base_endpoint_closed(endpoint);
|
||||
|
||||
/* Delete the endpoint that we just matched */
|
||||
orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
|
||||
}
|
||||
#endif
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output, "done with exception handler\n");
|
||||
}
|
||||
|
@ -181,6 +181,12 @@ OBJ_CLASS_DECLARATION(mca_oob_uri_req_t);
|
||||
}while(0);
|
||||
ORTE_DECLSPEC void orte_oob_base_set_addr(int fd, short args, void *cbdata);
|
||||
|
||||
|
||||
/* Get the available transports and their attributes */
|
||||
#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u)
|
||||
ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports);
|
||||
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata);
|
||||
#endif
|
||||
|
@ -400,6 +400,30 @@ static void process_uri(char *uri)
|
||||
opal_argv_free(uris);
|
||||
}
|
||||
|
||||
/*
 * Collect the pathway descriptions of all active OOB components.
 *
 * Walks orte_oob_base.actives and, for every component that provides a
 * query_transports entry point and returns a non-NULL pathway from it,
 * appends that pathway to the caller-supplied list.
 *
 * @param transports  list that receives the orte_rml_pathway_t items
 */
void orte_oob_base_get_transports(opal_list_t *transports)
{
    mca_base_component_list_item_t *item;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s: get transports",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(item, &orte_oob_base.actives, mca_base_component_list_item_t) {
        mca_oob_base_component_t *active = (mca_oob_base_component_t*)item->cli_component;
        orte_rml_pathway_t *pathway;

        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s:get transports for component %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            active->oob_base.mca_component_name);

        /* the query entry point is optional */
        if (NULL == active->query_transports) {
            continue;
        }

        /* a NULL answer means the component has nothing to offer */
        pathway = active->query_transports();
        if (NULL == pathway) {
            continue;
        }

        opal_list_append(transports, &pathway->super);
    }
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
void orte_oob_base_ft_event(int sd, short argc, void *cbdata)
|
||||
{
|
||||
|
@ -57,24 +57,27 @@ typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer);
|
||||
typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata);
|
||||
typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void);
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
typedef int (*mca_oob_base_component_ft_event_fn_t)(int state);
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
mca_base_component_t oob_base;
|
||||
mca_base_component_data_t oob_data;
|
||||
int idx;
|
||||
int priority;
|
||||
mca_oob_base_component_avail_fn_t available;
|
||||
mca_oob_base_component_startup_fn_t startup;
|
||||
mca_oob_base_component_shutdown_fn_t shutdown;
|
||||
mca_oob_base_component_send_fn_t send_nb;
|
||||
mca_oob_base_component_get_addr_fn_t get_addr;
|
||||
mca_oob_base_component_set_addr_fn_t set_addr;
|
||||
mca_oob_base_component_is_reachable_fn_t is_reachable;
|
||||
mca_base_component_t oob_base;
|
||||
mca_base_component_data_t oob_data;
|
||||
int idx;
|
||||
int priority;
|
||||
mca_oob_base_component_avail_fn_t available;
|
||||
mca_oob_base_component_startup_fn_t startup;
|
||||
mca_oob_base_component_shutdown_fn_t shutdown;
|
||||
mca_oob_base_component_send_fn_t send_nb;
|
||||
mca_oob_base_component_get_addr_fn_t get_addr;
|
||||
mca_oob_base_component_set_addr_fn_t set_addr;
|
||||
mca_oob_base_component_is_reachable_fn_t is_reachable;
|
||||
mca_oob_base_component_query_transports_fn_t query_transports;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
mca_oob_base_component_ft_event_fn_t ft_event;
|
||||
mca_oob_base_component_ft_event_fn_t ft_event;
|
||||
#endif
|
||||
} mca_oob_base_component_t;
|
||||
|
||||
|
@ -277,6 +277,13 @@ static void process_set_peer(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(peer);
|
||||
return;
|
||||
}
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* we have to initiate the connection because otherwise the
|
||||
* daemon has no way to communicate to us via this component
|
||||
* as the app doesn't have a listening port */
|
||||
peer->state = MCA_OOB_TCP_CONNECTING;
|
||||
ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect);
|
||||
}
|
||||
}
|
||||
|
||||
maddr = OBJ_NEW(mca_oob_tcp_addr_t);
|
||||
@ -294,7 +301,7 @@ static void process_set_peer(int fd, short args, void *cbdata)
|
||||
(NULL == pop->port) ? "NULL" : pop->port);
|
||||
opal_list_append(&peer->addrs, &maddr->super);
|
||||
|
||||
cleanup:
|
||||
cleanup:
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
|
@ -64,8 +64,10 @@
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/util/attr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -94,6 +96,7 @@ static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
static orte_rml_pathway_t* component_query_transports(void);
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
static int component_ft_event(int state);
|
||||
#endif
|
||||
@ -124,6 +127,7 @@ mca_oob_tcp_component_t mca_oob_tcp_component = {
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
.query_transports = component_query_transports,
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
.ft_event = component_ft_event,
|
||||
#endif
|
||||
@ -146,11 +150,8 @@ static int tcp_component_open(void)
|
||||
mca_oob_tcp_component.addr_count = 0;
|
||||
mca_oob_tcp_component.ipv4conns = NULL;
|
||||
mca_oob_tcp_component.ipv4ports = NULL;
|
||||
|
||||
#if OPAL_ENABLE_IPV6
|
||||
mca_oob_tcp_component.ipv6conns = NULL;
|
||||
mca_oob_tcp_component.ipv6ports = NULL;
|
||||
#endif
|
||||
|
||||
/* if_include and if_exclude need to be mutually exclusive */
|
||||
if (OPAL_SUCCESS !=
|
||||
@ -513,6 +514,11 @@ static int component_available(void)
|
||||
/* get the name for diagnostic purposes */
|
||||
opal_ifindextoname(i, name, sizeof(name));
|
||||
|
||||
/* ignore any virtual interfaces */
|
||||
if (0 == strncmp(name, "vir", 3)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* handle include/exclude directives */
|
||||
if (NULL != interfaces) {
|
||||
/* check for match */
|
||||
@ -612,6 +618,37 @@ static int component_available(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_rml_pathway_t* component_query_transports(void)
|
||||
{
|
||||
orte_rml_pathway_t *p;
|
||||
char *qual;
|
||||
|
||||
/* if neither IPv4 or IPv6 connections are available, then
|
||||
* we have nothing to support */
|
||||
if (NULL == mca_oob_tcp_component.ipv4conns &&
|
||||
NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if we get here, then we support Ethernet and TCP */
|
||||
p = OBJ_NEW(orte_rml_pathway_t);
|
||||
p->component = strdup("oob");
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING);
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_PROTOCOL_TYPE, ORTE_ATTR_LOCAL, "TCP", OPAL_STRING);
|
||||
/* setup our qualifiers - we route communications, may have IPv4 and/or IPv6, etc. */
|
||||
if (NULL != mca_oob_tcp_component.ipv4conns &&
|
||||
NULL != mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4:ipv6";
|
||||
} else if (NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4";
|
||||
} else {
|
||||
qual = "routed=true:ipv6";
|
||||
}
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_QUALIFIER_ATTRIB, ORTE_ATTR_LOCAL, qual, OPAL_STRING);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Start all modules */
|
||||
static int component_startup(void)
|
||||
{
|
||||
|
@ -60,14 +60,12 @@ typedef struct {
|
||||
char** ipv4conns;
|
||||
char** ipv4ports;
|
||||
|
||||
#if OPAL_ENABLE_IPV6
|
||||
/* IPv6 support */
|
||||
bool disable_ipv6_family; /**< disable this AF */
|
||||
char** tcp6_static_ports; /**< Static ports - IPV6 */
|
||||
char** tcp6_dyn_ports; /**< Dynamic ports - IPV6 */
|
||||
char** ipv6conns;
|
||||
char** ipv6ports;
|
||||
#endif
|
||||
|
||||
/* connection support */
|
||||
char* my_uri; /**< uri for connecting to the TCP module */
|
||||
|
@ -1,42 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_oob_usock_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_oob_usock_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/oob/usock/Makefile])
|
||||
|
||||
# check for sockaddr_un (a good sign we have Unix domain sockets)
|
||||
AC_CHECK_TYPES([struct sockaddr_un],
|
||||
[oob_usock_happy="yes"],
|
||||
[oob_usock_happy="no"],
|
||||
[AC_INCLUDES_DEFAULT
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_UN_H
|
||||
#include <sys/un.h>
|
||||
#endif])
|
||||
|
||||
AS_IF([test "$oob_usock_happy" = "yes"], [$1], [$2])
|
||||
])dnl
|
@ -1,70 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[static-and-dynamic]
|
||||
Both static and dynamic port ranges were specified for the
|
||||
out-of-band (OOB) communication subsystem:
|
||||
|
||||
Static ports: %s
|
||||
Dynamic ports: %s
|
||||
|
||||
Only one can be specified. Please choose either static or
|
||||
dynamic ports and try again.
|
||||
#
|
||||
[include-exclude]
|
||||
Both TCP interface include and exclude lists were specified:
|
||||
|
||||
Include: %s
|
||||
Exclude: %s
|
||||
|
||||
Only one of these can be given.
|
||||
#
|
||||
[not-parseable]
|
||||
The specified network is not parseable. Since we cannot determine
|
||||
your desired intent, we cannot establish a TCP socket for out-of-band
|
||||
communications and will therefore abort. Please correct the network
|
||||
specification and retry.
|
||||
#
|
||||
[no-included-found]
|
||||
None of the networks specified to be included for out-of-band communications
|
||||
could be found:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[excluded-all]
|
||||
The specified list of networks to be excluded for out-of-band communications
|
||||
resulted in no networks being available:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[no-interfaces-avail]
|
||||
No network interfaces were found for out-of-band communications. We require
|
||||
at least one available network for TCP-based messaging.
|
||||
#
|
||||
[invalid if_inexclude]
|
||||
WARNING: An invalid value was given for oob_tcp_if_%s. This
|
||||
value will be ignored.
|
||||
|
||||
Local host: %s
|
||||
Value: %s
|
||||
Message: %s
|
||||
#
|
@ -1,490 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
|
||||
static void usock_init(void);
|
||||
static void usock_fini(void);
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr);
|
||||
static void ping(const orte_process_name_t *proc);
|
||||
static void send_nb(orte_rml_send_t *msg);
|
||||
static void ft_event(int state);
|
||||
|
||||
mca_oob_usock_module_t mca_oob_usock_module = {
|
||||
{
|
||||
usock_init,
|
||||
usock_fini,
|
||||
accept_connection,
|
||||
ping,
|
||||
send_nb,
|
||||
ft_event
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void* user);
|
||||
/*
 * Body of the module-level progress thread: keeps running one pass of the
 * module's event base for as long as ev_active stays set, then returns
 * OPAL_THREAD_CANCELLED. The obj argument is not used.
 */
static void* progress_thread_engine(opal_object_t *obj)
{
    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s USOCK PROGRESS THREAD RUNNING",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    for (;;) {
        /* the flag is re-read before every pass */
        if (!mca_oob_usock_module.ev_active) {
            break;
        }
        opal_event_loop(mca_oob_usock_module.ev_base, OPAL_EVLOOP_ONCE);
    }

    return OPAL_THREAD_CANCELLED;
}
|
||||
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static void usock_init(void)
|
||||
{
|
||||
/* setup the module's state variables */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.peers, opal_hash_table_t);
|
||||
opal_hash_table_init(&mca_oob_usock_module.peers, 32);
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
|
||||
if (orte_oob_base.use_module_threads) {
|
||||
/* if we are to use independent progress threads at
|
||||
* the module level, start it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STARTING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_module.ev_base = opal_event_base_create();
|
||||
/* construct the thread object */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.progress_thread, opal_thread_t);
|
||||
/* fork off a thread to progress it */
|
||||
mca_oob_usock_module.progress_thread.t_run = progress_thread_engine;
|
||||
mca_oob_usock_module.progress_thread.t_arg = NULL;
|
||||
mca_oob_usock_module.ev_active = true;
|
||||
if (OPAL_SUCCESS != opal_thread_start(&mca_oob_usock_module.progress_thread)) {
|
||||
opal_output(0, "%s USOCK progress thread failed to start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
} else {
|
||||
mca_oob_usock_module.ev_base = orte_event_base;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Module cleanup.
|
||||
*/
|
||||
static void usock_fini(void)
|
||||
{
|
||||
/* cleanup all peers */
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.peers);
|
||||
|
||||
if (mca_oob_usock_module.ev_active) {
|
||||
/* if we used an independent progress thread at
|
||||
* the module level, stop it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STOPPING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* stop the progress thread */
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
/* break the event loop */
|
||||
opal_event_base_loopexit(mca_oob_usock_module.ev_base);
|
||||
/* wait for thread to exit */
|
||||
opal_thread_join(&mca_oob_usock_module.progress_thread, NULL);
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.progress_thread);
|
||||
/* release the event base */
|
||||
opal_event_base_free(mca_oob_usock_module.ev_base);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called by mca_oob_usock_accept() and connection_handler() on
|
||||
* a socket that has been accepted. This call finishes processing the
|
||||
* socket by registering for the OOB-level connection handshake. Used
|
||||
* in both the threaded and event listen modes.
|
||||
*/
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s accept_connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* use a one-time event to wait for receipt of peer's
|
||||
* process ident message to complete this connection
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_ACCEPT_STATE(accepted_fd, addr, recv_handler);
|
||||
}
|
||||
|
||||
/* API functions */
|
||||
static void process_ping(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_ping_t *op = (mca_oob_usock_ping_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing ping to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
|
||||
/* do we know this peer? */
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->peer))) {
|
||||
/* push this back to the framework so another component can try */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] hop %s unknown",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
#if 0
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, NULL, &op->peer, mca_oob_usock_component_hop_unknown);
|
||||
#endif
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connected, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connected to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connecting, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTING == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connecting to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* attempt the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void ping(const orte_process_name_t *proc)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] pinging peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(proc));
|
||||
|
||||
/* push this into our event base for processing */
|
||||
ORTE_ACTIVATE_USOCK_PING(proc, process_ping);
|
||||
}
|
||||
|
||||
static void process_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_op_t *op = (mca_oob_usock_msg_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
struct timeval tv;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->msg->dst));
|
||||
|
||||
/* if I am an app, the only route is to my daemon, so
|
||||
* send the msg there
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(ORTE_PROC_MY_DAEMON))) {
|
||||
/* we don't know how to talk to our daemon,
|
||||
* which is strange since we already got here.
|
||||
* likely means we lost a race condition, so
|
||||
*
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
ORTE_PROC_MY_DAEMON,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
} else if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* if I am a daemon, the only way I should be given this
|
||||
* message to send is if the proc is local to me
|
||||
*/
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->msg->dst))) {
|
||||
/* try this again after a delay for N times */
|
||||
op->reps++;
|
||||
if (20 < op->reps) {
|
||||
/* we don't know how to talk to this proc,
|
||||
* so send this back up to the OOB base so it
|
||||
* can try another transport
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
&op->msg->dst,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
opal_event_evtimer_set(orte_event_base, &op->ev, process_send, op);
|
||||
opal_event_set_priority(&op->ev, ORTE_ERROR_PRI);
|
||||
tv.tv_sec = 1;
|
||||
tv.tv_usec = 0;
|
||||
opal_event_evtimer_add(&op->ev, &tv);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* otherwise, this message can't be handled by me, so
|
||||
* notify the component of the mistake
|
||||
*/
|
||||
opal_output(0, "CAN'T BE HANDLED");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the msg to the target's send queue */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: already connected to %s - queueing for send",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
MCA_OOB_USOCK_QUEUE_SEND(op->msg, peer);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (MCA_OOB_USOCK_CLOSED == peer->state) {
|
||||
/* the peer has gone, it will never come back */
|
||||
goto cleanup;
|
||||
}
|
||||
/* add the message to the queue for sending after the
|
||||
* connection is formed
|
||||
*/
|
||||
MCA_OOB_USOCK_QUEUE_PENDING(op->msg, peer);
|
||||
|
||||
if (MCA_OOB_USOCK_CONNECTING != peer->state &&
|
||||
MCA_OOB_USOCK_CONNECT_ACK != peer->state) {
|
||||
/* we have to initiate the connection - again, we do not
|
||||
* want to block while the connection is created.
|
||||
* So throw us into an event that will create
|
||||
* the connection via a mini-state-machine :-)
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: initiating connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Module entry point for a non-blocking send.
 *
 * The message is only logged here; ORTE_ACTIVATE_USOCK_POST_SEND passes
 * it on so that process_send does the routing.
 */
static void send_nb(orte_rml_send_t *msg)
{
    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s usock:send_nb to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst));

    /* hand the message over for processing by process_send */
    ORTE_ACTIVATE_USOCK_POST_SEND(msg, process_send);
}
|
||||
|
||||
/*
|
||||
* Event callback when there is data available on the registered
|
||||
* socket to recv. This is called for the listen sockets to accept an
|
||||
* incoming connection, on new sockets trying to complete the software
|
||||
* connection process, and for probes. Data on an established
|
||||
* connection is handled elsewhere.
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* get the handshake */
|
||||
if (ORTE_SUCCESS != mca_oob_usock_peer_recv_connect_ack(NULL, sd, &hdr)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* finish processing ident */
|
||||
if (MCA_OOB_USOCK_IDENT == hdr.type) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&hdr.origin))) {
|
||||
/* should never happen */
|
||||
goto cleanup;
|
||||
}
|
||||
/* set socket up to be non-blocking */
|
||||
if ((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_GETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if (fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_SETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
}
|
||||
}
|
||||
|
||||
/* is the peer instance willing to accept this connection */
|
||||
peer->sd = sd;
|
||||
if (mca_oob_usock_peer_accept(peer) == false) {
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_recv_connect: "
|
||||
"rejected connection state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
memcpy(&ui64, &peer->name, sizeof(uint64_t));
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, NULL);
|
||||
OBJ_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Fault-tolerance (checkpoint/restart) event hook.
 *
 * Two variants exist, selected by OPAL_ENABLE_FT_CR.  Neither currently
 * does any work: the FT-enabled variant only distinguishes the states,
 * and all of its historical handling is compiled out with "#if 0".
 */
#if OPAL_ENABLE_FT_CR == 0

/* FT support not built: nothing to do */
static void ft_event(int state)
{
    return;
}

#else

static void ft_event(int state)
{
#if 0
    opal_list_item_t *item;
#endif

    if (OPAL_CRS_CHECKPOINT == state) {
#if 0
        /*
         * Disable event processing while we are working
         */
        opal_event_disable();
#endif
    } else if (OPAL_CRS_CONTINUE == state) {
#if 0
        /*
         * Resume event processing
         */
        opal_event_enable();
#endif
    } else if (OPAL_CRS_RESTART == state) {
        /* restart handling is entirely disabled, so this state is a
         * no-op just like the fall-through case below
         */
#if 0
        /*
         * Clean out cached connection information
         * Select pieces of finalize/init
         */
        for (item = opal_list_remove_first(&mod->peer_list);
             item != NULL;
             item = opal_list_remove_first(&mod->peer_list)) {
            mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)item;
            /* JJH: Use the below command for debugging restarts with invalid sockets
             * mca_oob_usock_peer_dump(peer, "RESTART CLEAN")
             */
            MCA_OOB_USOCK_PEER_RETURN(peer);
        }

        OBJ_DESTRUCT(&mod->peer_free);
        OBJ_DESTRUCT(&mod->peer_names);
        OBJ_DESTRUCT(&mod->peers);
        OBJ_DESTRUCT(&mod->peer_list);

        OBJ_CONSTRUCT(&mod->peer_list, opal_list_t);
        OBJ_CONSTRUCT(&mod->peers, opal_hash_table_t);
        OBJ_CONSTRUCT(&mod->peer_names, opal_hash_table_t);
        OBJ_CONSTRUCT(&mod->peer_free, opal_free_list_t);

        /*
         * Resume event processing
         */
        opal_event_enable();
#endif
    } else if (OPAL_CRS_TERM == state) {
        ;
    } else {
        ;
    }

    return;
}

#endif
|
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_H_
#define _MCA_OOB_USOCK_H_

#include "orte_config.h"

#include "orte/types.h"

#include "opal/mca/base/base.h"
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/mca/event/event.h"

#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"


BEGIN_C_DECLS

/* debug (verbosity) levels used by the usock component */
#define OOB_USOCK_DEBUG_FAIL         2
#define OOB_USOCK_DEBUG_CONNECT      7

/* forward declarations */
struct mca_oob_usock_module_t;
struct mca_oob_usock_msg_error_t;

/*
 * Signatures of the functions a usock module exposes
 */
/* start the module */
typedef void (*mca_oob_usock_module_init_fn_t)(void);
/* shut the module down */
typedef void (*mca_oob_usock_module_fini_fn_t)(void);
/* take over a descriptor that was accepted on the listener */
typedef void (*mca_oob_usock_module_accept_connection_fn_t)(const int accepted_fd,
                                                            const struct sockaddr *addr);
/* ping a process */
typedef void (*mca_oob_usock_module_ping_fn_t)(const orte_process_name_t *proc);
/* send a message without blocking */
typedef void (*mca_oob_usock_module_send_nb_fn_t)(orte_rml_send_t *msg);
/* fault-tolerance event notification */
typedef void (*mca_oob_usock_module_ft_event_fn_t)(int state);

/* function table of a usock module */
typedef struct {
    mca_oob_usock_module_init_fn_t               init;
    mca_oob_usock_module_fini_fn_t               finalize;
    mca_oob_usock_module_accept_connection_fn_t  accept_connection;
    mca_oob_usock_module_ping_fn_t               ping;
    mca_oob_usock_module_send_nb_fn_t            send_nb;
    mca_oob_usock_module_ft_event_fn_t           ft_event;
} mca_oob_usock_module_api_t;

/* the module itself: its function table plus its runtime state */
typedef struct {
    mca_oob_usock_module_api_t  api;
    opal_event_base_t           *ev_base;        /* event base for the module progress thread */
    bool                        ev_active;
    opal_thread_t               progress_thread;
    opal_hash_table_t           peers;           // peer connection info
} mca_oob_usock_module_t;

/* the single module instance */
ORTE_MODULE_DECLSPEC extern mca_oob_usock_module_t mca_oob_usock_module;

/**
 * the state of the connection
 */
typedef enum {
    MCA_OOB_USOCK_UNCONNECTED,
    MCA_OOB_USOCK_CLOSED,
    MCA_OOB_USOCK_RESOLVE,
    MCA_OOB_USOCK_CONNECTING,
    MCA_OOB_USOCK_CONNECT_ACK,
    MCA_OOB_USOCK_CONNECTED,
    MCA_OOB_USOCK_FAILED,
    MCA_OOB_USOCK_ACCEPTING
} mca_oob_usock_state_t;

/* module-level shared functions (event callbacks) */
ORTE_MODULE_DECLSPEC void mca_oob_usock_send_handler(int fd, short args, void *cbdata);
ORTE_MODULE_DECLSPEC void mca_oob_usock_recv_handler(int fd, short args, void *cbdata);


END_C_DECLS

#endif /* _MCA_OOB_USOCK_H_ */
|
||||
|
@ -1,604 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
|
||||
* instead of things like EAGAIN, EINPROGRESS, etc. It has been
|
||||
* verified that this will not conflict with other error codes that
|
||||
* are returned by these functions under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/util/listener.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
|
||||
static int usock_component_register(void);
|
||||
static int usock_component_open(void);
|
||||
static int usock_component_close(void);
|
||||
|
||||
static int component_available(void);
|
||||
static int component_startup(void);
|
||||
static void component_shutdown(void);
|
||||
static int component_send(orte_rml_send_t *msg);
|
||||
static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
mca_oob_usock_component_t mca_oob_usock_component = {
|
||||
{
|
||||
.oob_base = {
|
||||
MCA_OOB_BASE_VERSION_2_0_0,
|
||||
.mca_component_name = "usock",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = usock_component_open,
|
||||
.mca_close_component = usock_component_close,
|
||||
.mca_register_component_params = usock_component_register,
|
||||
},
|
||||
.oob_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.priority = 100,
|
||||
.available = component_available,
|
||||
.startup = component_startup,
|
||||
.shutdown = component_shutdown,
|
||||
.send_nb = component_send,
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static int usock_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup of global variables used by this module.
|
||||
*/
|
||||
static int usock_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int usock_component_register(void)
|
||||
{
|
||||
mca_base_component_t *component = &mca_oob_usock_component.super.oob_base;
|
||||
|
||||
/* register oob module parameters */
|
||||
mca_oob_usock_component.max_retries = 2;
|
||||
(void)mca_base_component_var_register(component, "peer_retries",
|
||||
"Number of times to try shutting down a connection before giving up",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_oob_usock_component.max_retries);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int component_available(void)
|
||||
{
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"oob:usock: component_available called");
|
||||
|
||||
/* if session directories were forbidden, then we cannot be used */
|
||||
if (!orte_create_session_dirs ||
|
||||
NULL == orte_process_info.jobfam_session_dir ) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* this component is not available to tools */
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == orte_process_info.my_daemon_uri) {
|
||||
/* direct-launched apps cannot use it */
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
/* apps launched by daemons *must* use it */
|
||||
return ORTE_ERR_FORCE_SELECT;
|
||||
}
|
||||
|
||||
/* otherwise, we are available */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for accepting connections from the event library
|
||||
*/
|
||||
static void connection_event_handler(int incoming_sd, short flags, void* cbdata)
|
||||
{
|
||||
orte_pending_connection_t *pending = (orte_pending_connection_t*)cbdata;
|
||||
int sd;
|
||||
|
||||
sd = pending->fd;
|
||||
pending->fd = -1;
|
||||
OBJ_RELEASE(pending);
|
||||
|
||||
/* process the connection */
|
||||
mca_oob_usock_module.api.accept_connection(sd, NULL);
|
||||
}
|
||||
|
||||
/* Start the module */
|
||||
static int component_startup(void)
|
||||
{
|
||||
int rc=ORTE_SUCCESS;
|
||||
char *session;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK STARTUP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* setup the path to the daemon rendezvous point */
|
||||
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
|
||||
mca_oob_usock_component.address.sun_family = AF_UNIX;
|
||||
session = opal_os_path(false, orte_process_info.jobfam_session_dir,
|
||||
"usock", NULL);
|
||||
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
|
||||
opal_output(0, "SESSION DIR TOO LONG");
|
||||
free(session);
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
snprintf(mca_oob_usock_component.address.sun_path,
|
||||
sizeof(mca_oob_usock_component.address.sun_path)-1,
|
||||
"%s", session);
|
||||
free(session);
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s", mca_oob_usock_component.address.sun_path);
|
||||
|
||||
/* if we are a daemon/HNP, register our listener */
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
if (ORTE_SUCCESS != (rc = orte_register_listener((struct sockaddr*)&mca_oob_usock_component.address, sizeof(struct sockaddr_un),
|
||||
orte_event_base, connection_event_handler))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
} else {
|
||||
/* if the rendezvous point isn't there, then that's an error */
|
||||
/* if the rendezvous file doesn't exist, that's an error */
|
||||
if (0 != access(mca_oob_usock_component.address.sun_path, R_OK)) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s NOT READABLE", mca_oob_usock_component.address.sun_path);
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
/* start the module */
|
||||
mca_oob_usock_module.api.init();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void component_shutdown(void)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK SHUTDOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* delete the rendezvous file */
|
||||
unlink(mca_oob_usock_component.address.sun_path);
|
||||
}
|
||||
|
||||
/* shutdown the module */
|
||||
if (NULL != mca_oob_usock_module.api.finalize) {
|
||||
mca_oob_usock_module.api.finalize();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Component entry point for sending a message.
 *
 * A daemon/HNP accepts the message only when the destination is a proc
 * it knows and that proc is flagged as local; otherwise it answers
 * ORTE_ERR_TAKE_NEXT_OPTION.  All other processes hand every message to
 * the module (an app reaches anyone, since its daemon routes the
 * message on to the final destination).
 *
 * Returns ORTE_SUCCESS once the message was given to the module.
 */
static int component_send(orte_rml_send_t *msg)
{
    orte_proc_t *proc;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s oob:usock:send_nb to peer %s:%d seq_num =%d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num);

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* daemons can only reach local procs */
        proc = orte_get_proc_object(&msg->dst);
        if (NULL == proc || !ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
    }

    mca_oob_usock_module.api.send_nb(msg);
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Return this component's contribution to the process URI.
 *
 * The usock transport never needs a uri to find a peer, because the
 * path is known from the session directory.  Other parts of ORTE,
 * however, treat a NULL uri as "there is no daemon", so something must
 * always be returned - even when this is the only active OOB transport.
 * That dependency may be removed one day; until then the fixed string
 * "usock" is returned.
 *
 * The result comes from strdup().
 */
static char* component_get_addr(void)
{
    return strdup("usock");
}
|
||||
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris)
|
||||
{
|
||||
orte_proc_t *proc;
|
||||
mca_oob_usock_peer_t *pr;
|
||||
uint64_t ui64;
|
||||
|
||||
memcpy(&ui64, peer, sizeof(uint64_t));
|
||||
/* if I am an application, then everything is addressable
|
||||
* by me via my daemon
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* if this is my daemon, then take it - otherwise, ignore */
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == peer->jobid &&
|
||||
ORTE_PROC_MY_DAEMON->vpid == peer->vpid) {
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
/* we have to initiate the connection because otherwise the
|
||||
* daemon has no way to communicate to us via this component
|
||||
* as the app doesn't have a listening port */
|
||||
pr->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(pr, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/* otherwise, indicate that we cannot reach this peer */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
/* if I am a daemon or HNP, I can only reach my
|
||||
* own local procs via this component
|
||||
*/
|
||||
if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
|
||||
/* another daemon */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (NULL == (proc = orte_get_proc_object(peer)) ||
|
||||
!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
/* indicate that this peer is addressable by this component */
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_set_module(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
orte_oob_base_peer_t *bpr;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:set_module called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* make sure the OOB knows that we are handling this peer - we
|
||||
* are in the same event base as the OOB base, so we can
|
||||
* directly access its storage
|
||||
*/
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||
ui64, (void**)&bpr) || NULL == bpr) {
|
||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||
}
|
||||
opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx);
|
||||
bpr->component = &mca_oob_usock_component.super;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, bpr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:lost connection called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* activate the proc state - since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:unable to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->hop));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* have the OOB base try to send it again */
|
||||
ORTE_OOB_SEND(pop->rmsg);
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* if we are terminating, then don't do anything further */
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
OBJ_RELEASE(pop);
|
||||
return;
|
||||
}
|
||||
|
||||
/* activate the proc state */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect unable to reach peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
/*
 * Tell whether a peer can be reached through this component.
 *
 * App:        always true - everything is reachable via its daemon.
 * Daemon/HNP: true only for a proc of another job that is known and
 *             flagged as local; procs of its own job (other daemons)
 *             are never reachable this way.
 */
static bool component_is_reachable(orte_process_name_t *peer)
{
    orte_proc_t *proc;

    if (ORTE_PROC_IS_APP) {
        return true;
    }

    /* same job as me means the peer is another daemon */
    if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
        return false;
    }

    /* reachable exactly when it is one of my own local procs */
    proc = orte_get_proc_object(peer);
    return NULL != proc && ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL);
}
|
||||
|
||||
/*
 * Return a printable name for a connection state.
 *
 * Every enumerator of mca_oob_usock_state_t has its own name, including
 * MCA_OOB_USOCK_ACCEPTING; any other value yields "UNKNOWN".  The result
 * points to a string literal: it must not be modified or freed.
 */
char* mca_oob_usock_state_print(mca_oob_usock_state_t state)
{
    switch (state) {
    case MCA_OOB_USOCK_UNCONNECTED:
        return "UNCONNECTED";
    case MCA_OOB_USOCK_CLOSED:
        return "CLOSED";
    case MCA_OOB_USOCK_RESOLVE:
        return "RESOLVE";
    case MCA_OOB_USOCK_CONNECTING:
        return "CONNECTING";
    case MCA_OOB_USOCK_CONNECT_ACK:
        return "ACK";
    case MCA_OOB_USOCK_CONNECTED:
        return "CONNECTED";
    case MCA_OOB_USOCK_FAILED:
        return "FAILED";
    case MCA_OOB_USOCK_ACCEPTING:
        return "ACCEPTING";
    default:
        return "UNKNOWN";
    }
}
|
||||
|
||||
|
||||
mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name)
|
||||
{
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t ui64;
|
||||
|
||||
memcpy(&ui64, (char*)name, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, ui64, (void**)&peer)) {
|
||||
return NULL;
|
||||
}
|
||||
return peer;
|
||||
}
|
||||
|
||||
/* OOB USOCK Class instances */
|
||||
|
||||
/*
 * Constructor for mca_oob_usock_peer_t: no socket, unconnected, no
 * messages in flight, no events active, and an empty send queue.
 */
static void peer_cons(mca_oob_usock_peer_t *peer)
{
    /* connection bookkeeping */
    peer->sd = -1;
    peer->state = MCA_OOB_USOCK_UNCONNECTED;
    peer->retries = 0;
    peer->auth_method = NULL;

    /* message bookkeeping */
    OBJ_CONSTRUCT(&peer->send_queue, opal_list_t);
    peer->send_msg = NULL;
    peer->recv_msg = NULL;

    /* none of the events has been added yet */
    peer->send_ev_active = false;
    peer->recv_ev_active = false;
    peer->timer_ev_active = false;
}
|
||||
/*
 * Destructor for mca_oob_usock_peer_t: close the socket if one is
 * open, free the authentication method string and destruct the
 * send queue.
 */
static void peer_des(mca_oob_usock_peer_t *peer)
{
    if (peer->sd >= 0) {
        CLOSE_THE_SOCKET(peer->sd);
    }

    /* free(NULL) is a no-op, so no guard is needed */
    free(peer->auth_method);

    OPAL_LIST_DESTRUCT(&peer->send_queue);
}
|
||||
/* class instance for mca_oob_usock_peer_t: parent class opal_list_item_t,
 * constructed by peer_cons and destructed by peer_des */
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_t, opal_list_item_t, peer_cons, peer_des);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_op_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
|
||||
/* constructor for mca_oob_usock_msg_op_t: start with a zero "reps" count */
static void mopcon(mca_oob_usock_msg_op_t *p)
{
    p->reps = 0;
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_msg_op_t,
|
||||
opal_object_t,
|
||||
mopcon, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_conn_op_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_ping_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
|
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_COMPONENT_H_
|
||||
#define _MCA_OOB_USOCK_COMPONENT_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_UN_H
|
||||
#include <sys/un.h>
|
||||
#endif
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
|
||||
#include "orte/mca/oob/oob.h"
|
||||
#include "oob_usock_peer.h"
|
||||
#include "oob_usock.h"
|
||||
|
||||
/**
|
||||
* OOB USOCK Component
|
||||
*/
|
||||
typedef struct {
|
||||
mca_oob_base_component_t super; /**< base OOB component */
|
||||
int max_retries; /**< max number of retries before declaring peer gone */
|
||||
struct sockaddr_un address; /**< address of our rendezvous point */
|
||||
} mca_oob_usock_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern mca_oob_usock_component_t mca_oob_usock_component;
|
||||
|
||||
ORTE_MODULE_DECLSPEC char* mca_oob_usock_state_print(mca_oob_usock_state_t state);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_set_module(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_COMPONENT_H_ */
|
@ -1,967 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/mca/sec/sec.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/fd.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/* forward declarations of the file-local helpers defined below */
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer);
static int usock_peer_send_blocking(mca_oob_usock_peer_t* peer, int sd,
                                    void* data, size_t size);
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer, int sd,
                                     void* data, size_t size);
static void usock_peer_event_init(mca_oob_usock_peer_t* peer);
static void usock_peer_connected(mca_oob_usock_peer_t* peer);
|
||||
|
||||
/*
 * Make sure the peer has a unix-domain stream socket to work with.
 *
 * If the peer already holds a socket nothing is done. Otherwise a new
 * socket is created, marked close-on-exec, wired to the peer's events
 * and switched to non-blocking mode.
 *
 * Returns ORTE_SUCCESS when the peer has a socket, ORTE_ERR_UNREACH if
 * socket() fails, or ORTE_ERROR if close-on-exec cannot be set (the
 * socket is closed again in that case). A failing fcntl() is reported
 * but does not change the return code.
 */
static int usock_peer_create_socket(mca_oob_usock_peer_t* peer)
{
    int fl;

    /* reuse an existing socket */
    if (0 <= peer->sd) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_oob_base_framework.framework_output,
                         "%s oob:usock:peer creating socket to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&(peer->name))));

    peer->sd = socket(PF_UNIX, SOCK_STREAM, 0);
    if (0 > peer->sd) {
        opal_output(0, "%s-%s usock_peer_create_socket: socket() failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return ORTE_ERR_UNREACH;
    }

    /* children spawned later must not inherit this descriptor */
    if (OPAL_SUCCESS != opal_fd_set_cloexec(peer->sd)) {
        opal_output(0, "%s unable to set socket to CLOEXEC",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        close(peer->sd);
        peer->sd = -1;
        return ORTE_ERROR;
    }

    /* setup event callbacks */
    usock_peer_event_init(peer);

    /* switch the socket to non-blocking mode; failures are only logged */
    fl = fcntl(peer->sd, F_GETFL, 0);
    if (0 > fl) {
        opal_output(0, "%s-%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
    } else if (0 > fcntl(peer->sd, F_SETFL, fl | O_NONBLOCK)) {
        opal_output(0, "%s-%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
    }

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
|
||||
* Try connecting to a peer
|
||||
*/
|
||||
void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer = op->peer;
|
||||
int rc;
|
||||
opal_socklen_t addrlen = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
rc = usock_peer_create_socket(peer);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
/* FIXME: we cannot create a USOCK socket - report
|
||||
* back to the component that this peer is
|
||||
* unreachable so it can remove the peer
|
||||
* from its list and report back to the base
|
||||
* NOTE: this could be a reconnect attempt,
|
||||
* so we also need to mark any queued messages
|
||||
* and return them as "unreachable"
|
||||
*/
|
||||
opal_output(0, "%s CANNOT CREATE SOCKET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
addrlen = sizeof(struct sockaddr_un);
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s - %d retries",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->retries);
|
||||
|
||||
retry_connect:
|
||||
peer->retries++;
|
||||
if (connect(peer->sd, (struct sockaddr *) &mca_oob_usock_component.address, addrlen) < 0) {
|
||||
/* non-blocking so wait for completion */
|
||||
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect completion to %s - activating send event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* just ensure the send_event is active */
|
||||
if (!peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Some kernels (Linux 2.6) will automatically software
|
||||
abort a connection that was ECONNREFUSED on the last
|
||||
attempt, without even trying to establish the
|
||||
connection. Handle that case in a semi-rational
|
||||
way by trying twice before giving up */
|
||||
if (ECONNABORTED == opal_socket_errno) {
|
||||
if (peer->retries < mca_oob_usock_component.max_retries) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connection aborted by OS to %s - retrying",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
goto retry_connect;
|
||||
} else {
|
||||
/* We were unsuccessful in establishing this connection, and are
|
||||
* not likely to suddenly become successful,
|
||||
*/
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across unix domain socket to local proc %s failed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
/* let the USOCK component know that this module failed to make
|
||||
* the connection so it can try other modules, and/or fail back
|
||||
* to the OOB level so another component can try. This will activate
|
||||
* an event in the component event base, and so it will fire async
|
||||
* from us if we are in our own progress thread
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_failed_to_connect);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* connection succeeded */
|
||||
peer->retries = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across to proc %s succeeded",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* setup our recv to catch the return ack call */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
|
||||
/* send our globally unique process identifier to the peer */
|
||||
if (ORTE_SUCCESS == (rc = usock_peer_send_connect_ack(peer))) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"usock_peer_send_connect_ack to proc %s failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
opal_strerror(rc), rc);
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Send the connection handshake to a peer.
 *
 * The message consists of an MCA_OOB_USOCK_IDENT header naming us as
 * origin and the peer as destination, followed by our NUL-terminated
 * version string and our security credential.
 *
 * Returns ORTE_SUCCESS once the whole message has been sent, the error
 * from opal_sec.get_my_credential() if no credential is available,
 * ORTE_ERR_OUT_OF_RESOURCE if the message buffer cannot be allocated,
 * or ORTE_ERR_UNREACH if the send fails.
 */
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer)
{
    char *msg;
    mca_oob_usock_hdr_t hdr;
    int rc;
    size_t sdsize;
    size_t verlen;
    char *cred;
    size_t credsize;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s SEND CONNECT ACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* send a handshake that includes our process identifier
     * to ensure we are talking to another OMPI process.
     * Zero the header first so that no uninitialized padding
     * bytes are copied onto the wire.
     */
    memset(&hdr, 0, sizeof(hdr));
    hdr.origin = *ORTE_PROC_MY_NAME;
    hdr.dst = peer->name;
    hdr.type = MCA_OOB_USOCK_IDENT;
    hdr.tag = 0;
    hdr.channel = 0xffffffff;
    hdr.seq_num = 0;

    /* get our security credential*/
    if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(peer->auth_method,
                                                         ORTE_PROC_MY_NAME, &cred, &credsize))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* length of the version string including its terminating NUL */
    verlen = strlen(orte_version_string) + 1;

    /* set the number of bytes to be read beyond the header */
    hdr.nbytes = verlen + credsize;

    /* create a zeroed space for our message */
    sdsize = sizeof(hdr) + verlen + credsize;
    if (NULL == (msg = (char*)calloc(1, sdsize))) {
        /* the credential is ours to release on every exit path */
        free(cred);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* load the message */
    memcpy(msg, &hdr, sizeof(hdr));
    memcpy(msg + sizeof(hdr), orte_version_string, verlen);
    if (0 < credsize) {
        memcpy(msg + sizeof(hdr) + verlen, cred, credsize);
    }
    free(cred);

    rc = usock_peer_send_blocking(peer, peer->sd, msg, sdsize);
    free(msg);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(ORTE_ERR_UNREACH);
        return ORTE_ERR_UNREACH;
    }
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Initialize events to be used by the peer instance for USOCK select/poll callbacks.
|
||||
*/
|
||||
static void usock_peer_event_init(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
if (peer->sd >= 0) {
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->recv_event,
|
||||
peer->sd,
|
||||
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_recv_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->recv_event, ORTE_MSG_PRI);
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->send_event,
|
||||
peer->sd,
|
||||
OPAL_EV_WRITE|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_send_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->send_event, ORTE_MSG_PRI);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the status of the connection. If the connection failed, will retry
|
||||
* later. Otherwise, send this processes identifier to the peer on the
|
||||
* newly connected socket.
|
||||
*/
|
||||
void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
int so_error = 0, rc;
|
||||
opal_socklen_t so_length = sizeof(so_error);
|
||||
orte_oob_base_peer_t *bpr;
|
||||
uint64_t ui64;
|
||||
mca_oob_usock_peer_t *pr;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:complete_connect called for peer %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
|
||||
/* check connect completion status */
|
||||
if (getsockopt(peer->sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||
opal_output(0, "%s usock_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (so_error == EINPROGRESS) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:send:handler still in progress",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
} else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: connection failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(so_error),
|
||||
so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
} else if (so_error != 0) {
|
||||
/* No need to worry about the return code here - we return regardless
|
||||
at this point, and if an error did occur a message has already been
|
||||
printed for the user */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: "
|
||||
"connection failed with error %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"sending ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) == ORTE_SUCCESS) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"setting read event on connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
} else {
|
||||
opal_output(0, "%s usock_peer_complete_connect: unable to send connect ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
}
|
||||
|
||||
/* make sure the OOB knows that we are handling this peer - we
|
||||
* are in the same event base as the OOB base, so we can
|
||||
* directly access its storage
|
||||
*/
|
||||
memcpy(&ui64, (char*)&(peer->name), sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||
ui64, (void**)&bpr) || NULL == bpr) {
|
||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||
}
|
||||
opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx);
|
||||
bpr->component = &mca_oob_usock_component.super;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, bpr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
/* record it locally too */
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = peer->name;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking send on a non-blocking socket. Used to send the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static int usock_peer_send_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
int retval;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s send blocking of %"PRIsize_t" bytes to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
size, sd);
|
||||
|
||||
while (cnt < size) {
|
||||
retval = send(sd, (char*)ptr+cnt, size-cnt, 0);
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd,
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s blocking send complete to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Receive the peers globally unique process identification from a newly
|
||||
* connected socket and verify the expected response. If so, move the
|
||||
* socket to a connected state.
|
||||
*/
|
||||
int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* pr, int sd,
|
||||
mca_oob_usock_hdr_t *dhdr)
|
||||
{
|
||||
char *msg;
|
||||
char *version;
|
||||
int rc, cmpval;
|
||||
char *cred;
|
||||
size_t credsize;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
uint64_t ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECV CONNECT ACK FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == pr) ? "UNKNOWN" : ORTE_NAME_PRINT(&pr->name), sd);
|
||||
|
||||
peer = pr;
|
||||
/* ensure all is zero'd */
|
||||
memset(&hdr, 0, sizeof(mca_oob_usock_hdr_t));
|
||||
|
||||
if (usock_peer_recv_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t))) {
|
||||
if (NULL != peer) {
|
||||
/* If the peer state is CONNECT_ACK, then we were waiting for
|
||||
* the connection to be ack'd
|
||||
*/
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* handshake broke down - abort this connection */
|
||||
opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), sd);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
/* if the requestor wanted the header returned, then do so now */
|
||||
if (NULL != dhdr) {
|
||||
*dhdr = hdr;
|
||||
}
|
||||
|
||||
if (MCA_OOB_USOCK_PROBE == hdr.type) {
|
||||
/* send a header back */
|
||||
hdr.type = MCA_OOB_USOCK_PROBE;
|
||||
hdr.dst = hdr.origin;
|
||||
hdr.origin = *ORTE_PROC_MY_NAME;
|
||||
usock_peer_send_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t));
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (hdr.type != MCA_OOB_USOCK_IDENT) {
|
||||
opal_output(0, "usock_peer_recv_connect_ack: invalid header type: %d\n", hdr.type);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack recvd from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if we don't already have it, get the peer */
|
||||
if (NULL == peer) {
|
||||
peer = mca_oob_usock_peer_lookup(&hdr.origin);
|
||||
if (NULL == peer) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s mca_oob_usock_recv_connect: connection from new peer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
peer->name = hdr.origin;
|
||||
peer->state = MCA_OOB_USOCK_ACCEPTING;
|
||||
peer->sd = sd;
|
||||
memcpy(&ui64, &peer->name, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, peer)) {
|
||||
OBJ_RELEASE(peer);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
} else {
|
||||
/* check for a race condition - if I was in the process of
|
||||
* creating a connection to the peer, or have already established
|
||||
* such a connection, then we need to reject this connection. We will
|
||||
* let the higher ranked process retry - if I'm the lower ranked
|
||||
* process, I'll simply defer until I receive the request
|
||||
*/
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECTING == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s SIMUL CONNECTION WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr.origin));
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
if (0 < peer->sd) {
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
peer->sd = -1;
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
peer->retries = 0;
|
||||
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &hdr.origin, ORTE_PROC_MY_NAME);
|
||||
if (OPAL_VALUE1_GREATER == cmpval) {
|
||||
/* force the other end to retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_UNCONNECTED;
|
||||
return ORTE_ERR_UNREACH;
|
||||
} else {
|
||||
/* retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* compare the peers name to the expected value */
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &hdr.origin)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received unexpected process identifier %s from %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(hdr.origin)),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack header from %s is okay",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* get the authentication and version payload */
|
||||
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (!usock_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
/* check that this is from a matching version */
|
||||
version = (char*)(msg);
|
||||
if (0 != strcmp(version, orte_version_string)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received different version from %s: %s instead of %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
version, orte_version_string);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack version from %s matches ours",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* check security token */
|
||||
cred = (char*)(msg + strlen(version) + 1);
|
||||
credsize = hdr.nbytes - strlen(version) - 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, &peer->auth_method))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
free(msg);
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack %s authenticated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if the requestor wanted the header returned, then they
|
||||
* will complete their processing
|
||||
*/
|
||||
if (NULL != dhdr) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
/* connected */
|
||||
usock_peer_connected(peer);
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "connected");
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup peer state to reflect that connection has been established,
|
||||
* and start any pending sends.
|
||||
*/
|
||||
static void usock_peer_connected(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_connected on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
|
||||
/* initiate send of first message on queue */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any event registrations associated with the socket
|
||||
* and update the peer state to reflect the connection has
|
||||
* been closed.
|
||||
*/
|
||||
void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
mca_oob_usock_send_t *snd;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_close for %s sd %d state %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->sd, mca_oob_usock_state_print(peer->state));
|
||||
|
||||
peer->state = MCA_OOB_USOCK_CLOSED;
|
||||
|
||||
/* release the socket */
|
||||
close(peer->sd);
|
||||
peer->sd = -1;
|
||||
|
||||
/* inform the component-level that we have lost a connection so
|
||||
* it can decide what to do about it.
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_lost_connection);
|
||||
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
/* nothing more to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/* FIXME: push any queued messages back onto the OOB for retry - note that
|
||||
* this must be done after the prior call to ensure that the component
|
||||
* processes the "lost connection" notice before the OOB begins to
|
||||
* handle these recycled messages. This prevents us from unintentionally
|
||||
* attempting to send the message again across the now-failed interface
|
||||
*/
|
||||
if (NULL != peer->send_msg) {
|
||||
}
|
||||
while (NULL != (snd = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue))) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking recv on a non-blocking socket. Used to receive the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect ack from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
while (cnt < size) {
|
||||
int retval = recv(sd, (char *)ptr+cnt, size-cnt, 0);
|
||||
|
||||
/* remote closed connection */
|
||||
if (retval == 0) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_recv_blocking: "
|
||||
"peer closed connection: peer state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
(NULL == peer) ? 0 : peer->state);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* socket is non-blocking so handle errors */
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR &&
|
||||
opal_socket_errno != EAGAIN &&
|
||||
opal_socket_errno != EWOULDBLOCK) {
|
||||
if (peer->state == MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* If we overflow the listen backlog, it's
|
||||
possible that even though we finished the three
|
||||
way handshake, the remote host was unable to
|
||||
transition the connection from half connected
|
||||
(received the initial SYN) to fully connected
|
||||
(in the listen backlog). We likely won't see
|
||||
the failure until we try to receive, due to
|
||||
timing and the like. The first thing we'll get
|
||||
in that case is a RST packet, which receive
|
||||
will turn into a connection reset by peer
|
||||
errno. In that case, leave the socket in
|
||||
CONNECT_ACK and propogate the error up to
|
||||
recv_connect_ack, who will try to establish the
|
||||
connection again */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received error %s from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(opal_socket_errno),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return false;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s usock_peer_recv_blocking: "
|
||||
"recv() failed for %s: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Routine for debugging to print the connection state and socket options
|
||||
*/
|
||||
void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg)
|
||||
{
|
||||
char buff[255];
|
||||
int nodelay,flags;
|
||||
|
||||
if ((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#if defined(USOCK_NODELAY)
|
||||
optlen = sizeof(nodelay);
|
||||
if (getsockopt(peer->sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) {
|
||||
opal_output(0, "usock_peer_dump: USOCK_NODELAY option: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#else
|
||||
nodelay = 0;
|
||||
#endif
|
||||
|
||||
snprintf(buff, sizeof(buff), "%s-%s %s: nodelay %d flags %08x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg, nodelay, flags);
|
||||
opal_output(0, "%s", buff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accept incoming connection - if not already connected
|
||||
*/
|
||||
|
||||
bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept called for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECTED) {
|
||||
|
||||
usock_peer_event_init(peer);
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) != ORTE_SUCCESS) {
|
||||
opal_output(0, "%s-%s usock_peer_accept: "
|
||||
"usock_peer_send_connect_ack failed\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
usock_peer_connected(peer);
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
/* if a message is waiting to be sent, ensure the send event is active */
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "accepted");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept ignored for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
return false;
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_CONNECTION_H_
|
||||
#define _MCA_OOB_USOCK_CONNECTION_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_peer.h"
|
||||
|
||||
/* State machine for connection operations */
/* One of these objects is allocated (OBJ_NEW) per activation by the
 * ORTE_ACTIVATE_USOCK_CONN_STATE, ORTE_ACTIVATE_USOCK_ACCEPT_STATE and
 * ORTE_RETRY_USOCK_CONN_STATE macros in this header and passed to the
 * event callback as its argument.
 */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
/* peer the operation refers to; assigned by the CONN_STATE and RETRY
 * macros, not by the ACCEPT macro */
mca_oob_usock_peer_t *peer;
|
||||
/* event used to schedule the operation's callback */
opal_event_t ev;
|
||||
} mca_oob_usock_conn_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_conn_op_t);
|
||||
|
||||
/* Shut down both directions of a socket and then close its descriptor.
 * The argument is evaluated twice, so pass a plain variable rather than
 * an expression with side effects.
 */
#define CLOSE_THE_SOCKET(fd)        \
    do {                            \
        shutdown((fd), SHUT_RDWR);  \
        close((fd));                \
    } while (0)
|
||||
|
||||
/* Schedule `cbfunc` to run in the usock module's event base for peer `p`.
 *
 * Allocates a mca_oob_usock_conn_op_t, stores the peer in it, and makes a
 * write event (no file descriptor, ORTE_MSG_PRI priority) immediately
 * active with the new object as the callback argument. The macro does not
 * release the object itself.
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_CONN_STATE(p, cbfunc)                           \
    do {                                                                    \
        mca_oob_usock_conn_op_t *cop;                                       \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] connect to %s",                     \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT((&(p)->name)));                 \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                             \
        cop->peer = (p);                                                    \
        opal_event_set(mca_oob_usock_module.ev_base, &cop->ev, -1,          \
                       OPAL_EV_WRITE, (cbfunc), cop);                       \
        opal_event_set_priority(&cop->ev, ORTE_MSG_PRI);                    \
        opal_event_active(&cop->ev, OPAL_EV_WRITE, 1);                      \
    } while (0)
|
||||
|
||||
/* Register `cbfunc` to be called when socket `s` becomes readable.
 *
 * Allocates a mca_oob_usock_conn_op_t and adds a read event on `s` to the
 * usock module's event base (ORTE_MSG_PRI priority, no timeout) with the
 * new object as the callback argument. The object's `peer` field is not
 * assigned here, and the `a` argument is not referenced by the expansion.
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_ACCEPT_STATE(s, a, cbfunc)                      \
    do {                                                                    \
        mca_oob_usock_conn_op_t *cop;                                       \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                             \
        opal_event_set(mca_oob_usock_module.ev_base, &cop->ev, (s),         \
                       OPAL_EV_READ, (cbfunc), cop);                        \
        opal_event_set_priority(&cop->ev, ORTE_MSG_PRI);                    \
        opal_event_add(&cop->ev, 0);                                        \
    } while (0)
|
||||
|
||||
/* Schedule `cbfunc` to run for peer `p` after the delay `tv`.
 *
 * Allocates a mca_oob_usock_conn_op_t, stores the peer in it, and arms a
 * timer event in the usock module's event base with the new object as the
 * callback argument. `tv` is handed unchanged to opal_event_evtimer_add().
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_RETRY_USOCK_CONN_STATE(p, cbfunc, tv)                          \
    do {                                                                    \
        mca_oob_usock_conn_op_t *cop;                                       \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] retry connect to %s",               \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT((&(p)->name)));                 \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                             \
        cop->peer = (p);                                                    \
        opal_event_evtimer_set(mca_oob_usock_module.ev_base,                \
                               &cop->ev,                                    \
                               (cbfunc), cop);                              \
        opal_event_evtimer_add(&cop->ev, (tv));                             \
    } while (0)
|
||||
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg);
|
||||
ORTE_MODULE_DECLSPEC bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* peer,
|
||||
int sd, mca_oob_usock_hdr_t *hdr);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_CONNECTION_H_ */
|
@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_HDR_H_
|
||||
#define _MCA_OOB_USOCK_HDR_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
/* define several internal-only message
|
||||
* types this component uses for its own
|
||||
* handshake operations, plus one indicating
|
||||
* the message came from an external (to
|
||||
* this component) source
|
||||
*/
|
||||
/* The enumerators carry no explicit values, so they are numbered 0..3 in
 * declaration order; the value is stored in the `type` field of
 * mca_oob_usock_hdr_t.
 * NOTE(review): presumably MCA_OOB_USOCK_USER is the "external source"
 * type and the other three are the internal ones - confirm against the
 * senders.
 */
typedef enum {
|
||||
MCA_OOB_USOCK_IDENT,
|
||||
MCA_OOB_USOCK_PROBE,
|
||||
MCA_OOB_USOCK_PING,
|
||||
MCA_OOB_USOCK_USER
|
||||
} mca_oob_usock_msg_type_t;
|
||||
|
||||
/* header for usock msgs */
/* NOTE(review): the send path transmits the header before the message
 * data; the struct appears to be written as-is, so its layout would be
 * part of the wire format - confirm before reordering or resizing fields.
 */
|
||||
typedef struct {
|
||||
/* the original sender */
|
||||
orte_process_name_t origin;
|
||||
/* the intended final recipient */
|
||||
orte_process_name_t dst;
|
||||
/* type of message */
|
||||
mca_oob_usock_msg_type_t type;
|
||||
/* the rml tag where this message is headed */
|
||||
orte_rml_tag_t tag;
|
||||
/* the rml channel to which this message is headed */
|
||||
orte_rml_channel_num_t channel;
|
||||
/* msg seq number on the src channel */
|
||||
uint32_t seq_num;
|
||||
/* number of bytes in message */
|
||||
uint32_t nbytes;
|
||||
} mca_oob_usock_hdr_t;
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_HDR_H_ */
|
@ -1,85 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PEER_H_
|
||||
#define _MCA_OOB_USOCK_PEER_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
/* object for tracking peers */
/* Holds the socket, connection state, event registrations and send/recv
 * bookkeeping for one remote process. Derives from opal_list_item_t.
 */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
/* although not required, there is enough debug
|
||||
* value that retaining the name makes sense
|
||||
*/
|
||||
orte_process_name_t name;
|
||||
char *auth_method; // how the peer authenticated themselves to us
|
||||
/* socket descriptor for this peer; reset to -1 when the peer is closed */
int sd;
|
||||
int retries; // number of times we have tried to connect to this address
|
||||
/* current connection state (e.g. MCA_OOB_USOCK_CONNECTING, _CONNECT_ACK,
 * _CONNECTED, _CLOSED, _FAILED) */
mca_oob_usock_state_t state;
|
||||
opal_event_t op_event; // used for connecting and operations other than read/write
|
||||
opal_event_t send_event; /**< registration with event thread for send events */
|
||||
/* true while send_event is added to the event base; maintained by hand
 * around every opal_event_add/opal_event_del of send_event */
bool send_ev_active;
|
||||
opal_event_t recv_event; /**< registration with event thread for recv events */
|
||||
/* true while recv_event is added to the event base */
bool recv_ev_active;
|
||||
opal_event_t timer_event; /**< timer for retrying connection failures */
|
||||
/* true while timer_event is armed */
bool timer_ev_active;
|
||||
opal_list_t send_queue; /**< list of messages to send */
|
||||
mca_oob_usock_send_t *send_msg; /**< current send in progress */
|
||||
mca_oob_usock_recv_t *recv_msg; /**< current recv in progress */
|
||||
} mca_oob_usock_peer_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_t);
|
||||
|
||||
/* Carrier object for a deferred peer operation: the ORTE_ACTIVATE_USOCK_PEER_OP
 * and ORTE_ACTIVATE_USOCK_CMP_OP macros allocate one, store the peer in it
 * and pass it to the scheduled callback.
 */
typedef struct {
|
||||
opal_object_t super;
|
||||
/* event used to schedule the callback */
opal_event_t ev;
|
||||
/* peer the operation refers to */
mca_oob_usock_peer_t *peer;
|
||||
} mca_oob_usock_peer_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_op_t);
|
||||
|
||||
/* Schedule `cbfunc` for peer `p` in the event base named by
 * mca_usock_component.ev_base.
 *
 * Allocates a mca_oob_usock_peer_op_t, stores the peer in it, and makes a
 * write event (no file descriptor, ORTE_MSG_PRI priority) immediately
 * active with the new object as the callback argument.
 *
 * NOTE(review): the other activation macros in this component use
 * mca_oob_usock_module.ev_base; confirm that `mca_usock_component` is the
 * intended (and existing) symbol.
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_PEER_OP(p, cbfunc)                              \
    do {                                                                    \
        mca_oob_usock_peer_op_t *op;                                        \
        op = OBJ_NEW(mca_oob_usock_peer_op_t);                              \
        op->peer = (p);                                                     \
        opal_event_set(mca_usock_component.ev_base, &op->ev, -1,            \
                       OPAL_EV_WRITE, (cbfunc), op);                        \
        opal_event_set_priority(&op->ev, ORTE_MSG_PRI);                     \
        opal_event_active(&op->ev, OPAL_EV_WRITE, 1);                       \
    } while (0)
|
||||
|
||||
/* Schedule the component-level callback `cbfunc` for peer `p` in
 * orte_event_base.
 *
 * Allocates a mca_oob_usock_peer_op_t, stores the peer in it, and makes a
 * write event (no file descriptor, ORTE_MSG_PRI priority) immediately
 * active with the new object as the callback argument.
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_CMP_OP(p, cbfunc)                               \
    do {                                                                    \
        mca_oob_usock_peer_op_t *pop;                                       \
        pop = OBJ_NEW(mca_oob_usock_peer_op_t);                             \
        pop->peer = (p);                                                    \
        opal_event_set(orte_event_base, &pop->ev, -1,                       \
                       OPAL_EV_WRITE, (cbfunc), pop);                       \
        opal_event_set_priority(&pop->ev, ORTE_MSG_PRI);                    \
        opal_event_active(&pop->ev, OPAL_EV_WRITE, 1);                      \
    } while (0)
|
||||
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PEER_H_ */
|
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PING_H_
|
||||
#define _MCA_OOB_USOCK_PING_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
/* Carrier object for a ping request: ORTE_ACTIVATE_USOCK_PING allocates
 * one, copies the target's jobid/vpid into `peer`, and passes the object
 * to the scheduled callback.
 */
typedef struct {
|
||||
opal_object_t super;
|
||||
/* event used to schedule the callback */
opal_event_t ev;
|
||||
/* name of the process to ping, stored by value */
orte_process_name_t peer;
|
||||
} mca_oob_usock_ping_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_ping_t);
|
||||
|
||||
/* Schedule `cbfunc` to ping the process named by `p`.
 *
 * `p` must point to a name with `jobid` and `vpid` members; both are
 * copied into a freshly allocated mca_oob_usock_ping_t, so `p` does not
 * need to outlive the call. A write event (no file descriptor,
 * ORTE_MSG_PRI priority) is then made immediately active in the usock
 * module's event base with the new object as the callback argument.
 *
 * Expands to a single statement without a trailing semicolon, so it must
 * be followed by `;` at the call site and is safe inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_PING(p, cbfunc)                                 \
    do {                                                                    \
        mca_oob_usock_ping_t *pop;                                          \
        pop = OBJ_NEW(mca_oob_usock_ping_t);                                \
        pop->peer.jobid = (p)->jobid;                                       \
        pop->peer.vpid = (p)->vpid;                                         \
        opal_event_set(mca_oob_usock_module.ev_base, &pop->ev, -1,          \
                       OPAL_EV_WRITE, (cbfunc), pop);                       \
        opal_event_set_priority(&pop->ev, ORTE_MSG_PRI);                    \
        opal_event_active(&pop->ev, OPAL_EV_WRITE, 1);                      \
    } while (0)
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PING_H_ */
|
@ -1,610 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
|
||||
* instead of \ things like EAGAIN, EINPROGRESS, etc. It has been
|
||||
* verified that this will \ not conflict with other error codes that
|
||||
* are returned by these functions \ under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/*
 * Write the current block (sdptr/sdbytes) of the peer's in-progress send
 * to the peer's socket.
 *
 * Returns ORTE_SUCCESS once the whole block has been written,
 * ORTE_ERR_RESOURCE_BUSY on EAGAIN or ORTE_ERR_WOULD_BLOCK on EWOULDBLOCK
 * (the caller should keep the message and try again later), and
 * ORTE_ERR_COMM_FAILURE on any other write error. EINTR is retried
 * immediately. sdptr/sdbytes are advanced past whatever was written, so a
 * later call resumes where this one stopped.
 */
static int send_bytes(mca_oob_usock_peer_t* peer)
{
    mca_oob_usock_send_t* snd = peer->send_msg;
    int nwritten;

    while (0 < snd->sdbytes) {
        nwritten = write(peer->sd, snd->sdptr, snd->sdbytes);

        if (0 <= nwritten) {
            /* update location */
            snd->sdbytes -= nwritten;
            snd->sdptr += nwritten;
            continue;
        }

        if (opal_socket_errno == EINTR) {
            /* interrupted before anything was written - retry */
            continue;
        }
        if (opal_socket_errno == EAGAIN) {
            /* tell the caller to keep this message on active,
             * but let the event lib cycle so other messages
             * can progress while this socket is busy
             */
            return ORTE_ERR_RESOURCE_BUSY;
        }
        if (opal_socket_errno == EWOULDBLOCK) {
            /* same handling as above, reported with its own code */
            return ORTE_ERR_WOULD_BLOCK;
        }

        /* we hit an error and cannot progress this message */
        opal_output(0, "%s->%s mca_oob_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno,
                    peer->sd);
        return ORTE_ERR_COMM_FAILURE;
    }

    /* we sent the full data block */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* A file descriptor is available/ready for send. Check the state
|
||||
* of the socket and take the appropriate action.
|
||||
*/
|
||||
void mca_oob_usock_send_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
mca_oob_usock_send_t* msg = peer->send_msg;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler called to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECTING:
|
||||
case MCA_OOB_USOCK_CLOSED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
mca_oob_usock_state_print(peer->state));
|
||||
mca_oob_usock_peer_complete_connect(peer);
|
||||
/* de-activate the send event until the connection
|
||||
* handshake completes
|
||||
*/
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler SENDING TO %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer->send_msg) ? "NULL" : ORTE_NAME_PRINT(&peer->name));
|
||||
if (NULL != msg) {
|
||||
/* if the header hasn't been completely sent, send it */
|
||||
if (!msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* header is completely sent */
|
||||
msg->hdr_sent = true;
|
||||
/* setup to send the data */
|
||||
if (NULL == msg->msg) {
|
||||
/* this was a zero-byte msg - nothing more to do */
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
} else if (NULL != msg->msg->buffer) {
|
||||
/* send the buffer data as a single block */
|
||||
msg->sdptr = msg->msg->buffer->base_ptr;
|
||||
msg->sdbytes = msg->msg->buffer->bytes_used;
|
||||
} else if (NULL != msg->msg->iov) {
|
||||
/* start with the first iovec */
|
||||
msg->sdptr = msg->msg->iov[0].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[0].iov_len;
|
||||
msg->iovnum = 0;
|
||||
} else {
|
||||
msg->sdptr = msg->msg->data;
|
||||
msg->sdbytes = msg->msg->count;
|
||||
}
|
||||
/* fall thru and let the send progress */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send header",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
/* progress the data transmission */
|
||||
if (msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* this block is complete */
|
||||
if (NULL != msg->msg->buffer) {
|
||||
/* we are done - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else if (NULL != msg->msg->data) {
|
||||
/* this was a relay message - nothing more to do */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else {
|
||||
/* rotate to the next iovec */
|
||||
msg->iovnum++;
|
||||
if (msg->iovnum < msg->msg->count) {
|
||||
msg->sdptr = msg->msg->iov[msg->iovnum].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[msg->iovnum].iov_len;
|
||||
/* exit this event to give the event lib
|
||||
* a chance to progress any other pending
|
||||
* actions
|
||||
*/
|
||||
return;
|
||||
} else {
|
||||
/* this message is complete - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
}
|
||||
}
|
||||
/* fall thru to queue the next message */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send message ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
next:
|
||||
/* if current message completed - progress any pending sends by
|
||||
* moving the next in the queue into the "on-deck" position. Note
|
||||
* that this doesn't mean we send the message right now - we will
|
||||
* wait for another send_event to fire before doing so. This gives
|
||||
* us a chance to service any pending recvs.
|
||||
*/
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
/* if nothing else to do unregister for send event notifications */
|
||||
if (NULL == peer->send_msg && peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: invalid connection state (%d) on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state, peer->sd);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read the bytes still outstanding for the peer's active recv message.
 *
 * Repeatedly calls read() on peer->sd into peer->recv_msg->rdptr until
 * peer->recv_msg->rdbytes reaches zero, advancing rdptr and decrementing
 * rdbytes after every (possibly partial) read.
 *
 * Returns:
 *   ORTE_SUCCESS            the full block was read
 *   ORTE_ERR_RESOURCE_BUSY  read() reported EAGAIN - message stays active
 *   ORTE_ERR_WOULD_BLOCK    read() reported EWOULDBLOCK, or the remote peer
 *                           closed the connection (all peer events are
 *                           stopped, the recv message is released and the
 *                           peer is closed before returning)
 *   ORTE_ERR_COMM_FAILURE   any other read() error
 */
static int read_bytes(mca_oob_usock_peer_t* peer)
{
    /* read() returns ssize_t - don't squeeze it through an int */
    ssize_t rc;

    /* read until all bytes recvd or error */
    while (0 < peer->recv_msg->rdbytes) {
        rc = read(peer->sd, peer->recv_msg->rdptr, peer->recv_msg->rdbytes);
        if (rc < 0) {
            if (opal_socket_errno == EINTR) {
                continue;
            } else if (opal_socket_errno == EAGAIN) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_RESOURCE_BUSY;
            } else if (opal_socket_errno == EWOULDBLOCK) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_WOULD_BLOCK;
            }
            /* we hit an error and cannot progress this message - report
             * the error back to the RML and let the caller know
             * to abort this message
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)),
                                strerror(opal_socket_errno),
                                opal_socket_errno);
            return ORTE_ERR_COMM_FAILURE;
        } else if (rc == 0) {
            /* the remote peer closed the connection - report that condition
             * and let the caller know
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: peer closed connection",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)));
            /* stop all events */
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }
            if (NULL != peer->recv_msg) {
                /* the recv class is registered without a destructor, so a
                 * partially-read payload has to be freed here or it leaks.
                 * The data pointer is only assigned once the header has
                 * been received, so don't touch it before then.
                 */
                if (peer->recv_msg->hdr_recvd && NULL != peer->recv_msg->data) {
                    free(peer->recv_msg->data);
                    peer->recv_msg->data = NULL;
                }
                OBJ_RELEASE(peer->recv_msg);
                peer->recv_msg = NULL;
            }
            mca_oob_usock_peer_close(peer);
            return ORTE_ERR_WOULD_BLOCK;
        }
        /* we were able to read something, so adjust counters and location */
        peer->recv_msg->rdbytes -= rc;
        peer->recv_msg->rdptr += rc;
    }

    /* we read the full data block */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Dispatch to the appropriate action routine based on the state
|
||||
* of the connection with the peer.
|
||||
*/
|
||||
|
||||
void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
int rc;
|
||||
orte_rml_send_t *snd;
|
||||
|
||||
if (orte_abnormal_term_ordered) {
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECT_ACK:
|
||||
if (ORTE_SUCCESS == (rc = mca_oob_usock_peer_recv_connect_ack(peer, peer->sd, NULL))) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler starting send/recv events",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* we connected! Start the send/recv events */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
/* if there is a message waiting to be sent, queue it */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
/* update our state */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler CONNECTED",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* allocate a new message and setup for recv */
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate new recv msg",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer->recv_msg = OBJ_NEW(mca_oob_usock_recv_t);
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to allocate recv message\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
return;
|
||||
}
|
||||
/* start by reading the header */
|
||||
peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
|
||||
peer->recv_msg->rdbytes = sizeof(mca_oob_usock_hdr_t);
|
||||
}
|
||||
/* if the header hasn't been completely read, read it */
|
||||
if (!peer->recv_msg->hdr_recvd) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler read hdr",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* completed reading the header */
|
||||
peer->recv_msg->hdr_recvd = true;
|
||||
/* if this is a zero-byte message, then we are done */
|
||||
if (0 == peer->recv_msg->hdr.nbytes) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
|
||||
peer->recv_msg->data = NULL; // make sure
|
||||
peer->recv_msg->rdptr = NULL;
|
||||
peer->recv_msg->rdbytes = 0;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate data region of size %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
|
||||
/* allocate the data region */
|
||||
peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
|
||||
/* point to it */
|
||||
peer->recv_msg->rdptr = peer->recv_msg->data;
|
||||
peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
|
||||
}
|
||||
/* fall thru and attempt to read the data */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
/* close the connection */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler error reading bytes - closing connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (peer->recv_msg->hdr_recvd) {
|
||||
/* continue to read the data block - we start from
|
||||
* wherever we left off, which could be at the
|
||||
* beginning or somewhere in the message
|
||||
*/
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* we recvd all of the message */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD COMPLETE MESSAGE FROM %s OF %d BYTES FOR DEST %s TAG %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
|
||||
(int)peer->recv_msg->hdr.nbytes,
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
|
||||
peer->recv_msg->hdr.tag);
|
||||
/* am I the intended recipient? */
|
||||
if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* yes - post it to the RML for delivery */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s DELIVERING TO RML",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
|
||||
peer->recv_msg->hdr.seq_num,
|
||||
peer->recv_msg->data,
|
||||
peer->recv_msg->hdr.nbytes);
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
} else {
|
||||
/* no - we don't route things, so we promote this
|
||||
* back to the OOB and let another transport move
|
||||
* it along. If we are a daemon and it is intended
|
||||
* for another of our local procs, it will just come
|
||||
* back to us and be handled then
|
||||
*/
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = peer->recv_msg->hdr.dst;
|
||||
snd->origin = peer->recv_msg->hdr.origin;
|
||||
snd->tag = peer->recv_msg->hdr.tag;
|
||||
snd->data = peer->recv_msg->data;
|
||||
snd->seq_num = peer->recv_msg->hdr.seq_num;
|
||||
snd->count = peer->recv_msg->hdr.nbytes;
|
||||
snd->cbfunc.iov = NULL;
|
||||
snd->cbdata = NULL;
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
/* protect the data */
|
||||
peer->recv_msg->data = NULL;
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
return;
|
||||
}
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to recv message",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
/* turn off the recv event */
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
// mca_oob_usock_peer_close(peer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Constructor for mca_oob_usock_send_t: a new send object has nothing
 * attached to it and has made no progress on the wire.
 */
static void snd_cons(mca_oob_usock_send_t *ptr)
{
    /* no RML message or relay payload attached yet */
    ptr->data = NULL;
    ptr->msg = NULL;

    /* send progress: nothing pending, header not yet sent */
    ptr->sdbytes = 0;
    ptr->sdptr = NULL;
    ptr->iovnum = 0;
    ptr->hdr_sent = false;
}
|
||||
/* we don't destruct any RML msg that is
|
||||
* attached to our send as the RML owns
|
||||
* that memory. However, if we relay a
|
||||
* msg, the data in the relay belongs to
|
||||
* us and must be free'd
|
||||
*/
|
||||
static void snd_des(mca_oob_usock_send_t *ptr)
|
||||
{
|
||||
if (NULL != ptr->data) {
|
||||
free(ptr->data);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_send_t,
|
||||
opal_list_item_t,
|
||||
snd_cons, snd_des);
|
||||
|
||||
/* Constructor for mca_oob_usock_recv_t: no header received, no payload
 * buffer, and nothing left to read.
 */
static void rcv_cons(mca_oob_usock_recv_t *ptr)
{
    ptr->hdr_recvd = false;
    /* the payload buffer is only allocated after the header arrives -
     * start from NULL so the pointer never holds an indeterminate value
     */
    ptr->data = NULL;
    ptr->rdptr = NULL;
    ptr->rdbytes = 0;
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_recv_t,
|
||||
opal_list_item_t,
|
||||
rcv_cons, NULL);
|
||||
|
||||
/* Constructor for mca_oob_usock_msg_error_t: neither the usock send
 * nor the RML message is attached until the error is posted.
 */
static void err_cons(mca_oob_usock_msg_error_t *ptr)
{
    ptr->snd = NULL;
    ptr->rmsg = NULL;
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_msg_error_t,
|
||||
opal_object_t,
|
||||
err_cons, NULL);
|
||||
|
@ -1,254 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_SENDRECV_H_
|
||||
#define _MCA_OOB_USOCK_SENDRECV_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_hdr.h"
|
||||
|
||||
/* usock structure for sending a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
orte_rml_send_t *msg;
|
||||
char *data;
|
||||
bool hdr_sent;
|
||||
int iovnum;
|
||||
char *sdptr;
|
||||
size_t sdbytes;
|
||||
} mca_oob_usock_send_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_send_t);
|
||||
|
||||
/* usock structure for recving a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
bool hdr_recvd;
|
||||
char *data;
|
||||
char *rdptr;
|
||||
size_t rdbytes;
|
||||
} mca_oob_usock_recv_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t);
|
||||
|
||||
/* Queue a message to be sent to a specified peer. The macro
|
||||
* checks to see if a message is already in position to be
|
||||
* sent - if it is, then the message provided is simply added
|
||||
* to the peer's message queue. If not, then the provided message
|
||||
* is placed in the "ready" position
|
||||
*
|
||||
* If the provided boolean is true, then the send event for the
|
||||
* peer is checked and activated if not already active. This allows
|
||||
* the macro to either immediately send the message, or to queue
|
||||
* it as "pending" for later transmission - e.g., after the
|
||||
* connection procedure is completed
|
||||
*
|
||||
* p => pointer to mca_oob_usock_peer_t
|
||||
* s => pointer to mca_oob_usock_send_t
|
||||
* f => true if send event is to be activated
|
||||
*/
|
||||
#define MCA_OOB_USOCK_QUEUE_MSG(p, s, f) \
|
||||
do { \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] queue msg to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT(&((s)->hdr.dst))); \
|
||||
/* if there is no message on-deck, put this one there */ \
|
||||
if (NULL == (p)->send_msg) { \
|
||||
(p)->send_msg = (s); \
|
||||
} else { \
|
||||
/* add it to the queue */ \
|
||||
opal_list_append(&(p)->send_queue, &(s)->super); \
|
||||
} \
|
||||
if ((f)) { \
|
||||
/* if we aren't connected, then start connecting */ \
|
||||
if (MCA_OOB_USOCK_CONNECTED != (p)->state) { \
|
||||
(p)->state = MCA_OOB_USOCK_CONNECTING; \
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE((p), \
|
||||
mca_oob_usock_peer_try_connect); \
|
||||
} else { \
|
||||
/* ensure the send event is active */ \
|
||||
if (!(p)->send_ev_active) { \
|
||||
opal_event_add(&(p)->send_event, 0); \
|
||||
(p)->send_ev_active = true; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}while(0);
|
||||
|
||||
/* queue a message to be sent by one of our modules - must
|
||||
* provide the following params:
|
||||
*
|
||||
* m - the RML message to be sent
|
||||
* p - the final recipient
|
||||
*/
|
||||
#define MCA_OOB_USOCK_QUEUE_SEND(m, p) \
|
||||
do { \
|
||||
mca_oob_usock_send_t *msg; \
|
||||
int i; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] queue send to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT(&((m)->dst))); \
|
||||
msg = OBJ_NEW(mca_oob_usock_send_t); \
|
||||
/* setup the header */ \
|
||||
msg->hdr.origin = (m)->origin; \
|
||||
msg->hdr.dst = (m)->dst; \
|
||||
msg->hdr.type = MCA_OOB_USOCK_USER; \
|
||||
msg->hdr.tag = (m)->tag; \
|
||||
msg->hdr.seq_num = (m)->seq_num; \
|
||||
/* point to the actual message */ \
|
||||
msg->msg = (m); \
|
||||
/* set the total number of bytes to be sent */ \
|
||||
if (NULL != (m)->buffer) { \
|
||||
msg->hdr.nbytes = (m)->buffer->bytes_used; \
|
||||
} else if (NULL != (m)->iov) { \
|
||||
msg->hdr.nbytes = 0; \
|
||||
for (i=0; i < (m)->count; i++) { \
|
||||
msg->hdr.nbytes += (m)->iov[i].iov_len; \
|
||||
} \
|
||||
} else { \
|
||||
msg->hdr.nbytes = (m)->count; \
|
||||
} \
|
||||
/* start the send with the header */ \
|
||||
msg->sdptr = (char*)&msg->hdr; \
|
||||
msg->sdbytes = sizeof(mca_oob_usock_hdr_t); \
|
||||
/* add to the msg queue for this peer */ \
|
||||
MCA_OOB_USOCK_QUEUE_MSG((p), msg, true); \
|
||||
}while(0);
|
||||
|
||||
/* queue a message to be sent by one of our modules upon completing
|
||||
* the connection process - must provide the following params:
|
||||
*
|
||||
* m - the RML message to be sent
|
||||
* p - the final recipient
|
||||
*/
|
||||
#define MCA_OOB_USOCK_QUEUE_PENDING(m, p) \
|
||||
do { \
|
||||
mca_oob_usock_send_t *msg; \
|
||||
int i; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] queue pending to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT(&((m)->dst))); \
|
||||
msg = OBJ_NEW(mca_oob_usock_send_t); \
|
||||
/* setup the header */ \
|
||||
msg->hdr.origin = (m)->origin; \
|
||||
msg->hdr.dst = (m)->dst; \
|
||||
msg->hdr.type = MCA_OOB_USOCK_USER; \
|
||||
msg->hdr.tag = (m)->tag; \
|
||||
msg->hdr.seq_num = (m)->seq_num; \
|
||||
/* point to the actual message */ \
|
||||
msg->msg = (m); \
|
||||
/* set the total number of bytes to be sent */ \
|
||||
if (NULL != (m)->buffer) { \
|
||||
msg->hdr.nbytes = (m)->buffer->bytes_used; \
|
||||
} else if (NULL != (m)->iov) { \
|
||||
msg->hdr.nbytes = 0; \
|
||||
for (i=0; i < (m)->count; i++) { \
|
||||
msg->hdr.nbytes += (m)->iov[i].iov_len; \
|
||||
} \
|
||||
} else { \
|
||||
msg->hdr.nbytes = (m)->count; \
|
||||
} \
|
||||
/* start the send with the header */ \
|
||||
msg->sdptr = (char*)&msg->hdr; \
|
||||
msg->sdbytes = sizeof(mca_oob_usock_hdr_t); \
|
||||
/* add to the msg queue for this peer */ \
|
||||
MCA_OOB_USOCK_QUEUE_MSG((p), msg, false); \
|
||||
}while(0);
|
||||
|
||||
/* State machine for processing message */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
int reps;
|
||||
orte_rml_send_t *msg;
|
||||
} mca_oob_usock_msg_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_op_t);
|
||||
|
||||
#define ORTE_ACTIVATE_USOCK_POST_SEND(ms, cbfunc) \
|
||||
do { \
|
||||
mca_oob_usock_msg_op_t *mop; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] post send to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT(&((ms)->dst))); \
|
||||
mop = OBJ_NEW(mca_oob_usock_msg_op_t); \
|
||||
mop->msg = (ms); \
|
||||
opal_event_set(mca_oob_usock_module.ev_base, &mop->ev, -1, \
|
||||
OPAL_EV_WRITE, (cbfunc), mop); \
|
||||
opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t *rmsg;
|
||||
mca_oob_usock_send_t *snd;
|
||||
orte_process_name_t hop;
|
||||
} mca_oob_usock_msg_error_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_error_t);
|
||||
|
||||
/* macro for reporting delivery errors back to the
|
||||
* component for error handling
|
||||
*
|
||||
* s -> mca_oob_usock_send_t that failed (can be NULL)
|
||||
* r -> orte_rml_send_t that failed (can be NULL)
|
||||
* h -> process name for the next recipient
|
||||
* cbfunc -> function to handle the callback
|
||||
*/
|
||||
#define ORTE_ACTIVATE_USOCK_MSG_ERROR(s, r, h, cbfunc) \
|
||||
do { \
|
||||
mca_oob_usock_msg_error_t *mop; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] post msg error to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT((h))); \
|
||||
mop = OBJ_NEW(mca_oob_usock_msg_error_t); \
|
||||
if (NULL != (s)) { \
|
||||
mop->snd = (s); \
|
||||
} else if (NULL != (r)) { \
|
||||
/* use a proxy so we can pass NULL into the macro */ \
|
||||
mop->rmsg = (r); \
|
||||
} \
|
||||
mop->hop.jobid = (h)->jobid; \
|
||||
mop->hop.vpid = (h)->vpid; \
|
||||
opal_event_set(orte_event_base, &mop->ev, -1, \
|
||||
OPAL_EV_WRITE, (cbfunc), mop); \
|
||||
opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_SENDRECV_H_ */
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: INTEL
|
||||
status: maintenance
|
@ -27,7 +27,7 @@
|
||||
*
|
||||
* RML Framework maintenance interface
|
||||
*
|
||||
* Interface for starting / stopping / controlling the RML framework,
|
||||
* Interface for starting / stopping / controlling the RML framework,
|
||||
* as well as support for modifying RML datatypes.
|
||||
*
|
||||
* @note The only RML datatype exposed to the user is the RML tag.
|
||||
@ -91,16 +91,17 @@ ORTE_DECLSPEC void orte_rml_base_comm_stop(void);
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
int pri;
|
||||
orte_rml_base_module_t *module;
|
||||
mca_base_component_t *component;
|
||||
orte_rml_component_t *component;
|
||||
} orte_rml_base_active_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_base_active_t);
|
||||
|
||||
/* a global struct containing framework-level values */
|
||||
typedef struct {
|
||||
opal_list_t actives; /* list to hold the active plugins */
|
||||
opal_list_t actives; /* list to hold the active components */
|
||||
opal_pointer_array_t conduits; /* array to hold the open conduits */
|
||||
opal_list_t posted_recvs;
|
||||
opal_list_t unmatched_msgs;
|
||||
orte_rml_conduit_t def_conduit_id;
|
||||
#if OPAL_ENABLE_TIMING
|
||||
bool timing;
|
||||
#endif
|
||||
@ -108,17 +109,6 @@ typedef struct {
|
||||
ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base;
|
||||
|
||||
|
||||
/**
|
||||
* List of components that are available to the RML
|
||||
*
|
||||
* List of components that are currently available to the RML
|
||||
* framework. Useable between calls to orte_rml_base_open() and
|
||||
* orte_rml_base_close().
|
||||
*
|
||||
* @note This list should not be used by code outside the RML base.
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_rml_base_components;
|
||||
|
||||
/* structure to send RML messages - used internally */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
@ -153,6 +143,8 @@ typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t send;
|
||||
/* conduit_id */
|
||||
orte_rml_conduit_t conduit_id;
|
||||
} orte_rml_send_request_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_send_request_t);
|
||||
|
||||
@ -222,21 +214,6 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
|
||||
opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
/*
|
||||
reactivates rcv msg on the unposted rcvd list when a match occurs
|
||||
need a different path as the QoS recv processing was already done
|
||||
for this process
|
||||
*/
|
||||
#define ORTE_RML_REACTIVATE_MESSAGE(m) \
|
||||
do { \
|
||||
/* setup the event */ \
|
||||
opal_event_set(orte_event_base, &(m)->ev, -1, \
|
||||
OPAL_EV_WRITE, \
|
||||
orte_rml_base_reprocess_msg, (m)); \
|
||||
opal_event_set_priority(&(m)->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
#define ORTE_RML_SEND_COMPLETE(m) \
|
||||
do { \
|
||||
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
|
||||
@ -264,47 +241,60 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
|
||||
/* common implementations */
|
||||
ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg);
|
||||
|
||||
|
||||
/* Stub API interfaces to cycle through active plugins and call highest priority */
|
||||
ORTE_DECLSPEC int orte_rml_API_enable_comm(void);
|
||||
ORTE_DECLSPEC void orte_rml_API_finalize(void);
|
||||
ORTE_DECLSPEC char* orte_rml_API_get_contact_info(void);
|
||||
ORTE_DECLSPEC void orte_rml_API_set_contact_info(const char *contact_info);
|
||||
ORTE_DECLSPEC int orte_rml_API_ping(const char* contact_info, const struct timeval* tv);
|
||||
ORTE_DECLSPEC int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
ORTE_DECLSPEC int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
/* Stub API interfaces to cycle through active plugins */
|
||||
char* orte_rml_API_get_contact_info(void);
|
||||
void orte_rml_API_set_contact_info(const char *contact_info);
|
||||
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
int orte_rml_API_ping_conduit(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
int orte_rml_API_send_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
|
||||
int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
int orte_rml_API_send_buffer_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc);
|
||||
void orte_rml_API_purge(orte_process_name_t *peer);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc);
|
||||
int orte_rml_API_query_transports(opal_list_t *providers);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_ft_event(int state);
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_purge(orte_process_name_t *peer);
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_RML_BASE_H */
|
||||
#endif /* MCA_RML_BASE_H */
|
||||
|
@ -139,7 +139,7 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data)
|
||||
}
|
||||
|
||||
/* if we changed it, then we better update the routing
|
||||
* plan so daemon collectives work correctly
|
||||
* plan so daemon collectives work correctly.
|
||||
*/
|
||||
orte_routed.update_routing_plan();
|
||||
}
|
||||
|
@ -38,21 +38,22 @@
|
||||
|
||||
|
||||
/* Initialising stub fns in the global var used by other modules */
|
||||
orte_rml_base_module_t orte_rml = {
|
||||
orte_rml_API_enable_comm,
|
||||
orte_rml_API_finalize,
|
||||
orte_rml_API_get_contact_info,
|
||||
orte_rml_API_set_contact_info,
|
||||
orte_rml_API_ping,
|
||||
orte_rml_API_send_nb,
|
||||
orte_rml_API_send_buffer_nb,
|
||||
orte_rml_API_recv_nb,
|
||||
orte_rml_API_recv_buffer_nb,
|
||||
orte_rml_API_recv_cancel,
|
||||
orte_rml_API_add_exception_handler,
|
||||
orte_rml_API_del_exception_handler,
|
||||
orte_rml_API_ft_event,
|
||||
orte_rml_API_purge
|
||||
orte_rml_base_API_t orte_rml = {
|
||||
.get_contact_info = orte_rml_API_get_contact_info,
|
||||
.set_contact_info = orte_rml_API_set_contact_info,
|
||||
.ping = orte_rml_API_ping,
|
||||
.ping_conduit = orte_rml_API_ping_conduit,
|
||||
.send_nb = orte_rml_API_send_nb,
|
||||
.send_buffer_nb = orte_rml_API_send_buffer_nb,
|
||||
.send_nb_conduit = orte_rml_API_send_nb_conduit,
|
||||
.send_buffer_nb_conduit = orte_rml_API_send_buffer_nb_conduit,
|
||||
.recv_nb = orte_rml_API_recv_nb,
|
||||
.recv_buffer_nb = orte_rml_API_recv_buffer_nb,
|
||||
.recv_cancel = orte_rml_API_recv_cancel,
|
||||
.purge = orte_rml_API_purge,
|
||||
.query_transports = orte_rml_API_query_transports,
|
||||
.open_conduit = orte_rml_API_open_conduit,
|
||||
.close_conduit = orte_rml_API_close_conduit
|
||||
};
|
||||
|
||||
orte_rml_base_t orte_rml_base = {{{0}}};
|
||||
@ -89,16 +90,24 @@ static void cleanup(int sd, short args, void *cbdata)
|
||||
static int orte_rml_base_close(void)
|
||||
{
|
||||
volatile bool active;
|
||||
int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits);
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
orte_rml_base_active_t *active_module;
|
||||
|
||||
/*close the active modules */
|
||||
OPAL_LIST_FOREACH(active_module, &orte_rml_base.actives, orte_rml_base_active_t)
|
||||
/* cycle thru the conduits opened and call each module's finalize */
|
||||
/* The components finalise/close() will be responsible for freeing the module pointers */
|
||||
for (idx = 0; idx < total_conduits ; idx++)
|
||||
{
|
||||
if (NULL != active_module->module->finalize) {
|
||||
active_module->module->finalize();
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) {
|
||||
/* close the conduit */
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
OPAL_LIST_DESTRUCT(&orte_rml_base.actives)
|
||||
|
||||
/* because the RML posted recvs list is in a separate
|
||||
@ -130,6 +139,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
|
||||
OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1);
|
||||
|
||||
OPAL_TIMING_INIT(&tm_rml);
|
||||
/* Open up all available components */
|
||||
@ -145,18 +156,15 @@ OBJ_CLASS_INSTANCE(orte_rml_base_active_t,
|
||||
NULL, NULL);
|
||||
|
||||
/**
|
||||
* Function for selecting one component(plugin) from all those that are
|
||||
* available.
|
||||
* Function for ordering the component(plugin) by priority
|
||||
*/
|
||||
int orte_rml_base_select(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli=NULL;
|
||||
mca_base_component_t *component=NULL;
|
||||
mca_base_module_t *module=NULL;
|
||||
orte_rml_base_module_t *nmodule;
|
||||
orte_rml_component_t *component=NULL;
|
||||
orte_rml_base_active_t *newmodule, *mod;
|
||||
int priority;
|
||||
bool inserted;
|
||||
opal_list_t conduit_attr;
|
||||
|
||||
if (selected) {
|
||||
return ORTE_SUCCESS;
|
||||
@ -164,59 +172,65 @@ int orte_rml_base_select(void)
|
||||
selected = true;
|
||||
|
||||
OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) {
|
||||
component = (mca_base_component_t *) cli->cli_component;
|
||||
component = (orte_rml_component_t*) cli->cli_component;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: Initializing %s component %s",
|
||||
component->mca_type_name,
|
||||
component->mca_component_name);
|
||||
component->base.mca_type_name,
|
||||
component->base.mca_component_name);
|
||||
|
||||
if (NULL == ((orte_rml_component_t *)component)->rml_init) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: no init function; ignoring component [%s]",component->mca_component_name);
|
||||
} else {
|
||||
module = (mca_base_module_t *) ((orte_rml_component_t *)component)->rml_init(&priority);
|
||||
if (NULL == module) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: init returned failure [%s]",component->mca_component_name);
|
||||
continue;
|
||||
}
|
||||
/* add to the list of available components */
|
||||
newmodule = OBJ_NEW(orte_rml_base_active_t);
|
||||
newmodule->pri = component->priority;
|
||||
newmodule->component = component;
|
||||
|
||||
/* based on priority add it to the actives list */
|
||||
nmodule = (orte_rml_base_module_t*) module;
|
||||
/* add to the list of selected modules */
|
||||
newmodule = OBJ_NEW(orte_rml_base_active_t);
|
||||
newmodule->pri = priority;
|
||||
newmodule->module = nmodule;
|
||||
newmodule->component = component;
|
||||
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (priority > mod->pri) {
|
||||
opal_list_insert_pos(&orte_rml_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_rml_base.actives, &newmodule->super);
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (newmodule->pri > mod->pri) {
|
||||
opal_list_insert_pos(&orte_rml_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_rml_base.actives, &newmodule->super);
|
||||
}
|
||||
}
|
||||
|
||||
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
|
||||
opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* show the prioritized list */
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->mca_component_name, mod->pri);
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Open the default oob conduit */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Opening the default conduit - oob component",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
OBJ_CONSTRUCT(&conduit_attr, opal_list_t);
|
||||
orte_set_attribute(&conduit_attr, ORTE_RML_INCLUDE_COMP_ATTRIB, ORTE_ATTR_LOCAL,"oob",OPAL_STRING);
|
||||
orte_rml_base.def_conduit_id = orte_rml_API_open_conduit(&conduit_attr);
|
||||
OPAL_LIST_DESTRUCT(&conduit_attr);
|
||||
if (0 <= orte_rml_base.def_conduit_id) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Default conduit (oob) opened with conduit id = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_rml_base.def_conduit_id);
|
||||
} else {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Default conduit (oob) could not be opened",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void orte_rml_send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
@ -315,3 +329,21 @@ static void prq_des(orte_rml_recv_request_t *ptr)
|
||||
OBJ_CLASS_INSTANCE(orte_rml_recv_request_t,
|
||||
opal_object_t,
|
||||
prq_cons, prq_des);
|
||||
|
||||
/* Constructor for orte_rml_pathway_t: the pathway starts with no component
 * name and with empty attribute and transport lists. */
static void pthcons(orte_rml_pathway_t *pathway)
{
    OBJ_CONSTRUCT(&pathway->attributes, opal_list_t);
    OBJ_CONSTRUCT(&pathway->transports, opal_list_t);
    pathway->component = NULL;
}

/* Destructor for orte_rml_pathway_t: releases the component name and
 * tears down both lists. */
static void pthdes(orte_rml_pathway_t *pathway)
{
    /* free(NULL) is a no-op, so the pointer needs no explicit guard */
    free(pathway->component);
    OPAL_LIST_DESTRUCT(&pathway->attributes);
    OPAL_LIST_DESTRUCT(&pathway->transports);
}

/* Register the class: a list item with the constructor/destructor above */
OBJ_CLASS_INSTANCE(orte_rml_pathway_t,
                   opal_list_item_t,
                   pthcons, pthdes);
|
||||
|
@ -210,7 +210,7 @@ static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all)
|
||||
*/
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &rcv->peer) &&
|
||||
msg->tag == rcv->tag) {
|
||||
ORTE_RML_REACTIVATE_MESSAGE(msg);
|
||||
ORTE_RML_ACTIVATE_MESSAGE(msg);
|
||||
opal_list_remove_item(&orte_rml_base.unmatched_msgs, item);
|
||||
if (!get_all) {
|
||||
break;
|
||||
@ -233,19 +233,3 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
|
||||
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
|
||||
orte_rml_base_complete_recv_msg(&msg);
|
||||
}
|
||||
|
||||
void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata)
|
||||
{
|
||||
orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
|
||||
"%s reprocessing msg received from %s for tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&msg->sender),
|
||||
msg->tag));
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
|
||||
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
|
||||
orte_rml_base_complete_recv_msg ( &msg);
|
||||
/* the msg should be matched and released in this path
|
||||
add an assert (msg!= NULL) ?? */
|
||||
}
|
||||
|
@ -37,72 +37,98 @@
|
||||
* The stub API interface functions
|
||||
*/
|
||||
|
||||
/** Enable communication once a process name has been assigned */
|
||||
int orte_rml_API_enable_comm(void)
|
||||
/** Open a conduit - check if the ORTE_RML_INCLUDE_COMP attribute is provided, this is */
|
||||
/* a comma separated list of components, try to open the conduit in this order. */
|
||||
/* if the ORTE_RML_INCLUDE_COMP is not provided or this list was not able to open conduit */
|
||||
/* call the open_conduit() of the component in priority order to see if they can use the */
|
||||
/* attribute to open a conduit. */
|
||||
/* Note: The component takes care of checking for duplicate and returning the previously */
|
||||
/* opened module* in case of duplicates. Currently we are saving it in a new conduit_id */
|
||||
/* even if it is duplicate. [ToDo] compare the module* received from component to see if */
|
||||
/* already present in array and return the prev conduit_id instead of adding it again to array */
|
||||
/* @param[in] attributes The attributes is a list of opal_value_t of type OPAL_STRING */
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
|
||||
{
|
||||
orte_rml_base_active_t *active, *next;
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_component_t *comp;
|
||||
orte_rml_base_module_t *mod, *ourmod=NULL;
|
||||
int rc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:enable_comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:open_conduit",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and let each one enable their comm */
|
||||
OPAL_LIST_FOREACH_SAFE(active, next, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->enable_comm) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->enable_comm())) {
|
||||
/* bozo check - you cannot specify both include and exclude */
|
||||
if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) &&
|
||||
orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) {
|
||||
// orte_show_help();
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* cycle thru the actives in priority order and let each one see if they can support this request */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
comp = (orte_rml_component_t *)active->component;
|
||||
if (NULL != comp->open_conduit) {
|
||||
if (NULL != (mod = comp->open_conduit(attributes))) {
|
||||
opal_output_verbose(2, orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:enable_comm Component %s was unable to enable comm",
|
||||
"%s rml:base:open_conduit Component %s provided a conduit",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
active->component->mca_component_name);
|
||||
/* remove this component from our actives */
|
||||
opal_list_remove_item(&orte_rml_base.actives, &active->super);
|
||||
/* give the module a chance to finalize */
|
||||
if (NULL != active->module->finalize) {
|
||||
active->module->finalize();
|
||||
active->component->base.mca_component_name);
|
||||
/* retain this answer */
|
||||
if (NULL != ourmod) {
|
||||
free(ourmod);
|
||||
}
|
||||
OBJ_RELEASE(active);
|
||||
ourmod = mod;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* ensure we still have someone */
|
||||
if (0 < opal_list_get_size(&orte_rml_base.actives)) {
|
||||
return ORTE_SUCCESS;
|
||||
if (NULL != ourmod) {
|
||||
/* we got an answer - store this conduit in our array */
|
||||
rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod);
|
||||
return rc;
|
||||
}
|
||||
return ORTE_ERR_UNREACH;
|
||||
/* we get here if nobody could support it */
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Shutdown the communication system and clean up resources */
|
||||
void orte_rml_API_finalize(void)
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:finalize()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:close_conduit(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id);
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->finalize) {
|
||||
active->module->finalize();
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) {
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL);
|
||||
free(mod);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Get contact information for local process */
|
||||
char* orte_rml_API_get_contact_info(void)
|
||||
{
|
||||
char **rc = NULL, *tmp;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:get_contact_info()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
/* cycle thru the actives and get their contact info */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->get_contact_info) {
|
||||
tmp = active->module->get_contact_info();
|
||||
if (NULL != active->component->get_contact_info) {
|
||||
tmp = active->component->get_contact_info();
|
||||
if (NULL != tmp) {
|
||||
opal_argv_append_nosize(&rc, tmp);
|
||||
free(tmp);
|
||||
@ -122,44 +148,104 @@ void orte_rml_API_set_contact_info(const char *contact_info)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:set_contact_info()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and let all modules parse the info
|
||||
* to extract their relevant portions */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->set_contact_info) {
|
||||
active->module->set_contact_info(contact_info);
|
||||
if (NULL != active->component->set_contact_info) {
|
||||
active->component->set_contact_info(contact_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
int orte_rml_API_ping_conduit(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ping()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ping(conduit-%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id);
|
||||
|
||||
/* cycle thru the actives and see if anyone can confirm connection */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->ping) {
|
||||
rc = active->module->ping(contact_info, tv);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* at least someone can reach this target */
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->ping) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->ping((struct orte_rml_base_module_t*)mod, contact_info, tv);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
return orte_rml_API_ping_conduit(orte_rml_base.def_conduit_id, contact_info, tv);
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message through a specific conduit*/
|
||||
int orte_rml_API_send_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_nb_conduit() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_nb((struct orte_rml_base_module_t*)mod, peer, msg, count, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
int orte_rml_API_send_buffer_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_buffer_nb_conduit() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_buffer_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_buffer_nb((struct orte_rml_base_module_t*)mod, peer, buffer, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message through the default conduit */
|
||||
int orte_rml_API_send_nb(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
@ -167,25 +253,7 @@ int orte_rml_API_send_nb(orte_process_name_t* peer,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_nb() to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->send_nb) {
|
||||
rc = active->module->send_nb(peer, msg, count, tag, cbfunc, cbdata);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* someone was able to send it */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
return orte_rml_API_send_nb_conduit(orte_rml_base.def_conduit_id, peer, msg, count, tag, cbfunc, cbdata);
|
||||
}
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
@ -195,48 +263,26 @@ int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_buffer_nb()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->send_buffer_nb) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->send_buffer_nb(peer, buffer, tag, cbfunc, cbdata))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
return orte_rml_API_send_buffer_nb_conduit(orte_rml_base.def_conduit_id, peer, buffer, tag, cbfunc, cbdata);
|
||||
}
|
||||
|
||||
|
||||
/** post a receive for an IOV message */
|
||||
/** post a receive for an IOV message - this is done
|
||||
* strictly in the base, and so it does not go to a module */
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_nb) {
|
||||
active->module->recv_nb(peer, tag, persistent, cbfunc, cbdata);
|
||||
}
|
||||
}
|
||||
/* now push the request into the event base so we can add
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = false;
|
||||
@ -260,23 +306,14 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_buffer_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_buffer_nb) {
|
||||
active->module->recv_buffer_nb(peer, tag, persistent, cbfunc, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
/* now push the request into the event base so we can add
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = true;
|
||||
@ -296,23 +333,14 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
/** Cancel posted non-blocking receive */
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_cancel for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_cancel) {
|
||||
active->module->recv_cancel(peer,tag);
|
||||
}
|
||||
}
|
||||
|
||||
/* now push the request into the event base so we can remove
|
||||
/* push the request into the event base so we can remove
|
||||
* the receive from our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->cancel = true;
|
||||
@ -326,83 +354,46 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
}
|
||||
|
||||
/** Add callback for communication exception */
|
||||
int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:add_exception_handler()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->add_exception_handler) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->add_exception_handler(cbfunc))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Delete callback for communication exception */
|
||||
int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:del_exception_handler()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->del_exception_handler) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->del_exception_handler(cbfunc))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Fault tolerance handler */
|
||||
int orte_rml_API_ft_event(int state)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ft_event()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and let them all handle this event */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->ft_event) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/** Purge information */
|
||||
void orte_rml_API_purge(orte_process_name_t *peer)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
int i;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:purge() - calling the respective plugin that implements this",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and let everyone purge related info */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->purge) {
|
||||
active->module->purge(peer);
|
||||
for (i=0; i < orte_rml_base.conduits.size; i++) {
|
||||
/* get the module */
|
||||
if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, i))) {
|
||||
if (NULL != mod->purge) {
|
||||
mod->purge(peer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Collect the transports reported by the active RML components.
 *
 * Walks orte_rml_base.actives and, for every component that provides a
 * query_transports function returning a non-NULL pathway, retains that
 * pathway and appends it to the caller's list.
 *
 * @param providers  list that receives the orte_rml_pathway_t objects
 * @return ORTE_SUCCESS in all cases
 */
int orte_rml_API_query_transports(opal_list_t *providers)
{
    orte_rml_base_active_t *entry;
    orte_rml_pathway_t *pathway;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:orte_rml_API_query_transports()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        /* components without a query function have nothing to report */
        if (NULL == entry->component->query_transports) {
            continue;
        }
        opal_output_verbose(10,orte_rml_base_framework.framework_output,
                            "\n calling module: %s->query_transports() \n",
                            entry->component->base.mca_component_name);
        pathway = entry->component->query_transports();
        if (NULL == pathway) {
            continue;
        }
        /* take a reference before placing the result on the caller's list */
        OBJ_RETAIN(pathway);
        opal_list_append(providers, &pathway->super);
    }

    return ORTE_SUCCESS;
}
|
||||
|
0
orte/mca/rml/ofi/.opal_ignore
Обычный файл
0
orte/mca/rml/ofi/.opal_ignore
Обычный файл
@ -10,9 +10,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved
|
||||
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -21,36 +19,35 @@
|
||||
#
|
||||
|
||||
sources = \
|
||||
oob_usock_component.h \
|
||||
oob_usock.h \
|
||||
oob_usock_component.c \
|
||||
oob_usock_connection.h \
|
||||
oob_usock_sendrecv.h \
|
||||
oob_usock_hdr.h \
|
||||
oob_usock_peer.h \
|
||||
oob_usock_ping.h \
|
||||
oob_usock.c \
|
||||
oob_usock_connection.c \
|
||||
oob_usock_sendrecv.c
|
||||
rml_ofi.h \
|
||||
rml_ofi_request.h \
|
||||
rml_ofi_component.c \
|
||||
rml_ofi_send.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_oob_usock_DSO
|
||||
if MCA_BUILD_orte_rml_ofi_DSO
|
||||
component_noinst =
|
||||
component_install = mca_oob_usock.la
|
||||
component_install = mca_rml_ofi.la
|
||||
else
|
||||
component_noinst = libmca_oob_usock.la
|
||||
component_noinst = libmca_rml_ofi.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_oob_usock_la_SOURCES = $(sources)
|
||||
mca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
mca_rml_ofi_la_SOURCES = $(sources)
|
||||
mca_rml_ofi_la_LDFLAGS = \
|
||||
$(orte_rml_ofi_LDFLAGS) \
|
||||
-module -avoid-version
|
||||
mca_rml_ofi_la_LIBADD = $(orte_rml_ofi_LIBS) \
|
||||
$(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_oob_usock_la_SOURCES = $(sources)
|
||||
libmca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
libmca_rml_ofi_la_SOURCES = $(sources)
|
||||
libmca_rml_ofi_la_LDFLAGS = \
|
||||
$(orte_rml_ofi_LDFLAGS) \
|
||||
-module -avoid-version
|
||||
libmca_rml_ofi_la_LIBADD = $(orte_rml_ofi_LIBS)
|
29
orte/mca/rml/ofi/configure.m4
Обычный файл
29
orte/mca/rml/ofi/configure.m4
Обычный файл
@ -0,0 +1,29 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
#
|
||||
# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_rml_ofi_POST_CONFIG(will_build)
|
||||
# ----------------------------------------
|
||||
# Only require the tag if we're actually going to be built
|
||||
|
||||
# MCA_orte_rml_ofi_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_rml_ofi_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/rml/ofi/Makefile])
|
||||
|
||||
# ensure we already ran the common libfabric config
|
||||
AC_REQUIRE([MCA_opal_common_libfabric_CONFIG])
|
||||
|
||||
AS_IF([test "$opal_common_libfabric_happy" = "yes"],
|
||||
[$1],
|
||||
[$2])
|
||||
])dnl
|
191
orte/mca/rml/ofi/rml_ofi.h
Обычный файл
191
orte/mca/rml/ofi/rml_ofi.h
Обычный файл
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_RML_OFI_RML_OFI_H
|
||||
#define MCA_RML_OFI_RML_OFI_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "rml_ofi_request.h"
|
||||
|
||||
|
||||
/*
 * RML/OFI modex keys.  send_msg() passes these to OPAL_MODEX_RECV_STRING
 * to fetch the peer's endpoint address, picking the key that matches the
 * conduit's fabric_info->addr_format.
 */
/* (char*) endpoint address published for the FI_SOCKADDR_IN format */
#define OPAL_RML_OFI_FI_SOCKADDR_IN     "rml.ofi.fisockaddrin"
/* (char*) endpoint address published for the FI_ADDR_PSMX format */
#define OPAL_RML_OFI_FI_ADDR_PSMX       "rml.ofi.fiaddrpsmx"

/*
 * MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be.
 * To use the FI_MULTI_RECV feature efficiently the recv buffer has to be
 * large, so that it does not need to be reposted often to pick up the
 * remaining data once it fills.
 *
 * MIN_MULTI_BUF_SIZE is also the ceiling send_msg() uses for one packet
 * (header + payload) when it fragments a message.
 */
#define MULTI_BUF_SIZE_FACTOR 128
#define MIN_MULTI_BUF_SIZE (1024 * 1024)
|
||||
|
||||
/*
 * CLOSE_FID(fd): close a libfabric object and clear the caller's handle.
 *
 * fd must be a modifiable lvalue holding a pointer to an object that has
 * an embedded `fid` member (fid_ep, fid_cq, fid_av, ...).  A NULL handle
 * is ignored.  Otherwise fi_close() is called on it, the handle is set to
 * NULL, and a non-zero fi_close() result is reported at verbosity 10.
 *
 * The status is kept in the macro-local _ret; the log statement must
 * print that variable, not whatever `ret` happens to exist (or not exist)
 * in the expanding function.
 *
 * NOTE(review): the expansion still ends in ';' as it always has, so
 * call sites written without a trailing semicolon keep compiling.  The
 * consequence is that CLOSE_FID cannot be the unbraced body of an
 * if/else - wrap it in braces there.
 */
#define CLOSE_FID(fd)                                                           \
    do {                                                                        \
        int _ret = 0;                                                           \
        if (NULL != (fd)) {                                                     \
            _ret = fi_close(&(fd)->fid);                                        \
            (fd) = NULL;                                                        \
            if (0 != _ret) {                                                    \
                opal_output_verbose(10,orte_rml_base_framework.framework_output, \
                                    " %s - fi_close failed with error- %d",     \
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),_ret);   \
            }                                                                   \
        }                                                                       \
    } while (0);
|
||||
|
||||
|
||||
/*
 * RML_OFI_RETRY_UNTIL_DONE(FUNC): evaluate FUNC, storing its result in
 * the caller's `ret`, and keep re-evaluating it for as long as the result
 * is -FI_EAGAIN.  Any other value (0 or a different error) ends the loop
 * and is left in `ret` for the caller to inspect.
 *
 * The expanding scope must declare `ret`.  The expansion ends in ';'.
 */
#define RML_OFI_RETRY_UNTIL_DONE(FUNC)              \
    do {                                            \
        for (;;) {                                  \
            ret = FUNC;                             \
            if (OPAL_LIKELY(0 == ret) ||            \
                -FI_EAGAIN != ret) {                \
                break;                              \
            }                                       \
        }                                           \
    } while(0);
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct orte_rml_ofi_module_t;
|
||||
|
||||
/** This structure will hold the ep and all ofi objects for each transport
|
||||
and also the corresponding fi_info
|
||||
**/
|
||||
typedef struct {
|
||||
|
||||
/** OFI conduit ID **/
|
||||
uint8_t conduit_id;
|
||||
|
||||
/** fi_info for this transport */
|
||||
struct fi_info *fabric_info;
|
||||
|
||||
/** Fabric Domain handle */
|
||||
struct fid_fabric *fabric;
|
||||
|
||||
/** Access Domain handle */
|
||||
struct fid_domain *domain;
|
||||
|
||||
/** Address vector handle */
|
||||
struct fid_av *av;
|
||||
|
||||
/** Completion queue handle */
|
||||
struct fid_cq *cq;
|
||||
|
||||
/** Endpoint to communicate on */
|
||||
struct fid_ep *ep;
|
||||
|
||||
/** Endpoint name */
|
||||
char ep_name[FI_NAME_MAX];
|
||||
|
||||
/** Endpoint name length */
|
||||
size_t epnamelen;
|
||||
|
||||
/** OFI memory region */
|
||||
struct fid_mr *mr_multi_recv;
|
||||
|
||||
/** buffer for tx and rx */
|
||||
void *rxbuf;
|
||||
|
||||
uint64_t rxbuf_size;
|
||||
|
||||
/* event,fd associated with the cq */
|
||||
int fd;
|
||||
|
||||
/*event associated with progress fn */
|
||||
opal_event_t progress_event;
|
||||
bool progress_ev_active;
|
||||
|
||||
struct fi_context rx_ctx1;
|
||||
|
||||
/* module associated with this conduit_id returned to rml
|
||||
from open_conduit call */
|
||||
struct orte_rml_ofi_module_t *ofi_module;
|
||||
|
||||
} ofi_transport_conduit_t;
|
||||
|
||||
|
||||
struct orte_rml_ofi_module_t {
|
||||
orte_rml_base_module_t api;
|
||||
|
||||
/** current ofi transport id the component is using, this will be initialised
|
||||
** in the open_conduit() call **/
|
||||
int cur_transport_id;
|
||||
|
||||
/** Fabric info structure of all supported transports in system **/
|
||||
struct fi_info *fi_info_list;
|
||||
|
||||
/** OFI ep and corr fi_info for all the transports (conduit) **/
|
||||
ofi_transport_conduit_t ofi_conduits[MAX_CONDUIT];
|
||||
|
||||
size_t min_ofi_recv_buf_sz;
|
||||
|
||||
/** "Any source" address */
|
||||
fi_addr_t any_addr;
|
||||
|
||||
/** number of conduits currently opened **/
|
||||
uint8_t conduit_open_num;
|
||||
|
||||
/** Unique message id for every message that is fragmented to be sent over OFI **/
|
||||
uint32_t cur_msgid;
|
||||
|
||||
opal_list_t recv_msg_queue_list;
|
||||
opal_list_t queued_routing_messages;
|
||||
opal_event_t *timer_event;
|
||||
struct timeval timeout;
|
||||
} ;
|
||||
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
|
||||
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component;
|
||||
|
||||
int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/****************** INTERNAL OFI Functions*************/
|
||||
void free_conduit_resources( int conduit_id);
|
||||
void print_provider_list_info (struct fi_info *fi );
|
||||
|
||||
/** Send callback */
|
||||
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
|
||||
orte_rml_ofi_request_t*);
|
||||
|
||||
/** Error callback */
|
||||
int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
|
||||
orte_rml_ofi_request_t*);
|
||||
|
||||
/* OFI Recv handler */
|
||||
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
1088
orte/mca/rml/ofi/rml_ofi_component.c
Обычный файл
1088
orte/mca/rml/ofi/rml_ofi_component.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
127
orte/mca/rml/ofi/rml_ofi_request.h
Обычный файл
127
orte/mca/rml/ofi/rml_ofi_request.h
Обычный файл
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_RML_OFI_REQUEST_H
|
||||
#define ORTE_RML_OFI_REQUEST_H
|
||||
|
||||
|
||||
/* Map a pointer to the `ctx` member of an orte_rml_ofi_request_t back to
 * the request that contains it. */
#define TO_OFI_REQ(_ptr_ctx) container_of((_ptr_ctx), orte_rml_ofi_request_t, ctx)
|
||||
|
||||
/* Kind of operation an orte_rml_ofi_request_t represents. */
typedef enum {
    ORTE_RML_OFI_SEND  = 0,
    ORTE_RML_OFI_RECV  = 1,
    ORTE_RML_OFI_ACK   = 2,
    ORTE_RML_OFI_PROBE = 3
} orte_rml_ofi_request_type_t;
|
||||
/* orte_rml_ofi_msg_header_t contains the header information for the message being sent.
|
||||
The header and data is passed on to the destination. The destination will re-construct the
|
||||
orte_rml_sent_t struct once it receives this header and data.This header has the required information
|
||||
to construct the orte_rml_sent_t struct and also if the message is split into packets,
|
||||
then the packet information - total number of packets and the current packet number.
|
||||
*/
|
||||
struct orte_rml_ofi_msg_header_t{
|
||||
opal_process_name_t origin; // originator process id from the send message
|
||||
opal_process_name_t dst; // Destination process id from the send message
|
||||
uint32_t seq_num; // seq_num from the send message
|
||||
orte_rml_tag_t tag; // tag from the send message
|
||||
uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs
|
||||
uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin
|
||||
uint32_t cur_pkt_num; // current packet number
|
||||
};
|
||||
typedef struct orte_rml_ofi_msg_header_t orte_rml_ofi_msg_header_t;
|
||||
|
||||
/*
|
||||
orte_rml_ofi_pkts_t defines the packets in the message. Each packet contains header information
|
||||
and the data. Create a list of packets to hold the entire message.
|
||||
*/
|
||||
typedef struct {
|
||||
//list_item_t
|
||||
opal_list_item_t super;
|
||||
/* header + data size */
|
||||
size_t pkt_size;
|
||||
//header + data
|
||||
void *data;
|
||||
}orte_rml_ofi_send_pkt_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_send_pkt_t);
|
||||
|
||||
/*
|
||||
orte_rml_ofi_recv_pkt_t defines the packets in the receiving end of message.
|
||||
Each packet contains the packet number and the data.
|
||||
Create a list of packets to hold the entire message.
|
||||
*/
|
||||
typedef struct {
|
||||
//list_item_t
|
||||
opal_list_item_t super;
|
||||
/* current packet number */
|
||||
uint32_t cur_pkt_num;
|
||||
/*data size */
|
||||
size_t pkt_size;
|
||||
//data
|
||||
void *data;
|
||||
}orte_rml_ofi_recv_pkt_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_recv_pkt_t);
|
||||
|
||||
/*
|
||||
orte_rml_ofi_request_t holds the send request (orte_rml_send_t)
|
||||
*/
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
|
||||
/** OFI context */
|
||||
struct fi_context ctx;
|
||||
|
||||
orte_rml_send_t *send;
|
||||
|
||||
/** OFI conduit_id the request will use - this is
|
||||
* the reference to element into the orte_rml_ofi.ofi_conduits[] **/
|
||||
uint8_t conduit_id;
|
||||
|
||||
/** OFI Request type */
|
||||
orte_rml_ofi_request_type_t type;
|
||||
|
||||
/** Completion count used by blocking and/or synchronous operations */
|
||||
volatile int completion_count;
|
||||
|
||||
/** Reference to the RML used to lookup */
|
||||
/* source of an ANY_SOURCE Recv */
|
||||
struct orte_rml_base_module_t* rml;
|
||||
|
||||
/** header being sent **/
|
||||
orte_rml_ofi_msg_header_t hdr;
|
||||
|
||||
/** Pack buffer */
|
||||
void *data_blob;
|
||||
|
||||
/** Pack buffer size */
|
||||
size_t length;
|
||||
|
||||
/** Header and data in a list of Packets orte_rml_ofi_send_pkt_t */
|
||||
opal_list_t pkt_list;
|
||||
|
||||
} orte_rml_ofi_request_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_request_t);
|
||||
|
||||
|
||||
/* This will hold all the pckts received at the destination.
|
||||
Each entry will be indexed by [sender,msgid] and will have
|
||||
all the packets for that msgid and sender.
|
||||
*/
|
||||
typedef struct {
|
||||
|
||||
opal_list_item_t super; //list_item_t
|
||||
uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs
|
||||
opal_process_name_t sender; // originator process id from the send message
|
||||
uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin
|
||||
uint32_t pkt_recd; // current packet number
|
||||
opal_list_t pkt_list; // list holding Packets in this msg of type orte_rml_ofi_recv_pkt_t
|
||||
} ofi_recv_msg_queue_t;
|
||||
OBJ_CLASS_DECLARATION( ofi_recv_msg_queue_t);
|
||||
|
||||
#endif
|
665
orte/mca/rml/ofi/rml_ofi_send.c
Обычный файл
665
orte/mca/rml/ofi/rml_ofi_send.c
Обычный файл
@ -0,0 +1,665 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "rml_ofi.h"
|
||||
|
||||
|
||||
/*
 * Constructor for orte_rml_ofi_request_t.
 *
 * Besides constructing the packet list, give every pointer and counter a
 * defined starting value.  The object's storage is not assumed to arrive
 * zeroed, and a request can be released before a send has filled these
 * members in.
 */
static void ofi_req_cons(orte_rml_ofi_request_t *ptr)
{
    ptr->send = NULL;
    ptr->rml = NULL;
    ptr->data_blob = NULL;
    ptr->length = 0;
    ptr->completion_count = 0;
    OBJ_CONSTRUCT(&ptr->pkt_list, opal_list_t);
}

/*
 * Destructor for orte_rml_ofi_request_t: tears down the packet list.
 * data_blob and the per-packet data buffers are not freed here; the send
 * callback frees them before releasing the request.
 */
static void ofi_req_des(orte_rml_ofi_request_t *ptr)
{
    OPAL_LIST_DESTRUCT(&ptr->pkt_list);
}

OBJ_CLASS_INSTANCE(orte_rml_ofi_request_t,
                   opal_object_t,
                   ofi_req_cons, ofi_req_des);
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_rml_ofi_send_pkt_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_rml_ofi_recv_pkt_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
/* Constructor: an empty packet list, no packets received yet, and a
 * message that by default consists of a single packet. */
static void ofi_recv_msg_queue_cons(ofi_recv_msg_queue_t *ptr)
{
    OBJ_CONSTRUCT(&ptr->pkt_list, opal_list_t);
    ptr->pkt_recd = 0;
    ptr->tot_pkts = 1;
    ptr->msgid = 0;
}

/* Destructor: tears down the packet list. */
static void ofi_recv_msg_queue_des(ofi_recv_msg_queue_t *ptr)
{
    OPAL_LIST_DESTRUCT(&ptr->pkt_list);
}

OBJ_CLASS_INSTANCE(ofi_recv_msg_queue_t,
                   opal_list_item_t,
                   ofi_recv_msg_queue_cons,
                   ofi_recv_msg_queue_des);
|
||||
|
||||
/** Send callback */
|
||||
/* [Desc] This is called from the progress fn when a send completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry
|
||||
** ofi_send_req [in]: ofi send request with the send msg and callback
|
||||
*/
|
||||
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
|
||||
orte_rml_ofi_request_t* ofi_req)
|
||||
{
|
||||
orte_rml_ofi_send_pkt_t *ofi_send_pkt, *next;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_send_callback called, completion count = %d, msgid = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->completion_count, ofi_req->hdr.msgid);
|
||||
assert(ofi_req->completion_count > 0);
|
||||
ofi_req->completion_count--;
|
||||
if ( 0 == ofi_req->completion_count ) {
|
||||
// call the callback fn of the sender
|
||||
ofi_req->send->status = ORTE_SUCCESS;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s calling ORTE_RML_SEND_COMPLETE macro for msgid = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->hdr.msgid);
|
||||
ORTE_RML_SEND_COMPLETE(ofi_req->send);
|
||||
OPAL_LIST_FOREACH_SAFE(ofi_send_pkt, next, &ofi_req->pkt_list, orte_rml_ofi_send_pkt_t) {
|
||||
free( ofi_send_pkt->data);
|
||||
ofi_send_pkt->pkt_size=0;
|
||||
opal_list_remove_item(&ofi_req->pkt_list, &ofi_send_pkt->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(ofi_send_pkt);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
}
|
||||
free(ofi_req->data_blob);
|
||||
OBJ_RELEASE(ofi_req);
|
||||
}
|
||||
|
||||
// [TODO] need to check for error before returning success
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/** Error callback */
|
||||
/* [Desc] This is called from the progress fn when a send completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry
|
||||
** ofi_send_req [in]: ofi send request with the send msg and callback
|
||||
*/
|
||||
int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
|
||||
orte_rml_ofi_request_t* ofi_req)
|
||||
{
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_error_callback called ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
switch(error->err) {
|
||||
default:
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
ofi_req->send->status = ORTE_ERR_CONDUIT_SEND_FAIL;
|
||||
ORTE_RML_SEND_COMPLETE(ofi_req->send);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/** Recv handler */
|
||||
/* [Desc] This is called from the progress fn when a recv completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry */
|
||||
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id)
|
||||
{
|
||||
orte_rml_ofi_msg_header_t msg_hdr;
|
||||
uint32_t msglen, datalen = 0;
|
||||
char *data, *totdata, *nextpkt;
|
||||
ofi_recv_msg_queue_t *recv_msg_queue, *new_msg;
|
||||
orte_rml_ofi_recv_pkt_t *ofi_recv_pkt, *new_pkt, *next;
|
||||
bool msg_in_queue = false;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_recv_handler called ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
/*copy the header and data from buffer and pass it on
|
||||
** since this is the conduit recv buffer don't want it to be released as
|
||||
** considering re-using it, so for now copying to newly allocated *data
|
||||
** the *data will be released by orte_rml_base functions */
|
||||
|
||||
memcpy(&msg_hdr,wc->buf,sizeof(orte_rml_ofi_msg_header_t));
|
||||
msglen = wc->len - sizeof(orte_rml_ofi_msg_header_t);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Received packet -> msg id = %d wc->len = %d, msglen = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid, wc->len, msglen );
|
||||
data = (char *)malloc(msglen);
|
||||
memcpy(data,(wc->buf+sizeof(orte_rml_ofi_msg_header_t)),msglen);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s header info of received packet -> cur_pkt_num = %d, tot_pkts = %d ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.cur_pkt_num, msg_hdr.tot_pkts );
|
||||
/* To accomodate message bigger than recv buffer size,
|
||||
check if current message is in multiple blocks and append them before sending it to RML */
|
||||
if ( msg_hdr.tot_pkts == 1) {
|
||||
/* Since OFI is point-to-point, no need to check if the intended destination is me
|
||||
send to RML */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Posting Recv for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,data,msglen);
|
||||
} else {
|
||||
msg_in_queue = false;
|
||||
new_pkt = OBJ_NEW(orte_rml_ofi_recv_pkt_t);
|
||||
new_pkt->cur_pkt_num = msg_hdr.cur_pkt_num;
|
||||
new_pkt->pkt_size = msglen;
|
||||
new_pkt->data = data;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Just beofe checking if this message-pkt is already in queue. msgid-%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
/* check if the queue has the [msgid,sender] entry */
|
||||
OPAL_LIST_FOREACH(recv_msg_queue, &orte_rml_ofi.recv_msg_queue_list, ofi_recv_msg_queue_t) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Checking msgid-%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid );
|
||||
if( (recv_msg_queue->msgid == msg_hdr.msgid) && (recv_msg_queue->sender.jobid == msg_hdr.origin.jobid)
|
||||
&& (recv_msg_queue->sender.vpid == msg_hdr.origin.vpid) ) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Found Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid, recv_msg_queue->sender.vpid);
|
||||
msg_in_queue = true;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s msgid %d, tot_pkts=%d, opal_list_get_size()=%d,total pkt_recd=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts,
|
||||
opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd );
|
||||
if( recv_msg_queue->tot_pkts == (recv_msg_queue->pkt_recd +1) ) {
|
||||
/* all packets received for this message - post message to rml and remove this from queue */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s All packets recd for msgid %d, tot_pkts=%d, opal_list_get_size()=%d,total pkt_recd=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts,
|
||||
opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd );
|
||||
totdata = NULL;
|
||||
datalen = 0;
|
||||
OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding data for packet %d, pktlength = %d, cumulative datalen so far = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num, ofi_recv_pkt->pkt_size, datalen );
|
||||
if (0 == datalen) {
|
||||
totdata = (char *)malloc(ofi_recv_pkt->pkt_size);
|
||||
if( totdata == NULL) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: malloc failed for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),recv_msg_queue->msgid );
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
memcpy(totdata,ofi_recv_pkt->data,ofi_recv_pkt->pkt_size);
|
||||
|
||||
} else {
|
||||
totdata = realloc(totdata,datalen+ofi_recv_pkt->pkt_size);
|
||||
if (NULL != totdata ) {
|
||||
memcpy((totdata+datalen),ofi_recv_pkt->data,ofi_recv_pkt->pkt_size);
|
||||
} else {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid,
|
||||
recv_msg_queue->sender.vpid);
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
}
|
||||
datalen += ofi_recv_pkt->pkt_size;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s packet %d done, datalen = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num,datalen);
|
||||
}
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding leftover data recd, datalen = %d, new_pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
//add the last packet
|
||||
totdata =realloc(totdata,datalen+new_pkt->pkt_size);
|
||||
if( NULL != totdata ) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Realloc completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
nextpkt = totdata+datalen;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s totdata = %p,nextpkt = %p ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), totdata, nextpkt);
|
||||
memcpy(nextpkt,new_pkt->data,new_pkt->pkt_size);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s memcpy completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
datalen += new_pkt->pkt_size;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Posting Recv for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,totdata,datalen);\
|
||||
|
||||
// free the pkts
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s msgid %d - posting recv completed, freeing packets",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
OPAL_LIST_FOREACH_SAFE(ofi_recv_pkt, next, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
free( ofi_recv_pkt->data);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s freed data for packet %d",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num );
|
||||
ofi_recv_pkt->pkt_size=0;
|
||||
opal_list_remove_item(&recv_msg_queue->pkt_list, &ofi_recv_pkt->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(ofi_recv_pkt);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
}
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s freeing packets completed",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
//free the msg from the queue-list
|
||||
opal_list_remove_item(&orte_rml_ofi.recv_msg_queue_list,&recv_msg_queue->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Successfully removed msg from queue",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(recv_msg_queue);
|
||||
} else {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid,
|
||||
recv_msg_queue->sender.vpid);
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
} else {
|
||||
/* add this packet to the msg in the queue ordered by cur_pkt_num */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding packet to list, msgid %d, pkt - %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, msg_hdr.cur_pkt_num );
|
||||
|
||||
bool pkt_added = false;
|
||||
OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
if( msg_hdr.cur_pkt_num < ofi_recv_pkt->cur_pkt_num ) {
|
||||
opal_list_insert_pos(&recv_msg_queue->pkt_list, (opal_list_item_t*)ofi_recv_pkt, &new_pkt->super);
|
||||
recv_msg_queue->pkt_recd++;
|
||||
pkt_added = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!pkt_added) {
|
||||
opal_list_append(&recv_msg_queue->pkt_list,&new_pkt->super);
|
||||
recv_msg_queue->pkt_recd++;
|
||||
}
|
||||
}
|
||||
}
|
||||
break; //we found the msg or added it so exit out of the msg_queue loop
|
||||
}
|
||||
if( !msg_in_queue ) {
|
||||
/*add to the queue as this is the first packet for [msgid,sender] */
|
||||
new_msg = OBJ_NEW(ofi_recv_msg_queue_t);
|
||||
new_msg->msgid = msg_hdr.msgid;
|
||||
new_msg->sender = msg_hdr.origin;
|
||||
new_msg->tot_pkts = msg_hdr.tot_pkts;
|
||||
new_msg->pkt_recd = 1;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding first Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_msg->msgid, new_msg->sender.jobid, new_msg->sender.vpid);
|
||||
opal_list_append(&new_msg->pkt_list, &new_pkt->super);
|
||||
opal_list_append(&orte_rml_ofi.recv_msg_queue_list, &new_msg->super);
|
||||
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void send_msg(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
|
||||
orte_process_name_t *peer = &(req->send.dst);
|
||||
orte_rml_tag_t tag = req->send.tag;
|
||||
char *dest_ep_name;
|
||||
size_t dest_ep_namelen = 0;
|
||||
int ret = OPAL_ERROR;
|
||||
uint32_t total_packets;
|
||||
fi_addr_t dest_fi_addr;
|
||||
orte_rml_send_t *snd;
|
||||
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
|
||||
uint8_t conduit_id = req->conduit_id;
|
||||
orte_rml_ofi_send_pkt_t* ofi_msg_pkt;
|
||||
size_t datalen_per_pkt, hdrsize, data_in_pkt; // the length of data in per packet excluding the header size
|
||||
|
||||
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = *peer;
|
||||
snd->origin = *ORTE_PROC_MY_NAME;
|
||||
snd->tag = tag;
|
||||
if (NULL != req->send.iov) {
|
||||
snd->iov = req->send.iov;
|
||||
snd->count = req->send.count;
|
||||
snd->cbfunc.iov = req->send.cbfunc.iov;
|
||||
} else {
|
||||
snd->buffer = req->send.buffer;
|
||||
snd->cbfunc.buffer = req->send.cbfunc.buffer;
|
||||
}
|
||||
snd->cbdata = req->send.cbdata;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s send_msg_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
/* get the peer address by doing modex_receive */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
switch ( orte_rml_ofi.ofi_conduits[conduit_id].fabric_info->addr_format)
|
||||
{
|
||||
case FI_SOCKADDR_IN :
|
||||
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_SOCKADDR_IN, peer , (char **) &dest_ep_name, &dest_ep_namelen);
|
||||
/*print the sockaddr - port and s_addr */
|
||||
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*) dest_ep_name;
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s obtained for peer %s port = 0x%printinx, InternetAddr = %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),ntohs(ep_sockaddr->sin_port),
|
||||
inet_ntoa(ep_sockaddr->sin_addr));
|
||||
break;
|
||||
case FI_ADDR_PSMX :
|
||||
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_ADDR_PSMX, peer , (char **) &dest_ep_name, &dest_ep_namelen);
|
||||
break;
|
||||
default:
|
||||
/* we shouldn't be getting here as only above are supported and address sent
|
||||
* to PMIX (OPAL_MODEX_SEND) in orte_component_init() */
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Error: Unhandled address format type in ofi_send_msg", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
return;
|
||||
}
|
||||
opal_output_verbose(50, orte_rml_base_framework.framework_output,
|
||||
"%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen);
|
||||
|
||||
|
||||
if ( OPAL_SUCCESS == ret) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), dest_ep_namelen);
|
||||
ret = fi_av_insert(orte_rml_ofi.ofi_conduits[conduit_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
|
||||
if( ret != 1) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s fi_av_insert failed in send_msg() returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV failed to obtain %s peer ep name ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer));
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
return;
|
||||
}
|
||||
|
||||
ofi_send_req->send = snd;
|
||||
ofi_send_req->completion_count = 1;
|
||||
|
||||
/* [DESC] we want to send the pid,seqnum,tag in addition to the data
|
||||
* copy all of this to header of message from the ofi_send_t* send
|
||||
*/
|
||||
ofi_send_req->hdr.dst = ofi_send_req->send->dst;
|
||||
ofi_send_req->hdr.origin = ofi_send_req->send->origin;
|
||||
ofi_send_req->hdr.seq_num = ofi_send_req->send->seq_num;
|
||||
ofi_send_req->hdr.tag = ofi_send_req->send->tag;
|
||||
|
||||
/*
|
||||
* also insert ofi plugin specific header details -
|
||||
* the unique msgid, for now initalise total_packets to 1
|
||||
*/
|
||||
ofi_send_req->hdr.msgid = orte_rml_ofi.cur_msgid;
|
||||
orte_rml_ofi.cur_msgid += 1;
|
||||
total_packets = 1;
|
||||
|
||||
/* copy the buffer/iov/data to the ofi_send_req->datablob and update ofi_send_req->length*/
|
||||
ofi_send_req->length = 0;
|
||||
if( NULL != ofi_send_req->send->buffer) {
|
||||
ofi_send_req->length = ofi_send_req->send->buffer->bytes_used;
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
memcpy(ofi_send_req->data_blob ,
|
||||
ofi_send_req->send->buffer->base_ptr,
|
||||
ofi_send_req->send->buffer->bytes_used);
|
||||
} else if ( NULL != ofi_send_req->send->iov) {
|
||||
for (int i=0; i < ofi_send_req->send->count; i++) {
|
||||
ofi_send_req->length += ofi_send_req->send->iov[i].iov_len;
|
||||
}
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
int iovlen=0;
|
||||
for (int i=0; i < ofi_send_req->send->count; i++) {
|
||||
memcpy((ofi_send_req->data_blob + iovlen ),
|
||||
ofi_send_req->send->iov[i].iov_base,
|
||||
ofi_send_req->send->iov[i].iov_len);
|
||||
iovlen += ofi_send_req->send->iov[i].iov_len;
|
||||
}
|
||||
} else {
|
||||
//just send the data
|
||||
ofi_send_req->length = ofi_send_req->send->count;
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
memcpy(ofi_send_req->data_blob ,
|
||||
ofi_send_req->send->data,
|
||||
ofi_send_req->send->count);
|
||||
}
|
||||
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Completed copying all data into ofi_send_req->data_blob, total data - %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->length );
|
||||
|
||||
/* Each packet will have header information, so the data length in each packet is datalen_per_packet.
|
||||
* check if the ofi_send_req->send->buffer->bytes_used is greater than the data per packet datalen_per_packet(recv buffer)
|
||||
* if so fragment and add info to header and send it in a loop back-to-back */
|
||||
hdrsize = sizeof(orte_rml_ofi_msg_header_t);
|
||||
datalen_per_pkt = MIN_MULTI_BUF_SIZE - hdrsize;
|
||||
if (ofi_send_req->length > datalen_per_pkt )
|
||||
{
|
||||
total_packets = ( ofi_send_req->length / datalen_per_pkt ) + 1 ;
|
||||
}
|
||||
ofi_send_req->hdr.tot_pkts = total_packets;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s datalen_per_pkt = %d, ofi_send_req->length= %d, total packets = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen_per_pkt, ofi_send_req->length, total_packets );
|
||||
|
||||
/* in a loop send create and send the packets */
|
||||
for(size_t pkt_num=1,sent_data=0; sent_data < ofi_send_req->length; pkt_num++) {
|
||||
ofi_send_req->hdr.cur_pkt_num = pkt_num;
|
||||
/* create the packet */
|
||||
ofi_msg_pkt = OBJ_NEW(orte_rml_ofi_send_pkt_t);
|
||||
data_in_pkt = ((ofi_send_req->length - sent_data) >= datalen_per_pkt) ?
|
||||
datalen_per_pkt : (ofi_send_req->length - sent_data);
|
||||
ofi_msg_pkt->pkt_size = hdrsize + data_in_pkt;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Packet %d -> data_in_pkt= %d, header_size= %d, pkt_size=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pkt_num,data_in_pkt,hdrsize,ofi_msg_pkt->pkt_size );
|
||||
/* copy the header and data for this pkt */
|
||||
ofi_msg_pkt->data = malloc( ofi_msg_pkt->pkt_size);
|
||||
memcpy(ofi_msg_pkt->data, &ofi_send_req->hdr, hdrsize );
|
||||
memcpy( (ofi_msg_pkt->data + hdrsize ),
|
||||
(ofi_send_req->data_blob + sent_data),
|
||||
data_in_pkt);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Copying header, data into packets completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
/* add it to list */
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Before adding packet %d to list. List addr -> 0x%x, ofi_msg_pkt->super is 0x%x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pkt_num,&(ofi_send_req->pkt_list), &ofi_msg_pkt->super );
|
||||
opal_list_append(&(ofi_send_req->pkt_list), &ofi_msg_pkt->super);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s adding packet %d to list done successful",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pkt_num );
|
||||
sent_data += data_in_pkt;
|
||||
}
|
||||
|
||||
if( ofi_send_req->hdr.tot_pkts != ofi_send_req->hdr.cur_pkt_num ) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Error: Total packets calculated [%d] does not match total created-%d pkts to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->hdr.tot_pkts, ofi_send_req->hdr.cur_pkt_num,
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
}
|
||||
/* do the fi_send() for all the pkts */
|
||||
ofi_send_req->completion_count= ofi_send_req->hdr.tot_pkts;
|
||||
OPAL_LIST_FOREACH(ofi_msg_pkt, &ofi_send_req->pkt_list, orte_rml_ofi_send_pkt_t) {
|
||||
/* debug purpose - copying the header from packet to verify if it is correct */
|
||||
struct orte_rml_ofi_msg_header_t *cur_hdr;
|
||||
cur_hdr = (struct orte_rml_ofi_msg_header_t* ) ofi_msg_pkt->data;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Sending Pkt[%d] of total %d pkts for msgid:%d to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_hdr->cur_pkt_num, ofi_send_req->completion_count,
|
||||
cur_hdr->msgid, ORTE_NAME_PRINT(peer), tag);
|
||||
/* end debug*/
|
||||
|
||||
RML_OFI_RETRY_UNTIL_DONE(fi_send(orte_rml_ofi.ofi_conduits[conduit_id].ep,
|
||||
ofi_msg_pkt->data,
|
||||
ofi_msg_pkt->pkt_size,
|
||||
fi_mr_desc(orte_rml_ofi.ofi_conduits[conduit_id].mr_multi_recv),
|
||||
dest_fi_addr,
|
||||
(void *)&ofi_send_req->ctx));
|
||||
|
||||
}
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s End of send_msg_transport. fi_send completed to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
free(dest_ep_name);
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
|
||||
int orte_rml_ofi_send_nb(void* mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req;
|
||||
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;
|
||||
int conduit_id = ofi_mod->cur_transport_id;
|
||||
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml_ofi_send_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
|
||||
/* Invalid conduit ID provided */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (ORTE_RML_TAG_INVALID == tag) {
|
||||
/* cannot send to an invalid tag */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (NULL == peer ||
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
|
||||
/* cannot send to an invalid peer */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
/* get ourselves into an event to protect against
|
||||
* race conditions and threads
|
||||
*/
|
||||
req = OBJ_NEW(orte_rml_send_request_t);
|
||||
req->conduit_id = conduit_id;
|
||||
req->send.dst = *peer;
|
||||
req->send.iov = iov;
|
||||
req->send.count = count;
|
||||
req->send.tag = tag;
|
||||
req->send.cbfunc.iov = cbfunc;
|
||||
req->send.cbdata = cbdata;
|
||||
|
||||
/* setup the event for the send callback */
|
||||
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
|
||||
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int orte_rml_ofi_send_buffer_nb(void* mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req;
|
||||
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;
|
||||
int conduit_id = ofi_mod->cur_transport_id;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml_ofi_send_buffer_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
|
||||
/* Invalid conduit ID provided */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (ORTE_RML_TAG_INVALID == tag) {
|
||||
/* cannot send to an invalid tag */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (NULL == peer ||
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
|
||||
/* cannot send to an invalid peer */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
/* get ourselves into an event to protect against
|
||||
* race conditions and threads
|
||||
*/
|
||||
req = OBJ_NEW(orte_rml_send_request_t);
|
||||
req->conduit_id = conduit_id;
|
||||
req->send.dst = *peer;
|
||||
req->send.buffer = buffer;
|
||||
req->send.tag = tag;
|
||||
req->send.cbfunc.buffer = cbfunc;
|
||||
req->send.cbdata = cbdata;
|
||||
|
||||
/* setup the event for the send callback */
|
||||
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
|
||||
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -21,9 +21,6 @@
|
||||
sources = \
|
||||
rml_oob.h \
|
||||
rml_oob_component.c \
|
||||
rml_oob_contact.c \
|
||||
rml_oob_exception.c \
|
||||
rml_oob_ping.c \
|
||||
rml_oob_send.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
|
@ -37,47 +37,35 @@
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
struct orte_rml_base_module_t super;
|
||||
opal_list_t exceptions;
|
||||
opal_list_t queued_routing_messages;
|
||||
orte_rml_base_module_t api;
|
||||
opal_list_t queued_routing_messages;
|
||||
opal_event_t *timer_event;
|
||||
struct timeval timeout;
|
||||
struct timeval timeout;
|
||||
} orte_rml_oob_module_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
|
||||
extern orte_rml_oob_module_t orte_rml_oob_module;
|
||||
|
||||
int orte_rml_oob_init(void);
|
||||
void orte_rml_oob_fini(void);
|
||||
int orte_rml_oob_ft_event(int state);
|
||||
void orte_rml_oob_fini(struct orte_rml_base_module_t *mod);
|
||||
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_ping(const char* uri,
|
||||
int orte_rml_oob_ping(struct orte_rml_base_module_t *mod,
|
||||
const char* uri,
|
||||
const struct timeval* tv);
|
||||
|
||||
char* orte_rml_oob_get_uri(void);
|
||||
void orte_rml_oob_set_uri(const char*);
|
||||
|
||||
int orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc);
|
||||
int orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc);
|
||||
void orte_rml_oob_exception_callback(orte_process_name_t *peer,
|
||||
orte_rml_exception_t exception);
|
||||
|
||||
|
||||
void orte_rml_oob_purge(orte_process_name_t *peer);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -51,10 +51,13 @@
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "rml_oob.h"
|
||||
|
||||
static orte_rml_base_module_t* rml_oob_init(int* priority);
|
||||
static int rml_oob_open(void);
|
||||
static int rml_oob_close(void);
|
||||
|
||||
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes);
|
||||
static orte_rml_pathway_t* query_transports(void);
|
||||
static char* get_contact_info(void);
|
||||
static void set_contact_info(const char *uri);
|
||||
static void close_conduit(orte_rml_base_module_t *mod);
|
||||
/**
|
||||
* component definition
|
||||
*/
|
||||
@ -62,156 +65,159 @@ orte_rml_component_t mca_rml_oob_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
.rml_version = {
|
||||
ORTE_RML_BASE_VERSION_2_0_0,
|
||||
.base = {
|
||||
ORTE_RML_BASE_VERSION_3_0_0,
|
||||
|
||||
.mca_component_name = "oob",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = rml_oob_open,
|
||||
.mca_close_component = rml_oob_close,
|
||||
|
||||
},
|
||||
.rml_data = {
|
||||
.data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.rml_init = rml_oob_init,
|
||||
};
|
||||
|
||||
orte_rml_oob_module_t orte_rml_oob_module = {
|
||||
{
|
||||
.finalize = orte_rml_oob_fini,
|
||||
|
||||
.get_contact_info = orte_rml_oob_get_uri,
|
||||
.set_contact_info = orte_rml_oob_set_uri,
|
||||
|
||||
.ping = orte_rml_oob_ping,
|
||||
|
||||
.send_nb = orte_rml_oob_send_nb,
|
||||
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
|
||||
|
||||
.add_exception_handler = orte_rml_oob_add_exception,
|
||||
.del_exception_handler = orte_rml_oob_del_exception,
|
||||
.ft_event = orte_rml_oob_ft_event,
|
||||
.purge = orte_rml_oob_purge
|
||||
}
|
||||
.priority = 5,
|
||||
.open_conduit = open_conduit,
|
||||
.query_transports = query_transports,
|
||||
.get_contact_info = get_contact_info,
|
||||
.set_contact_info = set_contact_info,
|
||||
.close_conduit = close_conduit
|
||||
};
|
||||
|
||||
/* Local variables */
|
||||
static bool init_done = false;
|
||||
static orte_rml_pathway_t pathway;
|
||||
static orte_rml_base_module_t base_module = {
|
||||
.component = (struct orte_rml_component_t*)&mca_rml_oob_component,
|
||||
.ping = NULL,
|
||||
.send_nb = orte_rml_oob_send_nb,
|
||||
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
|
||||
.purge = NULL
|
||||
};
|
||||
|
||||
static int
|
||||
rml_oob_open(void)
|
||||
static int rml_oob_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
rml_oob_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static orte_rml_base_module_t*
|
||||
rml_oob_init(int* priority)
|
||||
{
|
||||
if (init_done) {
|
||||
*priority = 1;
|
||||
return &orte_rml_oob_module.super;
|
||||
}
|
||||
|
||||
*priority = 1;
|
||||
|
||||
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
|
||||
|
||||
init_done = true;
|
||||
return &orte_rml_oob_module.super;
|
||||
}
|
||||
|
||||
int
|
||||
orte_rml_oob_init(void)
|
||||
{
|
||||
/* enable the base receive to get updates on contact info */
|
||||
orte_rml_base_comm_start();
|
||||
/* ask our OOB transports for their info */
|
||||
OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t);
|
||||
pathway.component = strdup("oob");
|
||||
ORTE_OOB_GET_TRANSPORTS(&pathway.transports);
|
||||
/* add any component attributes of our own */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
orte_rml_oob_fini(void)
|
||||
static int rml_oob_close(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&pathway);
|
||||
|
||||
while (NULL !=
|
||||
(item = opal_list_remove_first(&orte_rml_oob_module.exceptions))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions);
|
||||
|
||||
/* clear the base receive */
|
||||
orte_rml_base_comm_stop();
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
int
|
||||
orte_rml_oob_ft_event(int state) {
|
||||
int exit_status = ORTE_SUCCESS;
|
||||
int ret;
|
||||
static orte_rml_base_module_t* make_module(void)
|
||||
{
|
||||
orte_rml_oob_module_t *mod;
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CHECKPOINT);
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CONTINUE);
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_RESTART);
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
/* create a new module */
|
||||
mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t));
|
||||
if (NULL == mod) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* copy the APIs over to it */
|
||||
memcpy(mod, &base_module, sizeof(base_module));
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
(void) mca_base_framework_close(&orte_oob_base_framework);
|
||||
/* initialize its internal storage */
|
||||
OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t);
|
||||
mod->timer_event = NULL;
|
||||
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
/* return the result */
|
||||
return (orte_rml_base_module_t*)mod;
|
||||
}
|
||||
|
||||
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
||||
{
|
||||
char *comp_attrib = NULL;
|
||||
char **comps;
|
||||
int i;
|
||||
orte_attribute_t *attr;
|
||||
|
||||
opal_output_verbose(20,orte_rml_base_framework.framework_output,
|
||||
"%s - Entering rml_oob_open_conduit()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* someone may require this specific component, so look for "oob" */
|
||||
if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) &&
|
||||
NULL != comp_attrib) {
|
||||
/* they specified specific components - could be multiple */
|
||||
comps = opal_argv_split(comp_attrib, ',');
|
||||
for (i=0; NULL != comps[i]; i++) {
|
||||
if (0 == strcmp(comps[i], "oob")) {
|
||||
/* we are a candidate */
|
||||
opal_argv_free(comps);
|
||||
return make_module();
|
||||
}
|
||||
}
|
||||
|
||||
if( ORTE_SUCCESS != (ret = orte_oob_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
/* we are not a candidate */
|
||||
opal_argv_free(comps);
|
||||
return NULL;
|
||||
} else if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) &&
|
||||
NULL != comp_attrib) {
|
||||
/* see if we are on the list */
|
||||
comps = opal_argv_split(comp_attrib, ',');
|
||||
for (i=0; NULL != comps[i]; i++) {
|
||||
if (0 == strcmp(comps[i], "oob")) {
|
||||
/* we cannot be a candidate */
|
||||
opal_argv_free(comps);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
|
||||
/* Alternatively, check the attributes to see if we qualify - we only handle
|
||||
* "routed", "Ethernet", and "TCP" */
|
||||
OPAL_LIST_FOREACH(attr, attributes, orte_attribute_t) {
|
||||
|
||||
}
|
||||
|
||||
cleanup:
|
||||
return exit_status;
|
||||
/* if we get here, we cannot handle it */
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
int
|
||||
orte_rml_oob_ft_event(int state) {
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
static orte_rml_pathway_t* query_transports(void)
|
||||
{
|
||||
/* if we have any available transports, make them available */
|
||||
if (0 < opal_list_get_size(&pathway.transports)) {
|
||||
return &pathway;
|
||||
}
|
||||
/* if not, then return NULL */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Tear down the per-conduit state of an OOB module.
 *
 * Only the queued_routing_messages list object is destructed here.
 *
 * @param[in] md  Module to close, interpreted as orte_rml_oob_module_t
 */
static void close_conduit(orte_rml_base_module_t *md)
{
    orte_rml_oob_module_t *oob_mod = (orte_rml_oob_module_t*)md;

    /* destruct the list of queued messages */
    OBJ_DESTRUCT(&oob_mod->queued_routing_messages);

    /* the rml_base_stub takes care of clearing the base receive
     * and freeing the module */
}
|
||||
|
||||
/**
 * Obtain this process's contact string from the OOB layer.
 *
 * @return Whatever pointer ORTE_OOB_GET_URI stores through its argument.
 */
static char* get_contact_info(void)
{
    char *uri;

    /* the macro fills in the pointer we hand it */
    ORTE_OOB_GET_URI(&uri);

    return uri;
}
|
||||
|
||||
/**
 * Hand a contact string to the OOB layer.
 *
 * @param[in] uri  Contact string passed unchanged to ORTE_OOB_SET_URI
 */
static void set_contact_info(const char *uri)
|
||||
{
|
||||
ORTE_OOB_SET_URI(uri);
|
||||
}
|
||||
#endif
|
||||
|
@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
/**
 * Obtain this process's contact string from the OOB layer.
 *
 * @return Whatever pointer ORTE_OOB_GET_URI stores through its argument.
 */
char* orte_rml_oob_get_uri(void)
{
    char *uri;

    /* the macro fills in the pointer we hand it */
    ORTE_OOB_GET_URI(&uri);

    return uri;
}
|
||||
|
||||
|
||||
/**
 * Hand a contact string to the OOB layer.
 *
 * @param[in] uri  Contact string passed unchanged to ORTE_OOB_SET_URI
 */
void orte_rml_oob_set_uri(const char* uri)
|
||||
{
|
||||
ORTE_OOB_SET_URI(uri);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Purge queued routing messages for a peer.
 *
 * NOTE: the entire body is compiled out with "#if 0", so this function is
 * currently a no-op and @p peer is unused.  The disabled code walks
 * orte_rml_oob_module.queued_routing_messages and removes/releases entries
 * whose header destination equals @p peer, or equals the next hop returned
 * by orte_routed.get_route() for that destination.
 *
 * @param[in] peer  Process whose queued messages would be dropped
 */
void orte_rml_oob_purge(orte_process_name_t *peer)
|
||||
{
|
||||
#if 0
|
||||
opal_list_item_t *item, *next;
|
||||
orte_rml_oob_queued_msg_t *qmsg;
|
||||
orte_rml_oob_msg_header_t *hdr;
|
||||
orte_process_name_t step;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
|
||||
/* clear our message queue */
|
||||
item = opal_list_get_first(&orte_rml_oob_module.queued_routing_messages);
|
||||
while (item != opal_list_get_end(&orte_rml_oob_module.queued_routing_messages)) {
|
||||
next = opal_list_get_next(item);
|
||||
qmsg = (orte_rml_oob_queued_msg_t*)item;
|
||||
hdr = (orte_rml_oob_msg_header_t*) qmsg->payload[0].iov_base;
|
||||
step = orte_routed.get_route(&hdr->destination);
|
||||
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
|
||||
if (OPAL_EQUAL ==
|
||||
orte_util_compare_name_fields(mask, peer, &hdr->destination)) {
|
||||
opal_list_remove_item(&orte_rml_oob_module.queued_routing_messages, item);
|
||||
OBJ_RELEASE(item);
|
||||
} else if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &step, &hdr->destination)) {
|
||||
opal_list_remove_item(&orte_rml_oob_module.queued_routing_messages, item);
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
item = next;
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,84 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
struct orte_rml_oob_exception_t {
|
||||
opal_list_item_t super;
|
||||
orte_rml_exception_callback_t cbfunc;
|
||||
};
|
||||
typedef struct orte_rml_oob_exception_t orte_rml_oob_exception_t;
|
||||
static OBJ_CLASS_INSTANCE(orte_rml_oob_exception_t, opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
|
||||
void
|
||||
orte_rml_oob_exception_callback(orte_process_name_t *peer,
|
||||
orte_rml_exception_t exception)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
|
||||
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
|
||||
ex->cbfunc(peer, exception);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
orte_rml_oob_exception_t *ex = OBJ_NEW(orte_rml_oob_exception_t);
|
||||
|
||||
if (NULL == ex) return ORTE_ERROR;
|
||||
|
||||
ex->cbfunc = cbfunc;
|
||||
opal_list_append(&orte_rml_oob_module.exceptions, &ex->super);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
|
||||
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
|
||||
|
||||
if (cbfunc == ex->cbfunc) {
|
||||
opal_list_remove_item(&orte_rml_oob_module.exceptions, item);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
/**
 * Ping stub for the OOB RML component.
 *
 * Neither argument is read; the function unconditionally reports that
 * ping is not supported.
 *
 * @param[in] uri  Unused
 * @param[in] tv   Unused
 *
 * @retval ORTE_ERR_NOT_SUPPORTED  Always
 */
int
|
||||
orte_rml_oob_ping(const char* uri,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
@ -203,7 +203,8 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -246,7 +247,8 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
|
@ -55,10 +55,10 @@ BEGIN_C_DECLS
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
struct opal_buffer_t;
|
||||
/* forward declare */
|
||||
struct orte_rml_base_module_t;
|
||||
struct orte_rml_API_module_t;
|
||||
struct orte_rml_component_t;
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
orte_process_name_t name;
|
||||
@ -79,60 +79,8 @@ ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sende
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_open_channel_recv_callback(int status,
|
||||
orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status,
|
||||
orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/**
|
||||
* RML component initialization
|
||||
*
|
||||
* Create an instance (module) of the given RML component. Upon
|
||||
* returning, the module data structure should be fully populated and
|
||||
* all functions should be usable. Non-blocking receive calls may be
|
||||
* posted upon return from this function, although communication need
|
||||
* not be enabled until enable_comm() call is called on the module.
|
||||
*
|
||||
* @return Exactly one module created by the call to the component's
|
||||
* initialization function should be returned. The module structure
|
||||
* should be fully populated, and the priority should be set to a
|
||||
* reasonable value.
|
||||
*
|
||||
* @param[out] priority Selection priority for the given component
|
||||
*
|
||||
* @retval NULL An error occurred and initialization did not occur
|
||||
* @retval non-NULL The module was successfully initialized
|
||||
*/
|
||||
typedef struct orte_rml_base_module_t* (*orte_rml_component_init_fn_t)(int *priority);
|
||||
|
||||
/**
|
||||
* RML component interface
|
||||
*
|
||||
* Component interface for the RML framework. A public instance of
|
||||
* this structure, called mca_rml_[component name]_component, must
|
||||
* exist in any RML component.
|
||||
*/
|
||||
struct orte_rml_component_2_0_0_t {
|
||||
/* Base component description */
|
||||
mca_base_component_t rml_version;
|
||||
/* Base component data block */
|
||||
mca_base_component_data_t rml_data;
|
||||
/* Component initialization function */
|
||||
orte_rml_component_init_fn_t rml_init;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rml_component_2_0_0_t orte_rml_component_t;
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
/* RML CALLBACK FUNCTION DEFINITIONS */
|
||||
|
||||
/**
|
||||
* Function prototype for callback from non-blocking iovec send and recv
|
||||
@ -205,77 +153,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer,
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/**
|
||||
* Enable communication using the RML module
|
||||
*
|
||||
* Enable communication using the RML module. Before this call, only
|
||||
* the non-blocking receive and ping interfaces may be used. After
|
||||
* this call returns, the module must be fully functional, capable of
|
||||
* sending and receiving data. This function will be called after the
|
||||
* process has been assigned a process identifier.
|
||||
*
|
||||
* @note While the ping interface may be used between the call to the
|
||||
* component's initialization function and this call, care must be
|
||||
* taken when doing so. The remote process must have already called
|
||||
* enable_comm() or the remote process will not reply to the ping.
|
||||
* As the ping interface is generally used by MPI processes to find a
|
||||
* daemon to contact, this should not be a major limitation.
|
||||
*
|
||||
* @retval ORTE_SUCCESS Communications successfully enabled
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_enable_comm_fn_t)(void);
|
||||
|
||||
/**
|
||||
* Finalize the RML module
|
||||
*
|
||||
* Finalize the RML module, ending all communication and cleaning up
|
||||
* all resources associated with the module. After the finalize
|
||||
* function is called, all interface functions (and the module
|
||||
* structure itself) are not available for use.
|
||||
*/
|
||||
typedef void (*orte_rml_module_finalize_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Get a "contact info" string for the local process
|
||||
*
|
||||
* Get a "contact info" string that can be used by other processes to
|
||||
* share the contact information for the given process. The "contact
|
||||
* info" string includes the process identifier for the given process
|
||||
* and uses only basic ascii characters. It should be quoted when
|
||||
* evaluated by a shell, although no special escaping is necessary.
|
||||
*
|
||||
* @note The function may return a contact info string which contains
|
||||
* multiple addresses.
|
||||
*
|
||||
* @retval non-NULL The contact information for this process
|
||||
* @retval NULL An error occurred when trying to get the current
|
||||
* process contact info
|
||||
*/
|
||||
typedef char* (*orte_rml_module_get_contact_info_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Update the RML with a remote process's contact info
|
||||
*
|
||||
* Update the RML with a remote process's contact information, as
|
||||
* returned from the get_contact_info() function on the remote
|
||||
* process. Before a send can be initiated to a remote process,
|
||||
* either this function must be called for that process or that
|
||||
* process must have already established a connection to the local
|
||||
* process.
|
||||
*
|
||||
* @note The user may not always explicitly call this function
|
||||
* directly, but may instead cause it to be called through one of the
|
||||
* contact setup functions available in
|
||||
* orte/mca/rml/base/rml_contact.h.
|
||||
*
|
||||
* @param[in] contact_info The contact information string of a peer
|
||||
*/
|
||||
typedef void (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info);
|
||||
/* RML INTERNAL MODULE API DEFINITION */
|
||||
|
||||
|
||||
/**
|
||||
@ -294,7 +172,8 @@ typedef void (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info);
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
|
||||
typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
@ -321,7 +200,8 @@ typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -351,83 +231,13 @@ typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
|
||||
/**
|
||||
* Register or deregister an exception callback function
|
||||
*
|
||||
* Register or deregister a callback when an asynchronous
|
||||
* communication exception occurs.
|
||||
*
|
||||
* @param[in] cbfunc User callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The operation completed successfully
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_exception_fn_t)(orte_rml_exception_callback_t cbfunc);
|
||||
|
||||
|
||||
/**
|
||||
* Handle fault tolerance updates
|
||||
*
|
||||
* Handle fault tolerance updates
|
||||
*
|
||||
* @param[in] state Fault tolerance state update
|
||||
*
|
||||
* @retval ORTE_SUCCESS The operation completed successfully
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_ft_event_fn_t)(int state);
|
||||
|
||||
/**
|
||||
* Purge the RML/OOB of contact info and pending messages
|
||||
* to/from a specified process. Used when a process aborts
|
||||
@ -435,80 +245,128 @@ typedef int (*orte_rml_module_ft_event_fn_t)(int state);
|
||||
*/
|
||||
typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
/********* NEW RML QOS MESSAGING APIS *****************/
|
||||
/***** Questions *****/
|
||||
/*
|
||||
1 : Should we provide a func for the user to get qos attributes of a channel? (do we allow for sets?? )
|
||||
2 : Should open channel - have a channel error callback function?
|
||||
*/
|
||||
typedef void (*orte_rml_channel_callback_fn_t) (int status,
|
||||
orte_rml_channel_num_t channel_num,
|
||||
orte_process_name_t * peer,
|
||||
opal_list_t *qos_attributes,
|
||||
void * cbdata);
|
||||
/**
|
||||
* Function prototype for callback from non-blocking iovec send on a channel
|
||||
*
|
||||
* Function prototype for callback from non-blocking iovec send on a channel.
|
||||
* On send, the iovec pointer will be the same pointer passed to
|
||||
* send_nb and count will equal the count given to send.
|
||||
*
|
||||
*
|
||||
* @note The in/out parameter annotations are relative to the user's callback
|
||||
* function.
|
||||
*
|
||||
* @param[in] status Completion status
|
||||
* @param[in] channel Opaque channel number on which the msg was sent (input to rml_send_channel)
|
||||
* @param[in] msg Pointer to the array of iovec that was sent
|
||||
* or to a single iovec that has been recvd
|
||||
* @param[in] count Number of iovecs in the array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbdata User data passed to send_nb()
|
||||
*/
|
||||
typedef void (*orte_rml_send_channel_callback_fn_t)(int status,
|
||||
orte_rml_channel_num_t channel,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
/**
|
||||
* Function prototype for callback from non-blocking buffer send on a channel
|
||||
*
|
||||
* Function prototype for callback from non-blocking buffer send on a
|
||||
* channel. On send, the buffer will be the same pointer passed to
|
||||
* send_buffer_nb.
|
||||
*
|
||||
* @note The in/out parameter annotations are relative to the user's callback
|
||||
* function.
|
||||
*
|
||||
* @param[in] status Completion status
|
||||
* @param[in] channel channel number on which the msg was sent
|
||||
* @param[in] buffer Message buffer
|
||||
* @param[in] tag User defined tag for matching send
|
||||
* @param[in] cbdata User data passed to send_buffer_nb()
|
||||
*/
|
||||
typedef void (*orte_rml_send_buffer_channel_callback_fn_t)(int status,
|
||||
orte_rml_channel_num_t channel,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* * Open a messaging channel with specified QoS to a specific peer
|
||||
*
|
||||
* @param[in] peer End point Peer to which the channel needs to be opened
|
||||
* @param[in] qos_attributes List of Quality of Service Attributes for the channel
|
||||
* @param[in] cbfunc Callback function on channel create (open) completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS - the channel was successfully created at the source and a request was sent to the dest.
|
||||
* @retval ORTE_ERROR - unknown error
|
||||
* @retval ORTE_ERROR_UNSUPPORTED_QOS - the requested QoS cannot be provided.
|
||||
* RML internal module interface - these will be implemented by all RML components
|
||||
*/
|
||||
typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
opal_list_t *qos_attributes,
|
||||
orte_rml_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef struct {
|
||||
/* pointer to the parent component for this module */
|
||||
struct orte_rml_component_t *component;
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_module_ping_fn_t ping;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_module_send_nb_fn_t send_nb;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_module_purge_fn_t purge;
|
||||
} orte_rml_base_module_t;
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML PUBLIC MODULE API DEFINITION */
|
||||
|
||||
/** Open conduit - call each component and see if they can provide a
|
||||
* conduit that can satisfy all these attributes - return the conduit id
|
||||
* (a negative value indicates error)
|
||||
*/
|
||||
typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Close a conduit - allow the component to cleanup.
|
||||
*/
|
||||
typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
* @param[out] transports List of providers and their attributes.
|
||||
*/
|
||||
typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports);
|
||||
|
||||
/**
|
||||
* Get a "contact info" string for the local process
|
||||
*
|
||||
* Get a "contact info" string that can be used by other processes to
|
||||
* share the contact information for the given process. The "contact
|
||||
* info" string includes the process identifier for the given process
|
||||
* and uses only basic ascii characters. It should be quoted when
|
||||
* evaluated by a shell, although no special escaping is necessary.
|
||||
*
|
||||
* @note The function may return a contact info string which contains
|
||||
* multiple addresses.
|
||||
*
|
||||
* @retval non-NULL The contact information for this process
|
||||
* @retval NULL An error occurred when trying to get the current
|
||||
* process contact info
|
||||
*/
|
||||
typedef char* (*orte_rml_API_get_contact_info_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Update the RML with a remote process's contact info
|
||||
*
|
||||
* Update the RML with a remote process's contact information, as
|
||||
* returned from the get_contact_info() function on the remote
|
||||
* process. Before a send can be initiated to a remote process,
|
||||
* either this function must be called for that process or that
|
||||
* process must have already established a connection to the local
|
||||
* process.
|
||||
*
|
||||
* @note The user may not always explicitly call this function
|
||||
* directly, but may instead cause it to be called through one of the
|
||||
* contact setup functions available in
|
||||
* orte/mca/rml/base/rml_contact.h.
|
||||
*
|
||||
* @param[in] contact_info The contact information string of a peer
|
||||
*/
|
||||
typedef void (*orte_rml_API_set_contact_info_fn_t)(const char *contact_info);
|
||||
|
||||
|
||||
/**
|
||||
* "Ping" another process to determine availability
|
||||
*
|
||||
* Ping another process to determine if it is available. This
|
||||
* function only verifies that the process is alive and will allow a
|
||||
* connection to the local process. It does *not* qualify as
|
||||
* establishing communication with the remote process, as required by
|
||||
* the note for set_contact_info().
|
||||
*
|
||||
* @param[in] contact_info The contact info string for the remote process
|
||||
* @param[in] tv Timeout after which the ping should be failed
|
||||
*
|
||||
* @retval ORTE_SUCCESS The process is available and will allow connections
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_API_ping_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
/**
|
||||
* "Ping" another process to determine availability using the default conduit_id
|
||||
*
|
||||
* Ping another process to determine if it is available. This
|
||||
* function only verifies that the process is alive and will allow a
|
||||
* connection to the local process. It does *not* qualify as
|
||||
* establishing communication with the remote process, as required by
|
||||
* the note for set_contact_info().
|
||||
*
|
||||
* @param[in] contact_info The contact info string for the remote process
|
||||
* @param[in] tv Timeout after which the ping should be failed
|
||||
*
|
||||
* @retval ORTE_SUCCESS The process is available and will allow connections
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_API_ping_fn_t)(const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
/**
|
||||
* Send an iovec non-blocking message
|
||||
@ -520,7 +378,7 @@ typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.)
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] msg Pointer to an array of iovecs to be sent
|
||||
* @param[in] count Number of iovecs in array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
@ -529,120 +387,284 @@ typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist.
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_channel_nb_fn_t)(orte_rml_channel_num_t channel,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_send_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef int (*orte_rml_API_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Send a buffer non-blocking message
|
||||
*
|
||||
* Send a buffer on a specific pre-established channel. The call
|
||||
* Send a buffer to the specified peer. The call
|
||||
* will return immediately, although the buffer may not be modified
|
||||
* until the completion callback is triggered. The buffer *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.)
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist.
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
|
||||
typedef int (*orte_rml_module_send_buffer_channel_nb_fn_t) (orte_rml_channel_num_t channel,
|
||||
struct opal_buffer_t * buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_send_buffer_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* * close a messaging channel with specified QoS to a specific peer
|
||||
* Send an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer End point Peer to which the channel needs to be opened
|
||||
* @param[in] channel_num The channel number returned in the channel open completion callback function.
|
||||
* @param[in] cbfunc Callback function on channel close completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
* Send an array of iovecs to the specified peer. The call
|
||||
* will return immediately, although the iovecs may not be modified
|
||||
* until the completion callback is triggered. The iovecs *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @retval ORTE_SUCCESS - the channel was successfully closed at the source and a request was sent to the dest.
|
||||
* @retval ORTE_ERROR - unknown error
|
||||
* @retval ORTE_ERROR_UNKNOWN_CHANNEL - cannot find the specified QoS channel
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] msg Pointer to an array of iovecs to be sent
|
||||
* @param[in] count Number of iovecs in array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_close_channel_fn_t)( orte_rml_channel_num_t channel_num,
|
||||
orte_rml_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/* ******************************************************************** */
|
||||
typedef int (*orte_rml_API_send_nb_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* RML module interface
|
||||
* Send a buffer non-blocking message
|
||||
*
|
||||
* Send a buffer to the specified peer. The call
|
||||
* will return immediately, although the buffer may not be modified
|
||||
* until the completion callback is triggered. The buffer *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
struct orte_rml_base_module_t {
|
||||
/** Enable communication once a process name has been assigned */
|
||||
orte_rml_module_enable_comm_fn_t enable_comm;
|
||||
/** Shutdown the communication system and clean up resources */
|
||||
orte_rml_module_finalize_fn_t finalize;
|
||||
typedef int (*orte_rml_API_send_buffer_nb_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Purge the RML/OOB of contact info and pending messages
|
||||
* to/from a specified process. Used when a process aborts
|
||||
* and is to be restarted
|
||||
*/
|
||||
typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
/**
|
||||
* RML API interface
|
||||
*/
|
||||
typedef struct {
|
||||
/** Open Conduit **/
|
||||
orte_rml_API_open_conduit_fn_t open_conduit;
|
||||
|
||||
/** Shutdown the conduit and clean up resources */
|
||||
orte_rml_API_close_conduit_fn_t close_conduit;
|
||||
|
||||
/** Get contact information for local process */
|
||||
orte_rml_module_get_contact_info_fn_t get_contact_info;
|
||||
orte_rml_API_get_contact_info_fn_t get_contact_info;
|
||||
/** Set contact information for remote process */
|
||||
orte_rml_module_set_contact_info_fn_t set_contact_info;
|
||||
orte_rml_API_set_contact_info_fn_t set_contact_info;
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_module_ping_fn_t ping;
|
||||
orte_rml_API_ping_fn_t ping;
|
||||
|
||||
/** Ping process for connectivity check over the given conduit */
|
||||
orte_rml_API_ping_conduit_fn_t ping_conduit;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_module_send_nb_fn_t send_nb;
|
||||
orte_rml_API_send_nb_fn_t send_nb;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
|
||||
orte_rml_API_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
/** Send non-blocking iovec message over the given conduit */
|
||||
orte_rml_API_send_nb_conduit_fn_t send_nb_conduit;
|
||||
|
||||
/** Send non-blocking buffer message over the given conduit */
|
||||
orte_rml_API_send_buffer_nb_conduit_fn_t send_buffer_nb_conduit;
|
||||
|
||||
/** Receive non-blocking iovec message */
|
||||
orte_rml_module_recv_nb_fn_t recv_nb;
|
||||
orte_rml_API_recv_nb_fn_t recv_nb;
|
||||
|
||||
/** Receive non-blocking buffer message */
|
||||
orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
|
||||
/** Cancel posted non-blocking receive */
|
||||
orte_rml_module_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Add callback for communication exception */
|
||||
orte_rml_module_exception_fn_t add_exception_handler;
|
||||
/** Delete callback for communication exception */
|
||||
orte_rml_module_exception_fn_t del_exception_handler;
|
||||
|
||||
/** Fault tolerance handler */
|
||||
orte_rml_module_ft_event_fn_t ft_event;
|
||||
orte_rml_API_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_module_purge_fn_t purge;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rml_base_module_t orte_rml_base_module_t;
|
||||
orte_rml_API_purge_fn_t purge;
|
||||
|
||||
/** Query information of transport in system */
|
||||
orte_rml_API_query_transports_fn_t query_transports;
|
||||
|
||||
} orte_rml_base_API_t;
|
||||
|
||||
/** Interface for RML communication */
|
||||
ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
|
||||
ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml;
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML COMPONENT DEFINITION */
|
||||
|
||||
/**
|
||||
* RML open_conduit
|
||||
*
|
||||
* Create an instance (module) of the given RML component. Upon
|
||||
* returning, the module data structure should be fully populated and
|
||||
* all functions should be usable and will have the conduit information.
|
||||
*
|
||||
* @param[in] attributes opal_list_t of all attributes requested for the conduit.
|
||||
* Each attribute will be key-value.
|
||||
* [TODO] put in examples of the key-value here.
|
||||
* @return Exactly one module created by the call to the component's
|
||||
* initialization function should be returned. The module structure
|
||||
* should be fully populated, and the priority should be set to a
|
||||
* reasonable value.
|
||||
*
|
||||
* @retval NULL An error occurred and initialization did not occur
|
||||
* @retval non-NULL The module was successfully initialized
|
||||
*/
|
||||
typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
*/
|
||||
typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void);
|
||||
|
||||
/* Get the contact info for this component */
|
||||
typedef char* (*orte_rml_component_get_contact_info_fn_t)(void);
|
||||
|
||||
/* Set contact info */
|
||||
typedef void (*orte_rml_component_set_contact_info_fn_t)(const char *uri);
|
||||
|
||||
/** Close conduit - allow the specific component to
|
||||
* cleanup the module for this conduit
|
||||
*/
|
||||
typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod);
|
||||
|
||||
/**
|
||||
* RML component interface
|
||||
*
|
||||
* Component interface for the RML framework. A public instance of
|
||||
* this structure, called mca_rml_[component name]_component, must
|
||||
* exist in any RML component.
|
||||
*/
|
||||
struct orte_rml_component_3_0_0_t {
|
||||
/* Base component description */
|
||||
mca_base_component_t base;
|
||||
/* Base component data block */
|
||||
mca_base_component_data_t data;
|
||||
/* Component priority */
|
||||
int priority;
|
||||
/* Component interface functions */
|
||||
orte_rml_component_open_conduit_fn_t open_conduit;
|
||||
orte_rml_component_query_transports_fn_t query_transports;
|
||||
orte_rml_component_get_contact_info_fn_t get_contact_info;
|
||||
orte_rml_component_set_contact_info_fn_t set_contact_info;
|
||||
orte_rml_module_close_conduit_fn_t close_conduit;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rml_component_3_0_0_t orte_rml_component_t;
|
||||
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/** Macro for use in components that are of type rml */
|
||||
#define ORTE_RML_BASE_VERSION_2_0_0 \
|
||||
ORTE_MCA_BASE_VERSION_2_1_0("rml", 2, 0, 0)
|
||||
#define ORTE_RML_BASE_VERSION_3_0_0 \
|
||||
ORTE_MCA_BASE_VERSION_2_1_0("rml", 3, 0, 0)
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
@ -171,9 +171,15 @@ BEGIN_C_DECLS
|
||||
|
||||
#define ORTE_RML_TAG_MAX 100
|
||||
|
||||
|
||||
#define ORTE_RML_TAG_NTOH(t) ntohl(t)
|
||||
#define ORTE_RML_TAG_HTON(t) htonl(t)
|
||||
|
||||
/*** length of the tag. change this when type of orte_rml_tag_t is changed ***/
|
||||
/*** max value in uint32_t is 0xFFFF_FFFF; when converted to hex characters this is 8 **
|
||||
#define ORTE_RML_TAG_T_CHAR_LEN 8
|
||||
#define ORTE_RML_TAG_T_SPRINT "%8x" */
|
||||
|
||||
/**
|
||||
* Message matching tag
|
||||
*
|
||||
@ -184,11 +190,17 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
typedef uint32_t orte_rml_tag_t;
|
||||
|
||||
/**
|
||||
* Channel number
|
||||
* Reference to a rml channel
|
||||
*/
|
||||
typedef uint32_t orte_rml_channel_num_t;
|
||||
/* Conduit ID */
|
||||
typedef uint16_t orte_rml_conduit_t;
|
||||
|
||||
/* define an object for reporting transports */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *component;
|
||||
opal_list_t attributes;
|
||||
opal_list_t transports;
|
||||
} orte_rml_pathway_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_pathway_t);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \
|
||||
orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \
|
||||
orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \
|
||||
mapper reducer opal_hotel orte_dfs ulfm pmixtool orte_notify
|
||||
mapper reducer opal_hotel orte_dfs ulfm pmixtool
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
|
119
orte/test/system/ofi_big_msg.c
Обычный файл
119
orte/test/system/ofi_big_msg.c
Обычный файл
@ -0,0 +1,119 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int sock_conduit_id = 1; //use the first one
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
//maxpower = (double)(j%7);
|
||||
maxpower = (double)(j%8);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
//msgsize += 1401000;
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
|
||||
orte_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
193
orte/test/system/ofi_conduit_stress.c
Обычный файл
193
orte/test/system/ofi_conduit_stress.c
Обычный файл
@ -0,0 +1,193 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/util/attr.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
//debug routine to print the opal_value_t returned by query interface
|
||||
void print_transports_query()
|
||||
{
|
||||
opal_value_t *providers=NULL;
|
||||
char* prov_name = NULL;
|
||||
int ret;
|
||||
int32_t *protocol_ptr, protocol;
|
||||
int8_t conduit_id;
|
||||
int8_t *prov_num=&conduit_id;
|
||||
|
||||
protocol_ptr = &protocol;
|
||||
opal_output(0, "\n Current conduits loaded in rml-ofi ==>");
|
||||
/*opal_output(0,"\n print_transports_query() Begin- %s:%d",__FILE__,__LINE__);
|
||||
opal_output(0,"\n calling the orte_rml_ofi_query_transports() ");*/
|
||||
if( ORTE_SUCCESS == orte_rml.query_transports(&providers)) {
|
||||
//opal_output(0,"\n query_transports() completed, printing details\n");
|
||||
while (providers) {
|
||||
//get the first opal_list_t;
|
||||
opal_list_t temp;
|
||||
opal_list_t *prov = &temp;
|
||||
|
||||
ret = opal_value_unload(providers,(void **)&prov,OPAL_PTR);
|
||||
if (ret == OPAL_SUCCESS) {
|
||||
//opal_output(0,"\n %s:%d opal_value_unload() succeeded, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
if( orte_get_attribute( prov, ORTE_CONDUIT_ID, (void **)&prov_num,OPAL_UINT8)) {
|
||||
opal_output(0," Provider conduit_id : %d",*prov_num);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROTOCOL, (void **)&protocol_ptr,OPAL_UINT32)) {
|
||||
opal_output(0," Protocol : %d",*protocol_ptr);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROV_NAME, (void **)&prov_name ,OPAL_STRING)) {
|
||||
opal_output(0," Provider name : %s",prov_name);
|
||||
} else {
|
||||
opal_output(0," Error in getting Provider name");
|
||||
}
|
||||
} else {
|
||||
opal_output(0," %s:%d opal_value_unload() failed, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
}
|
||||
providers = (opal_value_t *)providers->super.opal_list_next;
|
||||
// opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d -
|
||||
// Moving on to next provider provders=%x",__FILE__,__LINE__,providers);
|
||||
}
|
||||
} else {
|
||||
opal_output(0,"\n query_transports() returned Error ");
|
||||
}
|
||||
//opal_output(0,"\n End of print_transports_query() from ofi_query_test.c \n");
|
||||
|
||||
//need to free all the providers here
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int conduit_id = 0; //use the first available conduit
|
||||
struct timeval start, end;
|
||||
opal_list_t *conduit_attr;
|
||||
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
print_transports_query();
|
||||
conduit_attr = OBJ_NEW(opal_list_t);
|
||||
if( ORTE_SUCCESS ==
|
||||
( orte_set_attribute( conduit_attr, ORTE_RML_OFI_PROV_NAME_ATTRIB, ORTE_ATTR_GLOBAL,"sockets",OPAL_STRING))) {
|
||||
if( ORTE_SUCCESS ==
|
||||
( orte_set_attribute( conduit_attr, ORTE_RML_INCLUDE_COMP_ATTRIB, ORTE_ATTR_GLOBAL,"ofi",OPAL_STRING))) {
|
||||
opal_output(0, "%s calling open_conduit with ORTE_RML_INCLUDE_COMP_ATTRIB and ORTE_RML_OFI_PROV_NAME_ATTRIB",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
conduit_id = orte_rml_API_open_conduit(conduit_attr);
|
||||
if (0 > conduit_id ) {
|
||||
opal_output(0, "Conduit could not be opened for OFI, exiting");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
opal_output(0, "Using conduit-id %d ", conduit_id);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
maxpower = (double)(j%7);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_nb_conduit(conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_nb_conduit(conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
orte_finalize();
|
||||
printf("start: %d secs, %d usecs\n",start.tv_sec,start.tv_usec);
|
||||
printf("end: %d secs, %d usecs\n",end.tv_sec,end.tv_usec);
|
||||
printf("Total minutes = %d, Total seconds = %d", (end.tv_sec - start.tv_sec)/60, (end.tv_sec - start.tv_sec) );
|
||||
return 0;
|
||||
}
|
137
orte/test/system/ofi_query_test.c
Обычный файл
137
orte/test/system/ofi_query_test.c
Обычный файл
@ -0,0 +1,137 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
//debug routine to print the opal_value_t returned by query interface
|
||||
void print_transports_query()
|
||||
{
|
||||
opal_value_t *providers=NULL;
|
||||
char* prov_name = NULL;
|
||||
int ret;
|
||||
int32_t *protocol_ptr, protocol;
|
||||
int8_t conduit_id;
|
||||
int8_t *prov_num=&conduit_id;
|
||||
|
||||
protocol_ptr = &protocol;
|
||||
|
||||
opal_output(0,"\n print_transports_query() Begin- %s:%d",__FILE__,__LINE__);
|
||||
opal_output(0,"\n calling the orte_rml_ofi_query_transports() ");
|
||||
if( ORTE_SUCCESS == orte_rml.query_transports(&providers)) {
|
||||
opal_output(0,"\n query_transports() completed, printing details\n");
|
||||
while (providers) {
|
||||
//get the first opal_list_t;
|
||||
opal_list_t temp;
|
||||
opal_list_t *prov = &temp;
|
||||
|
||||
ret = opal_value_unload(providers,(void **)&prov,OPAL_PTR);
|
||||
if (ret == OPAL_SUCCESS) {
|
||||
opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d opal_value_unload() succeeded, opal_list* prov = %x",
|
||||
__FILE__,__LINE__,prov);
|
||||
if (orte_get_attribute( prov, ORTE_CONDUIT_ID, (void **)&prov_num,OPAL_UINT8)) {
|
||||
opal_output(0," Provider conduit_id : %d",*prov_num);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROTOCOL, (void **)&protocol_ptr,OPAL_UINT32)) {
|
||||
opal_output(0," Protocol : %s",fi_tostr(protocol_ptr,FI_TYPE_PROTOCOL));
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROV_NAME, (void **)&prov_name ,OPAL_STRING)) {
|
||||
opal_output(0," Provider name : %s",prov_name);
|
||||
} else {
|
||||
opal_output(0," Error in getting Provider name");
|
||||
}
|
||||
} else {
|
||||
opal_output(0," %s:%d opal_value_unload() failed, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
}
|
||||
providers = (opal_value_t *)providers->super.opal_list_next;
|
||||
// opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d -
|
||||
// Moving on to next provider provders=%x",__FILE__,__LINE__,providers);
|
||||
}
|
||||
} else {
|
||||
opal_output(0,"\n query_transports() returned Error ");
|
||||
}
|
||||
opal_output(0,"\n End of print_transports_query() from ofi_query_test.c \n");
|
||||
|
||||
//need to free all the providers here
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
|
||||
|
||||
opal_output(0, "%s pid = %d ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
// orte_init(&argc, &argv, ORTE_PROC_MPI);
|
||||
|
||||
/*
|
||||
* Runtime Messaging Layer - added this as RML was not being initialised in the app process,
|
||||
* but now ompimaster has code added to call this automatically
|
||||
*/
|
||||
/*
|
||||
if (ORTE_SUCCESS == ( mca_base_framework_open(&orte_rml_base_framework, 0))) {
|
||||
opal_output(0, "%s RML framework opened successfully ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
if (ORTE_SUCCESS == orte_rml_base_select()) {
|
||||
opal_output(0, "%s RML framework base_select completed successfully ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
print_transports_query();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
print_transports_query();
|
||||
opal_output(0, "%s calling orte_finalize() from ofi_query_test.c ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
121
orte/test/system/ofi_stress.c
Обычный файл
121
orte/test/system/ofi_stress.c
Обычный файл
@ -0,0 +1,121 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int sock_conduit_id = 0; //use the first conduit
|
||||
struct timeval start, end;
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
maxpower = (double)(j%7);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
orte_finalize();
|
||||
printf("start: %d secs, %d usecs\n",start.tv_sec,start.tv_usec);
|
||||
printf("end: %d secs, %d usecs\n",end.tv_sec,end.tv_usec);
|
||||
printf("Total minutes = %d, Total seconds = %d", (end.tv_sec - start.tv_sec)/60, (end.tv_sec - start.tv_sec) );
|
||||
return 0;
|
||||
}
|
@ -3,6 +3,7 @@
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
@ -44,7 +45,7 @@ main(int argc, char *argv[]){
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
|
||||
struct timeval start, end;
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
@ -65,6 +66,7 @@ main(int argc, char *argv[]){
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start,NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
@ -98,8 +100,6 @@ main(int argc, char *argv[]){
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
@ -109,6 +109,8 @@ main(int argc, char *argv[]){
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end,NULL);
|
||||
printf("Total minutes = %d, Total seconds = %d\n",(end.tv_sec - start.tv_sec)/60,(end.tv_sec - start.tv_sec));
|
||||
|
||||
orte_finalize();
|
||||
|
||||
|
@ -312,6 +312,25 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
|
||||
case ORTE_PROC_NBEATS:
|
||||
return "PROC-NBEATS";
|
||||
|
||||
case ORTE_RML_TRANSPORT_TYPE:
|
||||
return "RML-TRANSPORT-TYPE";
|
||||
case ORTE_RML_PROTOCOL_TYPE:
|
||||
return "RML-PROTOCOL-TYPE";
|
||||
case ORTE_RML_CONDUIT_ID:
|
||||
return "RML-CONDUIT-ID";
|
||||
case ORTE_RML_INCLUDE_COMP_ATTRIB:
|
||||
return "RML-INCLUDE";
|
||||
case ORTE_RML_EXCLUDE_COMP_ATTRIB:
|
||||
return "RML-EXCLUDE";
|
||||
case ORTE_RML_TRANSPORT_ATTRIB:
|
||||
return "RML-TRANSPORT";
|
||||
case ORTE_RML_QUALIFIER_ATTRIB:
|
||||
return "RML-QUALIFIER";
|
||||
case ORTE_RML_PROVIDER_ATTRIB:
|
||||
return "RML-DESIRED-PROVIDERS";
|
||||
case ORTE_RML_PROTOCOL_ATTRIB:
|
||||
return "RML-DESIRED-PROTOCOLS";
|
||||
|
||||
default:
|
||||
return "UNKNOWN-KEY";
|
||||
}
|
||||
@ -339,9 +358,24 @@ static int orte_attr_load(orte_attribute_t *kv,
|
||||
struct timeval *tv;
|
||||
|
||||
kv->type = type;
|
||||
if (NULL == data && OPAL_STRING != type && OPAL_BYTE_OBJECT != type) {
|
||||
/* just set the fields to zero */
|
||||
memset(&kv->data, 0, sizeof(kv->data));
|
||||
if (NULL == data) {
|
||||
/* if the type is BOOL, then the user wanted to
|
||||
* use the presence of the attribute to indicate
|
||||
* "true" - so let's mark it that way just in
|
||||
* case a subsequent test looks for the value */
|
||||
if (OPAL_BOOL == type) {
|
||||
kv->data.flag = true;
|
||||
} else {
|
||||
/* otherwise, check to see if this type has storage
|
||||
* that is already allocated, and free it if so */
|
||||
if (OPAL_STRING == type && NULL != kv->data.string) {
|
||||
free(kv->data.string);
|
||||
} else if (OPAL_BYTE_OBJECT == type && NULL != kv->data.bo.bytes) {
|
||||
free(kv->data.bo.bytes);
|
||||
}
|
||||
/* just set the fields to zero */
|
||||
memset(&kv->data, 0, sizeof(kv->data));
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -181,15 +181,18 @@ typedef uint16_t orte_proc_flags_t;
|
||||
|
||||
#define ORTE_PROC_MAX_KEY 400
|
||||
|
||||
/*** MESSAGING QOS ATTRIBUTE KEYS ***/
|
||||
#define ORTE_QOS_START_KEY ORTE_PROC_MAX_KEY
|
||||
#define ORTE_QOS_TYPE (ORTE_QOS_START_KEY + 1) //uint8- defining what type of qos - refer to orte_qos_type enum for values
|
||||
#define ORTE_QOS_WINDOW_SIZE (ORTE_QOS_START_KEY + 2) // uint32 - number of messages in the window (stream)
|
||||
#define ORTE_QOS_ACK_NACK_TIMEOUT (ORTE_QOS_START_KEY + 3) //uint32 - timeout value in secs for msg/window ack nack
|
||||
#define ORTE_QOS_MSG_RETRY (ORTE_QOS_START_KEY + 4) // bool- resend message upon ACK fail or NACK or timeout.
|
||||
#define ORTE_QOS_NUM_RETRIES (ORTE_QOS_START_KEY + 5) // uint32 - number of retries.
|
||||
/*** RML ATTRIBUTE keys ***/
|
||||
#define ORTE_RML_START_KEY ORTE_PROC_MAX_KEY
|
||||
#define ORTE_RML_TRANSPORT_TYPE (ORTE_RML_START_KEY + 1) // string - null terminated string containing transport type
|
||||
#define ORTE_RML_PROTOCOL_TYPE (ORTE_RML_START_KEY + 2) // string - protocol type (e.g., as returned by fi_info)
|
||||
#define ORTE_RML_CONDUIT_ID (ORTE_RML_START_KEY + 3) // orte_rml_conduit_t - conduit_id for this transport
|
||||
#define ORTE_RML_INCLUDE_COMP_ATTRIB (ORTE_RML_START_KEY + 4) // string - comma delimited list of RML component names to be considered
|
||||
#define ORTE_RML_EXCLUDE_COMP_ATTRIB (ORTE_RML_START_KEY + 5) // string - comma delimited list of RML component names to be excluded
|
||||
#define ORTE_RML_TRANSPORT_ATTRIB (ORTE_RML_START_KEY + 6) // string - comma delimited list of transport types to be considered (e.g., "fabric,ethernet")
|
||||
#define ORTE_RML_QUALIFIER_ATTRIB (ORTE_RML_START_KEY + 7) // string - comma delimited list of qualifiers (e.g., routed=direct,bandwidth=xxx)
|
||||
#define ORTE_RML_PROVIDER_ATTRIB (ORTE_RML_START_KEY + 8) // string - comma delimited list of provider names to be considered
|
||||
#define ORTE_RML_PROTOCOL_ATTRIB (ORTE_RML_START_KEY + 9) // string - comma delimited list of protocols to be considered (e.g., tcp,udp)
|
||||
|
||||
#define ORTE_QOS_MAX_KEY 500
|
||||
|
||||
#define ORTE_ATTR_KEY_MAX 1000
|
||||
|
||||
|
@ -234,6 +234,9 @@ int orte_err2str(int errnum, const char **errmsg)
|
||||
case ORTE_ERR_JOB_CANCELLED:
|
||||
retval = "Job cancelled";
|
||||
break;
|
||||
case ORTE_ERR_CONDUIT_SEND_FAIL:
|
||||
retval = " Transport Conduit returned send error";
|
||||
break;
|
||||
case ORTE_ERR_DEBUGGER_RELEASE:
|
||||
retval = "Debugger release";
|
||||
break;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user