Merge pull request #2146 from rhc54/topic/rml2
Bring the RML modifications across
Этот коммит содержится в:
Коммит
ee9f33f08c
@ -2,7 +2,7 @@
|
||||
+++ config/ltmain.sh
|
||||
@@ -6417,8 +6417,14 @@
|
||||
func_source "$lib"
|
||||
|
||||
|
||||
# Convert "-framework foo" to "foo.ltframework"
|
||||
+ # and "-pthread" to "-Wl,-pthread" if NAG compiler
|
||||
if test -n "$inherited_linker_flags"; then
|
||||
|
@ -17,7 +17,7 @@ dnl and Technology (RIST). All rights reserved.
|
||||
dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
dnl reserved.
|
||||
dnl Copyright (c) 2016 Intel Corporation. All rights reserved.
|
||||
dnl
|
||||
dnl
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# OMPI_CHECK_UCX(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if UCX support can be found. sets prefix_{CPPFLAGS,
|
||||
# check if UCX support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
AC_DEFUN([OMPI_CHECK_UCX],[
|
||||
|
@ -30,7 +30,7 @@
|
||||
# --------------------------------------------------------
|
||||
AC_DEFUN([OPAL_CHECK_CRAY_XPMEM],[
|
||||
if test -z "$opal_check_cray_xpmem_happy" ; then
|
||||
AC_ARG_WITH([cray_xpmem],
|
||||
AC_ARG_WITH([cray_xpmem],
|
||||
[AC_HELP_STRING([--with-cray-xpmem(=yes/no)],
|
||||
[Build Cray XPMEM support(default: auto)])],
|
||||
[], with_cray_xpmem=auto)
|
||||
|
@ -263,7 +263,7 @@ EOF
|
||||
chmod +x $rpath_script
|
||||
wl_fc=`./$rpath_script`
|
||||
rm -f $rpath_script
|
||||
|
||||
|
||||
LDFLAGS="$LDFLAGS_save ${wl_fc}--enable-new-dtags"
|
||||
AC_LANG_PUSH([Fortran])
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[program test
|
||||
|
@ -287,10 +287,10 @@ function do_analysis() {
|
||||
|
||||
if [ ! -e $tesdir ]; then
|
||||
do_err "can not find testdir: $testdir"
|
||||
fi
|
||||
fi
|
||||
if [ -z $basefile -o ! -f $basefile ]; then
|
||||
do_err "can not find basefile: $basefile"
|
||||
fi
|
||||
fi
|
||||
if [ -z $outfile -o ! -f $outfile ]; then
|
||||
do_err "can not find outfile: $outfile"
|
||||
fi
|
||||
@ -323,7 +323,7 @@ function do_analysis() {
|
||||
max_line=`sort -n $outfile1 | tail -n1`
|
||||
if [ -z "$min_line" -o -z "$max_line" ]; then
|
||||
do_err "can not find max/min lines in : $outfile1"
|
||||
fi
|
||||
fi
|
||||
min_t=$( echo "$min_line" | cut -f1 -d$'\t')
|
||||
max_t=$( echo "$max_line" | cut -f1 -d$'\t')
|
||||
echo -e "`bc -l <<< "scale=3; (($min_t - $start_t) / 1000000)"`\t`echo "$min_line" | cut -f4 -d$'\t'`\t`echo "$min_line" | cut -f5 -d$'\t'`" >> $resultfile 2>&1
|
||||
@ -341,7 +341,7 @@ function do_report() {
|
||||
|
||||
if [ -z $resultfile -o ! -f $resultfile ]; then
|
||||
do_err "can not find resultfile: $resultfile"
|
||||
fi
|
||||
fi
|
||||
min_t=`awk -F $'\t' '{ if (NR == 1) print $1 }' $resultfile`
|
||||
max_t=`awk -F $'\t' '{ if (NR == 2) print $1 }' $resultfile`
|
||||
echo -e "${nodes}\t${ppn}\t${min_t}\t${max_t}" >> $reportfile 2>&1
|
||||
@ -374,7 +374,7 @@ static inline void timestamp(FILE *file)
|
||||
int procid = -1;
|
||||
char *str = NULL;
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
if (gethostname(name, sizeof(name)) != 0)
|
||||
strcpy(name, "localhost");
|
||||
@ -485,7 +485,7 @@ function do_parse() {
|
||||
test_list="${test_list} ${test}"
|
||||
done
|
||||
done
|
||||
|
||||
|
||||
result_list=`echo $result_list | tr " " "\n" | sort | uniq | tr "\n" " "`
|
||||
test_list=`echo $test_list | tr " " "\n" | sort | uniq | tr "\n" " "`
|
||||
|
||||
@ -994,7 +994,7 @@ int orte_rml_base_update_contact_info(void * data)
|
||||
FILE *fd = NULL;
|
||||
char filename[1024];
|
||||
char *str = getenv("SLURM_PROCID");
|
||||
|
||||
|
||||
if (str) {
|
||||
sprintf(filename, "%s.%s", "$outfile", str);
|
||||
fd = fopen(filename, "a");
|
||||
@ -1070,7 +1070,7 @@ int orte_rml_base_update_contact_info(void * data)
|
||||
FILE *fd = NULL;
|
||||
char filename[1024];
|
||||
char *str = getenv("SLURM_PROCID");
|
||||
|
||||
|
||||
if (str) {
|
||||
sprintf(filename, "%s.%s", "$outfile", str);
|
||||
fd = fopen(filename, "a");
|
||||
|
@ -340,7 +340,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
|
||||
if (flag) {
|
||||
opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL);
|
||||
}
|
||||
|
||||
|
||||
ompi_comm_cid_lowest_id = INT64_MAX;
|
||||
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
@ -154,7 +154,8 @@ enum {
|
||||
ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55),
|
||||
ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56),
|
||||
ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57),
|
||||
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58)
|
||||
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58),
|
||||
ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 59)
|
||||
};
|
||||
|
||||
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -201,12 +201,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_grpcomm_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication via the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
/* setup the routed info */
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -356,6 +356,29 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
/* obviously, we have "reported" */
|
||||
jdata->num_reported = 1;
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
/* the server code already barked, so let's be quiet */
|
||||
ret = ORTE_ERR_SILENT;
|
||||
error = "pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Setup the communication infrastructure */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -380,6 +403,13 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
/* add our contact info */
|
||||
proc->rml_uri = orte_rml.get_contact_info();
|
||||
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* select the errmgr */
|
||||
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -440,49 +470,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
error = "orte_rtc_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication with the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
/* Now provide a chance for the PLM
|
||||
* to perform any module-specific init functions. This
|
||||
* needs to occur AFTER the communications are setup
|
||||
* as it may involve starting a non-blocking recv
|
||||
* Do this only if a specific PLM was given to us - the
|
||||
* orted has no need of the proxy PLM at all
|
||||
*/
|
||||
if (plm_in_use) {
|
||||
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_plm_init";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
/* the server code already barked, so let's be quiet */
|
||||
ret = ORTE_ERR_SILENT;
|
||||
error = "pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
#if ORTE_ENABLE_STATIC_PORTS
|
||||
/* if we are using static ports, then we need to setup
|
||||
* the daemon info so the RML can function properly
|
||||
@ -511,6 +499,21 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
*/
|
||||
orte_routed.update_routing_plan();
|
||||
|
||||
/* Now provide a chance for the PLM
|
||||
* to perform any module-specific init functions. This
|
||||
* needs to occur AFTER the communications are setup
|
||||
* as it may involve starting a non-blocking recv
|
||||
* Do this only if a specific PLM was given to us - the
|
||||
* orted has no need of the proxy PLM at all
|
||||
*/
|
||||
if (plm_in_use) {
|
||||
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_plm_init";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the routed info - the selected routed component
|
||||
* will know what to do.
|
||||
*/
|
||||
|
@ -134,12 +134,7 @@ int orte_ess_base_tool_setup(void)
|
||||
* to which I want to communicate and setting up a route for
|
||||
* that link is my responsibility
|
||||
*/
|
||||
/* enable communication via the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* we -may- need to know the name of the head
|
||||
* of our session directory tree, particularly the
|
||||
* tmp base where any other session directories on
|
||||
|
@ -551,12 +551,7 @@ static int rte_init(void)
|
||||
error = "orte_rtc_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication with the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* we are an hnp, so update the contact info field for later use */
|
||||
orte_process_info.my_hnp_uri = orte_rml.get_contact_info();
|
||||
proc->rml_uri = strdup(orte_process_info.my_hnp_uri);
|
||||
|
@ -56,9 +56,6 @@
|
||||
/* LOCAL FUNCTIONS */
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata);
|
||||
|
||||
static void
|
||||
orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t reason);
|
||||
|
||||
/* API FUNCTIONS */
|
||||
static int init(void);
|
||||
|
||||
@ -94,8 +91,6 @@ orte_iof_base_module_t orte_iof_hnp_module = {
|
||||
/* Initialize the module */
|
||||
static int init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* post non-blocking recv to catch forwarded IO from
|
||||
* the orteds
|
||||
*/
|
||||
@ -105,12 +100,6 @@ static int init(void)
|
||||
orte_iof_hnp_recv,
|
||||
NULL);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.add_exception_handler(orte_iof_hnp_exception_handler))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_iof_hnp_component.procs, opal_list_t);
|
||||
mca_iof_hnp_component.stdinev = NULL;
|
||||
|
||||
@ -610,37 +599,3 @@ CHECK:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback when peer is disconnected
|
||||
*/
|
||||
|
||||
static void
|
||||
orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t reason)
|
||||
{
|
||||
#if 0
|
||||
orte_iof_base_endpoint_t *endpoint;
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output,
|
||||
"iof svc exception handler! %s\n",
|
||||
ORTE_NAME_PRINT((orte_process_name_t*)peer));
|
||||
|
||||
/* If we detect an exception on the RML connection to a peer,
|
||||
delete all of its subscriptions and publications. Note that
|
||||
exceptions can be detected during a normal RML shutdown; they
|
||||
are recoverable events (no need to abort). */
|
||||
orte_iof_hnp_sub_delete_all(peer);
|
||||
orte_iof_hnp_pub_delete_all(peer);
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output, "deleted all pubs and subs\n");
|
||||
|
||||
/* Find any streams on any endpoints for this peer and close them */
|
||||
while (NULL !=
|
||||
(endpoint = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL,
|
||||
ORTE_IOF_ANY))) {
|
||||
orte_iof_base_endpoint_closed(endpoint);
|
||||
|
||||
/* Delete the endpoint that we just matched */
|
||||
orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
|
||||
}
|
||||
#endif
|
||||
opal_output_verbose(1, orte_iof_base_framework.framework_output, "done with exception handler\n");
|
||||
}
|
||||
|
@ -181,6 +181,12 @@ OBJ_CLASS_DECLARATION(mca_oob_uri_req_t);
|
||||
}while(0);
|
||||
ORTE_DECLSPEC void orte_oob_base_set_addr(int fd, short args, void *cbdata);
|
||||
|
||||
|
||||
/* Get the available transports and their attributes */
|
||||
#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u)
|
||||
ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports);
|
||||
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata);
|
||||
#endif
|
||||
|
@ -400,6 +400,30 @@ static void process_uri(char *uri)
|
||||
opal_argv_free(uris);
|
||||
}
|
||||
|
||||
/**
 * Collect the transport descriptions offered by the active OOB components.
 *
 * Walks orte_oob_base.actives and, for every component that provides a
 * query_transports entry point, calls it; each non-NULL pathway returned
 * is appended to the caller's list.
 *
 * @param transports  list that receives the orte_rml_pathway_t items
 */
void orte_oob_base_get_transports(opal_list_t *transports)
{
    mca_base_component_list_item_t *item;
    mca_oob_base_component_t *oob;
    orte_rml_pathway_t *pathway;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s: get transports",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(item, &orte_oob_base.actives, mca_base_component_list_item_t) {
        oob = (mca_oob_base_component_t*)item->cli_component;

        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s:get transports for component %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            oob->oob_base.mca_component_name);

        /* the entry point is optional, and a component may decline by
         * returning NULL - only real pathways go onto the list */
        if (NULL != oob->query_transports &&
            NULL != (pathway = oob->query_transports())) {
            opal_list_append(transports, &pathway->super);
        }
    }
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
void orte_oob_base_ft_event(int sd, short argc, void *cbdata)
|
||||
{
|
||||
|
@ -57,24 +57,27 @@ typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer);
|
||||
typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata);
|
||||
typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void);
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
typedef int (*mca_oob_base_component_ft_event_fn_t)(int state);
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
mca_base_component_t oob_base;
|
||||
mca_base_component_data_t oob_data;
|
||||
int idx;
|
||||
int priority;
|
||||
mca_oob_base_component_avail_fn_t available;
|
||||
mca_oob_base_component_startup_fn_t startup;
|
||||
mca_oob_base_component_shutdown_fn_t shutdown;
|
||||
mca_oob_base_component_send_fn_t send_nb;
|
||||
mca_oob_base_component_get_addr_fn_t get_addr;
|
||||
mca_oob_base_component_set_addr_fn_t set_addr;
|
||||
mca_oob_base_component_is_reachable_fn_t is_reachable;
|
||||
mca_base_component_t oob_base;
|
||||
mca_base_component_data_t oob_data;
|
||||
int idx;
|
||||
int priority;
|
||||
mca_oob_base_component_avail_fn_t available;
|
||||
mca_oob_base_component_startup_fn_t startup;
|
||||
mca_oob_base_component_shutdown_fn_t shutdown;
|
||||
mca_oob_base_component_send_fn_t send_nb;
|
||||
mca_oob_base_component_get_addr_fn_t get_addr;
|
||||
mca_oob_base_component_set_addr_fn_t set_addr;
|
||||
mca_oob_base_component_is_reachable_fn_t is_reachable;
|
||||
mca_oob_base_component_query_transports_fn_t query_transports;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
mca_oob_base_component_ft_event_fn_t ft_event;
|
||||
mca_oob_base_component_ft_event_fn_t ft_event;
|
||||
#endif
|
||||
} mca_oob_base_component_t;
|
||||
|
||||
|
@ -277,6 +277,13 @@ static void process_set_peer(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(peer);
|
||||
return;
|
||||
}
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* we have to initiate the connection because otherwise the
|
||||
* daemon has no way to communicate to us via this component
|
||||
* as the app doesn't have a listening port */
|
||||
peer->state = MCA_OOB_TCP_CONNECTING;
|
||||
ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect);
|
||||
}
|
||||
}
|
||||
|
||||
maddr = OBJ_NEW(mca_oob_tcp_addr_t);
|
||||
@ -294,7 +301,7 @@ static void process_set_peer(int fd, short args, void *cbdata)
|
||||
(NULL == pop->port) ? "NULL" : pop->port);
|
||||
opal_list_append(&peer->addrs, &maddr->super);
|
||||
|
||||
cleanup:
|
||||
cleanup:
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
|
@ -64,8 +64,10 @@
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/util/attr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -94,6 +96,7 @@ static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
static orte_rml_pathway_t* component_query_transports(void);
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
static int component_ft_event(int state);
|
||||
#endif
|
||||
@ -124,6 +127,7 @@ mca_oob_tcp_component_t mca_oob_tcp_component = {
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
.query_transports = component_query_transports,
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
.ft_event = component_ft_event,
|
||||
#endif
|
||||
@ -146,11 +150,8 @@ static int tcp_component_open(void)
|
||||
mca_oob_tcp_component.addr_count = 0;
|
||||
mca_oob_tcp_component.ipv4conns = NULL;
|
||||
mca_oob_tcp_component.ipv4ports = NULL;
|
||||
|
||||
#if OPAL_ENABLE_IPV6
|
||||
mca_oob_tcp_component.ipv6conns = NULL;
|
||||
mca_oob_tcp_component.ipv6ports = NULL;
|
||||
#endif
|
||||
|
||||
/* if_include and if_exclude need to be mutually exclusive */
|
||||
if (OPAL_SUCCESS !=
|
||||
@ -513,6 +514,11 @@ static int component_available(void)
|
||||
/* get the name for diagnostic purposes */
|
||||
opal_ifindextoname(i, name, sizeof(name));
|
||||
|
||||
/* ignore any virtual interfaces */
|
||||
if (0 == strncmp(name, "vir", 3)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* handle include/exclude directives */
|
||||
if (NULL != interfaces) {
|
||||
/* check for match */
|
||||
@ -612,6 +618,37 @@ static int component_available(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_rml_pathway_t* component_query_transports(void)
|
||||
{
|
||||
orte_rml_pathway_t *p;
|
||||
char *qual;
|
||||
|
||||
/* if neither IPv4 or IPv6 connections are available, then
|
||||
* we have nothing to support */
|
||||
if (NULL == mca_oob_tcp_component.ipv4conns &&
|
||||
NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if we get here, then we support Ethernet and TCP */
|
||||
p = OBJ_NEW(orte_rml_pathway_t);
|
||||
p->component = strdup("oob");
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING);
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_PROTOCOL_TYPE, ORTE_ATTR_LOCAL, "TCP", OPAL_STRING);
|
||||
/* setup our qualifiers - we route communications, may have IPv4 and/or IPv6, etc. */
|
||||
if (NULL != mca_oob_tcp_component.ipv4conns &&
|
||||
NULL != mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4:ipv6";
|
||||
} else if (NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4";
|
||||
} else {
|
||||
qual = "routed=true:ipv6";
|
||||
}
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_QUALIFIER_ATTRIB, ORTE_ATTR_LOCAL, qual, OPAL_STRING);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Start all modules */
|
||||
static int component_startup(void)
|
||||
{
|
||||
|
@ -60,14 +60,12 @@ typedef struct {
|
||||
char** ipv4conns;
|
||||
char** ipv4ports;
|
||||
|
||||
#if OPAL_ENABLE_IPV6
|
||||
/* IPv6 support */
|
||||
bool disable_ipv6_family; /**< disable this AF */
|
||||
char** tcp6_static_ports; /**< Static ports - IPV6 */
|
||||
char** tcp6_dyn_ports; /**< Dynamic ports - IPV6 */
|
||||
char** ipv6conns;
|
||||
char** ipv6ports;
|
||||
#endif
|
||||
|
||||
/* connection support */
|
||||
char* my_uri; /**< uri for connecting to the TCP module */
|
||||
|
@ -1,42 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_oob_usock_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Registers the usock component Makefile and then runs action-if-found
# when struct sockaddr_un is a known type, action-if-not-found otherwise.
AC_DEFUN([MCA_orte_oob_usock_CONFIG],[
    AC_CONFIG_FILES([orte/mca/oob/usock/Makefile])

    # check for sockaddr_un (a good sign we have Unix domain sockets)
    AC_CHECK_TYPES([struct sockaddr_un],
                   [oob_usock_happy="yes"],
                   [oob_usock_happy="no"],
                   [AC_INCLUDES_DEFAULT
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif])

    AS_IF([test "$oob_usock_happy" = "yes"], [$1], [$2])
])dnl
|
@ -1,70 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[static-and-dynamic]
|
||||
Both static and dynamic port ranges were specified for the
|
||||
out-of-band (OOB) communication subsystem:
|
||||
|
||||
Static ports: %s
|
||||
Dynamic ports: %s
|
||||
|
||||
Only one can be specified. Please choose either static or
|
||||
dynamic ports and try again.
|
||||
#
|
||||
[include-exclude]
|
||||
Both TCP interface include and exclude lists were specified:
|
||||
|
||||
Include: %s
|
||||
Exclude: %s
|
||||
|
||||
Only one of these can be given.
|
||||
#
|
||||
[not-parseable]
|
||||
The specified network is not parseable. Since we cannot determine
|
||||
your desired intent, we cannot establish a TCP socket for out-of-band
|
||||
communications and will therefore abort. Please correct the network
|
||||
specification and retry.
|
||||
#
|
||||
[no-included-found]
|
||||
None of the networks specified to be included for out-of-band communications
|
||||
could be found:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[excluded-all]
|
||||
The specified list of networks to be excluded for out-of-band communications
|
||||
resulted in no networks being available:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[no-interfaces-avail]
|
||||
No network interfaces were found for out-of-band communications. We require
|
||||
at least one available network for TCP-based messaging.
|
||||
#
|
||||
[invalid if_inexclude]
|
||||
WARNING: An invalid value was given for oob_tcp_if_%s. This
|
||||
value will be ignored.
|
||||
|
||||
Local host: %s
|
||||
Value: %s
|
||||
Message: %s
|
||||
#
|
@ -1,490 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
|
||||
static void usock_init(void);
|
||||
static void usock_fini(void);
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr);
|
||||
static void ping(const orte_process_name_t *proc);
|
||||
static void send_nb(orte_rml_send_t *msg);
|
||||
static void ft_event(int state);
|
||||
|
||||
mca_oob_usock_module_t mca_oob_usock_module = {
|
||||
{
|
||||
usock_init,
|
||||
usock_fini,
|
||||
accept_connection,
|
||||
ping,
|
||||
send_nb,
|
||||
ft_event
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void* user);
|
||||
/*
 * Body of the module-level progress thread: keeps running one pass of
 * the module's event loop for as long as ev_active is set, then returns
 * OPAL_THREAD_CANCELLED.  The obj argument is not used.
 */
static void* progress_thread_engine(opal_object_t *obj)
{
    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s USOCK PROGRESS THREAD RUNNING",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    for (;;) {
        /* ev_active is cleared by the shutdown path to stop this loop */
        if (!mca_oob_usock_module.ev_active) {
            break;
        }
        opal_event_loop(mca_oob_usock_module.ev_base, OPAL_EVLOOP_ONCE);
    }

    return OPAL_THREAD_CANCELLED;
}
|
||||
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static void usock_init(void)
|
||||
{
|
||||
/* setup the module's state variables */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.peers, opal_hash_table_t);
|
||||
opal_hash_table_init(&mca_oob_usock_module.peers, 32);
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
|
||||
if (orte_oob_base.use_module_threads) {
|
||||
/* if we are to use independent progress threads at
|
||||
* the module level, start it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STARTING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_module.ev_base = opal_event_base_create();
|
||||
/* construct the thread object */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.progress_thread, opal_thread_t);
|
||||
/* fork off a thread to progress it */
|
||||
mca_oob_usock_module.progress_thread.t_run = progress_thread_engine;
|
||||
mca_oob_usock_module.progress_thread.t_arg = NULL;
|
||||
mca_oob_usock_module.ev_active = true;
|
||||
if (OPAL_SUCCESS != opal_thread_start(&mca_oob_usock_module.progress_thread)) {
|
||||
opal_output(0, "%s USOCK progress thread failed to start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
} else {
|
||||
mca_oob_usock_module.ev_base = orte_event_base;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Module cleanup.
|
||||
*/
|
||||
static void usock_fini(void)
|
||||
{
|
||||
/* cleanup all peers */
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.peers);
|
||||
|
||||
if (mca_oob_usock_module.ev_active) {
|
||||
/* if we used an independent progress thread at
|
||||
* the module level, stop it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STOPPING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* stop the progress thread */
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
/* break the event loop */
|
||||
opal_event_base_loopexit(mca_oob_usock_module.ev_base);
|
||||
/* wait for thread to exit */
|
||||
opal_thread_join(&mca_oob_usock_module.progress_thread, NULL);
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.progress_thread);
|
||||
/* release the event base */
|
||||
opal_event_base_free(mca_oob_usock_module.ev_base);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called by mca_oob_usock_accept() and connection_handler() on
|
||||
* a socket that has been accepted. This call finishes processing the
|
||||
* socket by registering for the OOB-level connection handshake. Used
|
||||
* in both the threaded and event listen modes.
|
||||
*/
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s accept_connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* use a one-time event to wait for receipt of peer's
|
||||
* process ident message to complete this connection
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_ACCEPT_STATE(accepted_fd, addr, recv_handler);
|
||||
}
|
||||
|
||||
/* API functions */
|
||||
static void process_ping(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_ping_t *op = (mca_oob_usock_ping_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing ping to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
|
||||
/* do we know this peer? */
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->peer))) {
|
||||
/* push this back to the framework so another component can try */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] hop %s unknown",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
#if 0
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, NULL, &op->peer, mca_oob_usock_component_hop_unknown);
|
||||
#endif
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connected, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connected to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connecting, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTING == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connecting to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* attempt the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void ping(const orte_process_name_t *proc)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] pinging peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(proc));
|
||||
|
||||
/* push this into our event base for processing */
|
||||
ORTE_ACTIVATE_USOCK_PING(proc, process_ping);
|
||||
}
|
||||
|
||||
static void process_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_op_t *op = (mca_oob_usock_msg_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
struct timeval tv;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->msg->dst));
|
||||
|
||||
/* if I am an app, the only route is to my daemon, so
|
||||
* send the msg there
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(ORTE_PROC_MY_DAEMON))) {
|
||||
/* we don't know how to talk to our daemon,
|
||||
* which is strange since we already got here.
|
||||
* likely means we lost a race condition, so
|
||||
*
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
ORTE_PROC_MY_DAEMON,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
} else if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* if I am a daemon, the only way I should be given this
|
||||
* message to send is if the proc is local to me
|
||||
*/
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->msg->dst))) {
|
||||
/* try this again after a delay for N times */
|
||||
op->reps++;
|
||||
if (20 < op->reps) {
|
||||
/* we don't know how to talk to this proc,
|
||||
* so send this back up to the OOB base so it
|
||||
* can try another transport
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
&op->msg->dst,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
opal_event_evtimer_set(orte_event_base, &op->ev, process_send, op);
|
||||
opal_event_set_priority(&op->ev, ORTE_ERROR_PRI);
|
||||
tv.tv_sec = 1;
|
||||
tv.tv_usec = 0;
|
||||
opal_event_evtimer_add(&op->ev, &tv);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* otherwise, this message can't be handled by me, so
|
||||
* notify the component of the mistake
|
||||
*/
|
||||
opal_output(0, "CAN'T BE HANDLED");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the msg to the target's send queue */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: already connected to %s - queueing for send",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
MCA_OOB_USOCK_QUEUE_SEND(op->msg, peer);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (MCA_OOB_USOCK_CLOSED == peer->state) {
|
||||
/* the peer has gone, it will never come back */
|
||||
goto cleanup;
|
||||
}
|
||||
/* add the message to the queue for sending after the
|
||||
* connection is formed
|
||||
*/
|
||||
MCA_OOB_USOCK_QUEUE_PENDING(op->msg, peer);
|
||||
|
||||
if (MCA_OOB_USOCK_CONNECTING != peer->state &&
|
||||
MCA_OOB_USOCK_CONNECT_ACK != peer->state) {
|
||||
/* we have to initiate the connection - again, we do not
|
||||
* want to block while the connection is created.
|
||||
* So throw us into an event that will create
|
||||
* the connection via a mini-state-machine :-)
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: initiating connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Module API: non-blocking send. The message is posted as an event
 * whose callback is process_send; nothing is transmitted from this
 * call itself.
 */
static void send_nb(orte_rml_send_t *msg)
{
    opal_output_verbose(2,
                        orte_oob_base_framework.framework_output,
                        "%s usock:send_nb to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst));

    /* defer the real work to the event base */
    ORTE_ACTIVATE_USOCK_POST_SEND(msg, process_send);
}
|
||||
|
||||
/*
|
||||
* Event callback when there is data available on the registered
|
||||
* socket to recv. This is called for the listen sockets to accept an
|
||||
* incoming connection, on new sockets trying to complete the software
|
||||
* connection process, and for probes. Data on an established
|
||||
* connection is handled elsewhere.
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* get the handshake */
|
||||
if (ORTE_SUCCESS != mca_oob_usock_peer_recv_connect_ack(NULL, sd, &hdr)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* finish processing ident */
|
||||
if (MCA_OOB_USOCK_IDENT == hdr.type) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&hdr.origin))) {
|
||||
/* should never happen */
|
||||
goto cleanup;
|
||||
}
|
||||
/* set socket up to be non-blocking */
|
||||
if ((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_GETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if (fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_SETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
}
|
||||
}
|
||||
|
||||
/* is the peer instance willing to accept this connection */
|
||||
peer->sd = sd;
|
||||
if (mca_oob_usock_peer_accept(peer) == false) {
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_recv_connect: "
|
||||
"rejected connection state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
memcpy(&ui64, &peer->name, sizeof(uint64_t));
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, NULL);
|
||||
OBJ_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/* Dummy function for when we are not using FT. */
#if OPAL_ENABLE_FT_CR == 0
static void ft_event(int state)
{
    return;
}

#else
/*
 * Checkpoint/restart notification hook.
 *
 * All state-specific handling is currently compiled out with "#if 0",
 * so every state is a no-op. Each disabled section is kept inside
 * its own branch so that the preprocessor nesting matches the
 * if/else structure.
 */
static void ft_event(int state) {
#if 0
    opal_list_item_t *item;
#endif

    if(OPAL_CRS_CHECKPOINT == state) {
#if 0
        /*
         * Disable event processing while we are working
         */
        opal_event_disable();
#endif
    }
    else if(OPAL_CRS_CONTINUE == state) {
#if 0
        /*
         * Resume event processing
         */
        opal_event_enable();
#endif
    }
    else if(OPAL_CRS_RESTART == state) {
#if 0
        /*
         * Clean out cached connection information
         * Select pieces of finalize/init
         */
        for (item = opal_list_remove_first(&mod->peer_list);
             item != NULL;
             item = opal_list_remove_first(&mod->peer_list)) {
            mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)item;
            /* JJH: Use the below command for debugging restarts with invalid sockets
             * mca_oob_usock_peer_dump(peer, "RESTART CLEAN")
             */
            MCA_OOB_USOCK_PEER_RETURN(peer);
        }

        OBJ_DESTRUCT(&mod->peer_free);
        OBJ_DESTRUCT(&mod->peer_names);
        OBJ_DESTRUCT(&mod->peers);
        OBJ_DESTRUCT(&mod->peer_list);

        OBJ_CONSTRUCT(&mod->peer_list, opal_list_t);
        OBJ_CONSTRUCT(&mod->peers, opal_hash_table_t);
        OBJ_CONSTRUCT(&mod->peer_names, opal_hash_table_t);
        OBJ_CONSTRUCT(&mod->peer_free, opal_free_list_t);

        /*
         * Resume event processing
         */
        opal_event_enable();
#endif
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    return;
}
#endif
|
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_H_
|
||||
#define _MCA_OOB_USOCK_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/mca/oob/oob.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Verbosity levels used by this component's debug output; the
 * connection-handshake messages are emitted at the CONNECT level.
 */
#define OOB_USOCK_DEBUG_FAIL    2
#define OOB_USOCK_DEBUG_CONNECT 7
|
||||
|
||||
/* forward declare a couple of structures */
|
||||
struct mca_oob_usock_module_t;
|
||||
struct mca_oob_usock_msg_error_t;
|
||||
|
||||
/* Module definition */
|
||||
typedef void (*mca_oob_usock_module_init_fn_t)(void);
|
||||
typedef void (*mca_oob_usock_module_fini_fn_t)(void);
|
||||
typedef void (*mca_oob_usock_module_accept_connection_fn_t)(const int accepted_fd,
|
||||
const struct sockaddr *addr);
|
||||
typedef void (*mca_oob_usock_module_ping_fn_t)(const orte_process_name_t *proc);
|
||||
typedef void (*mca_oob_usock_module_send_nb_fn_t)(orte_rml_send_t *msg);
|
||||
typedef void (*mca_oob_usock_module_ft_event_fn_t)(int state);
|
||||
|
||||
typedef struct {
|
||||
mca_oob_usock_module_init_fn_t init;
|
||||
mca_oob_usock_module_fini_fn_t finalize;
|
||||
mca_oob_usock_module_accept_connection_fn_t accept_connection;
|
||||
mca_oob_usock_module_ping_fn_t ping;
|
||||
mca_oob_usock_module_send_nb_fn_t send_nb;
|
||||
mca_oob_usock_module_ft_event_fn_t ft_event;
|
||||
} mca_oob_usock_module_api_t;
|
||||
typedef struct {
|
||||
mca_oob_usock_module_api_t api;
|
||||
opal_event_base_t *ev_base; /* event base for the module progress thread */
|
||||
bool ev_active;
|
||||
opal_thread_t progress_thread;
|
||||
opal_hash_table_t peers; // peer connection info
|
||||
} mca_oob_usock_module_t;
|
||||
ORTE_MODULE_DECLSPEC extern mca_oob_usock_module_t mca_oob_usock_module;
|
||||
|
||||
/**
 * The state of the connection to a peer.
 */
typedef enum {
    MCA_OOB_USOCK_UNCONNECTED,
    MCA_OOB_USOCK_CLOSED,
    MCA_OOB_USOCK_RESOLVE,
    MCA_OOB_USOCK_CONNECTING,
    MCA_OOB_USOCK_CONNECT_ACK,
    MCA_OOB_USOCK_CONNECTED,
    MCA_OOB_USOCK_FAILED,
    MCA_OOB_USOCK_ACCEPTING
} mca_oob_usock_state_t;
|
||||
|
||||
/* module-level shared functions */
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_send_handler(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_recv_handler(int fd, short args, void *cbdata);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_H_ */
|
||||
|
@ -1,604 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
|
||||
* instead of things like EAGAIN, EINPROGRESS, etc. It has been
|
||||
* verified that this will not conflict with other error codes that
|
||||
* are returned by these functions under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/util/listener.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
|
||||
static int usock_component_register(void);
|
||||
static int usock_component_open(void);
|
||||
static int usock_component_close(void);
|
||||
|
||||
static int component_available(void);
|
||||
static int component_startup(void);
|
||||
static void component_shutdown(void);
|
||||
static int component_send(orte_rml_send_t *msg);
|
||||
static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
mca_oob_usock_component_t mca_oob_usock_component = {
|
||||
{
|
||||
.oob_base = {
|
||||
MCA_OOB_BASE_VERSION_2_0_0,
|
||||
.mca_component_name = "usock",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = usock_component_open,
|
||||
.mca_close_component = usock_component_close,
|
||||
.mca_register_component_params = usock_component_register,
|
||||
},
|
||||
.oob_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.priority = 100,
|
||||
.available = component_available,
|
||||
.startup = component_startup,
|
||||
.shutdown = component_shutdown,
|
||||
.send_nb = component_send,
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static int usock_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup of global variables used by this module.
|
||||
*/
|
||||
static int usock_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int usock_component_register(void)
|
||||
{
|
||||
mca_base_component_t *component = &mca_oob_usock_component.super.oob_base;
|
||||
|
||||
/* register oob module parameters */
|
||||
mca_oob_usock_component.max_retries = 2;
|
||||
(void)mca_base_component_var_register(component, "peer_retries",
|
||||
"Number of times to try shutting down a connection before giving up",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_oob_usock_component.max_retries);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int component_available(void)
|
||||
{
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"oob:usock: component_available called");
|
||||
|
||||
/* if session directories were forbidden, then we cannot be used */
|
||||
if (!orte_create_session_dirs ||
|
||||
NULL == orte_process_info.jobfam_session_dir ) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* this component is not available to tools */
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == orte_process_info.my_daemon_uri) {
|
||||
/* direct-launched apps cannot use it */
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
/* apps launched by daemons *must* use it */
|
||||
return ORTE_ERR_FORCE_SELECT;
|
||||
}
|
||||
|
||||
/* otherwise, we are available */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for accepting connections from the event library
|
||||
*/
|
||||
static void connection_event_handler(int incoming_sd, short flags, void* cbdata)
|
||||
{
|
||||
orte_pending_connection_t *pending = (orte_pending_connection_t*)cbdata;
|
||||
int sd;
|
||||
|
||||
sd = pending->fd;
|
||||
pending->fd = -1;
|
||||
OBJ_RELEASE(pending);
|
||||
|
||||
/* process the connection */
|
||||
mca_oob_usock_module.api.accept_connection(sd, NULL);
|
||||
}
|
||||
|
||||
/* Start the module */
|
||||
static int component_startup(void)
|
||||
{
|
||||
int rc=ORTE_SUCCESS;
|
||||
char *session;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK STARTUP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* setup the path to the daemon rendezvous point */
|
||||
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
|
||||
mca_oob_usock_component.address.sun_family = AF_UNIX;
|
||||
session = opal_os_path(false, orte_process_info.jobfam_session_dir,
|
||||
"usock", NULL);
|
||||
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
|
||||
opal_output(0, "SESSION DIR TOO LONG");
|
||||
free(session);
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
snprintf(mca_oob_usock_component.address.sun_path,
|
||||
sizeof(mca_oob_usock_component.address.sun_path)-1,
|
||||
"%s", session);
|
||||
free(session);
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s", mca_oob_usock_component.address.sun_path);
|
||||
|
||||
/* if we are a daemon/HNP, register our listener */
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
if (ORTE_SUCCESS != (rc = orte_register_listener((struct sockaddr*)&mca_oob_usock_component.address, sizeof(struct sockaddr_un),
|
||||
orte_event_base, connection_event_handler))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
} else {
|
||||
/* if the rendezvous point isn't there, then that's an error */
|
||||
/* if the rendezvous file doesn't exist, that's an error */
|
||||
if (0 != access(mca_oob_usock_component.address.sun_path, R_OK)) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s NOT READABLE", mca_oob_usock_component.address.sun_path);
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
/* start the module */
|
||||
mca_oob_usock_module.api.init();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void component_shutdown(void)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK SHUTDOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* delete the rendezvous file */
|
||||
unlink(mca_oob_usock_component.address.sun_path);
|
||||
}
|
||||
|
||||
/* shutdown the module */
|
||||
if (NULL != mca_oob_usock_module.api.finalize) {
|
||||
mca_oob_usock_module.api.finalize();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Component send entry point.
 *
 * Daemons and the HNP accept the message only when the destination
 * is a known proc flagged as local; otherwise
 * ORTE_ERR_TAKE_NEXT_OPTION is returned. In every other case the
 * message is handed to the module's non-blocking send and
 * ORTE_SUCCESS is returned.
 */
static int component_send(orte_rml_send_t *msg)
{
    orte_proc_t *proc;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s oob:usock:send_nb to peer %s:%d seq_num =%d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num);

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* daemons can only reach local procs */
        proc = orte_get_proc_object(&msg->dst);
        if (NULL == proc || !ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
    }

    /* apps can reach anyone via this module as the daemon
     * will route the message to the final destination
     */
    mca_oob_usock_module.api.send_nb(msg);

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Return this component's contribution to the process uri.
 *
 * The uri is not used to determine a peer's address (the path is
 * known via the session directory), but something must still be
 * provided: other places in ORTE use a NULL uri to indicate lack of
 * a daemon. That dependency may eventually be removed; for now just
 * ensure the uri is never NULL, even if we are the only active OOB
 * transport.
 *
 * The returned string is allocated with strdup().
 */
static char* component_get_addr(void)
{
    return strdup("usock");
}
|
||||
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris)
|
||||
{
|
||||
orte_proc_t *proc;
|
||||
mca_oob_usock_peer_t *pr;
|
||||
uint64_t ui64;
|
||||
|
||||
memcpy(&ui64, peer, sizeof(uint64_t));
|
||||
/* if I am an application, then everything is addressable
|
||||
* by me via my daemon
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* if this is my daemon, then take it - otherwise, ignore */
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == peer->jobid &&
|
||||
ORTE_PROC_MY_DAEMON->vpid == peer->vpid) {
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
/* we have to initiate the connection because otherwise the
|
||||
* daemon has no way to communicate to us via this component
|
||||
* as the app doesn't have a listening port */
|
||||
pr->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(pr, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/* otherwise, indicate that we cannot reach this peer */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
/* if I am a daemon or HNP, I can only reach my
|
||||
* own local procs via this component
|
||||
*/
|
||||
if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
|
||||
/* another daemon */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (NULL == (proc = orte_get_proc_object(peer)) ||
|
||||
!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
/* indicate that this peer is addressable by this component */
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_set_module(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
orte_oob_base_peer_t *bpr;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:set_module called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* make sure the OOB knows that we are handling this peer - we
|
||||
* are in the same event base as the OOB base, so we can
|
||||
* directly access its storage
|
||||
*/
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||
ui64, (void**)&bpr) || NULL == bpr) {
|
||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||
}
|
||||
opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx);
|
||||
bpr->component = &mca_oob_usock_component.super;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, bpr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:lost connection called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* activate the proc state - since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:unable to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->hop));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* have the OOB base try to send it again */
|
||||
ORTE_OOB_SEND(pop->rmsg);
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* if we are terminating, then don't do anything further */
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
OBJ_RELEASE(pop);
|
||||
return;
|
||||
}
|
||||
|
||||
/* activate the proc state */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect unable to reach peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
/*
 * Report whether the given peer can be reached through this component.
 *
 * Applications reach everything via their daemon, so they always get true.
 * Daemons and the HNP can only reach their own local procs: a peer in our own
 * job is another daemon (false), and any other peer must be a known proc that
 * carries the ORTE_PROC_FLAG_LOCAL flag.
 */
static bool component_is_reachable(orte_process_name_t *peer)
{
    orte_proc_t *target;

    if (ORTE_PROC_IS_APP) {
        return true;
    }

    if (peer->jobid == ORTE_PROC_MY_NAME->jobid) {
        /* same job as us, so this is another daemon */
        return false;
    }

    target = orte_get_proc_object(peer);
    return (NULL != target) && ORTE_FLAG_TEST(target, ORTE_PROC_FLAG_LOCAL);
}
|
||||
|
||||
/*
 * Translate a peer connection state into a static, human-readable string
 * for diagnostic output.  The returned pointer refers to a string literal
 * and must not be modified or freed.  Unrecognized values yield "UNKNOWN".
 */
char* mca_oob_usock_state_print(mca_oob_usock_state_t state)
{
    switch (state) {
    case MCA_OOB_USOCK_UNCONNECTED:
        return "UNCONNECTED";
    case MCA_OOB_USOCK_CLOSED:
        return "CLOSED";
    case MCA_OOB_USOCK_RESOLVE:
        return "RESOLVE";
    case MCA_OOB_USOCK_CONNECTING:
        return "CONNECTING";
    case MCA_OOB_USOCK_CONNECT_ACK:
        return "ACK";
    case MCA_OOB_USOCK_CONNECTED:
        return "CONNECTED";
    case MCA_OOB_USOCK_FAILED:
        return "FAILED";
    case MCA_OOB_USOCK_ACCEPTING:
        /* state given to peers created while handling an incoming connection */
        return "ACCEPTING";
    default:
        return "UNKNOWN";
    }
}
|
||||
|
||||
|
||||
mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name)
|
||||
{
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t ui64;
|
||||
|
||||
memcpy(&ui64, (char*)name, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, ui64, (void**)&peer)) {
|
||||
return NULL;
|
||||
}
|
||||
return peer;
|
||||
}
|
||||
|
||||
/* OOB USOCK Class instances */
|
||||
|
||||
/* Constructor: put a new peer object into its idle, unconnected state. */
static void peer_cons(mca_oob_usock_peer_t *peer)
{
    /* connection bookkeeping */
    peer->sd = -1;
    peer->state = MCA_OOB_USOCK_UNCONNECTED;
    peer->retries = 0;
    peer->auth_method = NULL;

    /* message queues */
    OBJ_CONSTRUCT(&peer->send_queue, opal_list_t);
    peer->send_msg = NULL;
    peer->recv_msg = NULL;

    /* no events are registered yet */
    peer->send_ev_active = false;
    peer->recv_ev_active = false;
    peer->timer_ev_active = false;
}
|
||||
/* Destructor: free the auth method string, close an open socket and
 * tear down the send queue. */
static void peer_des(mca_oob_usock_peer_t *peer)
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(peer->auth_method);

    if (peer->sd >= 0) {
        CLOSE_THE_SOCKET(peer->sd);
    }

    OPAL_LIST_DESTRUCT(&peer->send_queue);
}
|
||||
/* peer object: a list item built by peer_cons and torn down by peer_des */
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_t, opal_list_item_t, peer_cons, peer_des);

/* peer operation object: plain object, no constructor or destructor */
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_op_t, opal_object_t, NULL, NULL);
|
||||
|
||||
/* Constructor for message operation objects: start with a zero "reps" count. */
static void mopcon(mca_oob_usock_msg_op_t *op)
{
    op->reps = 0;
}
|
||||
/* message operation object: constructed by mopcon, no destructor */
OBJ_CLASS_INSTANCE(mca_oob_usock_msg_op_t, opal_object_t, mopcon, NULL);

/* connection operation object: no constructor or destructor */
OBJ_CLASS_INSTANCE(mca_oob_usock_conn_op_t, opal_object_t, NULL, NULL);

/* ping object: no constructor or destructor */
OBJ_CLASS_INSTANCE(mca_oob_usock_ping_t, opal_object_t, NULL, NULL);
|
||||
|
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_COMPONENT_H_
|
||||
#define _MCA_OOB_USOCK_COMPONENT_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_UN_H
|
||||
#include <sys/un.h>
|
||||
#endif
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
|
||||
#include "orte/mca/oob/oob.h"
|
||||
#include "oob_usock_peer.h"
|
||||
#include "oob_usock.h"
|
||||
|
||||
/**
|
||||
* OOB USOCK Component
|
||||
*/
|
||||
typedef struct {
|
||||
mca_oob_base_component_t super; /**< base OOB component */
|
||||
int max_retries; /**< max number of retries before declaring peer gone */
|
||||
struct sockaddr_un address; /**< address of our rendezvous point */
|
||||
} mca_oob_usock_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern mca_oob_usock_component_t mca_oob_usock_component;
|
||||
|
||||
ORTE_MODULE_DECLSPEC char* mca_oob_usock_state_print(mca_oob_usock_state_t state);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_set_module(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_COMPONENT_H_ */
|
@ -1,967 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/mca/sec/sec.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/fd.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/* forward declarations of file-local helpers */

/* event setup and connection state */
static void usock_peer_event_init(mca_oob_usock_peer_t* peer);
static void usock_peer_connected(mca_oob_usock_peer_t* peer);

/* connection handshake */
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer);

/* blocking transfers of handshake data on socket sd */
static int usock_peer_send_blocking(mca_oob_usock_peer_t* peer,
                                    int sd, void* data, size_t size);
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer,
                                     int sd, void* data, size_t size);
|
||||
|
||||
/*
 * Make sure the peer owns a unix-domain stream socket.
 *
 * Returns ORTE_SUCCESS when the peer already has a descriptor or a new one
 * was created, ORTE_ERR_UNREACH when socket() fails, and ORTE_ERROR when the
 * descriptor cannot be marked close-on-exec.  A new socket has its events
 * initialized and is switched to non-blocking mode; a failure to switch
 * modes is reported but still returns ORTE_SUCCESS.
 */
static int usock_peer_create_socket(mca_oob_usock_peer_t* peer)
{
    int fl;

    /* nothing to do when the peer already has a descriptor */
    if (0 <= peer->sd) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_oob_base_framework.framework_output,
                         "%s oob:usock:peer creating socket to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&(peer->name))));

    peer->sd = socket(PF_UNIX, SOCK_STREAM, 0);
    if (0 > peer->sd) {
        opal_output(0, "%s-%s usock_peer_create_socket: socket() failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return ORTE_ERR_UNREACH;
    }

    /* close-on-exec keeps the descriptor out of subsequent children */
    if (OPAL_SUCCESS != opal_fd_set_cloexec(peer->sd)) {
        opal_output(0, "%s unable to set socket to CLOEXEC",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        close(peer->sd);
        peer->sd = -1;
        return ORTE_ERROR;
    }

    /* bind the send/recv events to the new descriptor */
    usock_peer_event_init(peer);

    /* switch to non-blocking mode; failures are reported but not fatal */
    fl = fcntl(peer->sd, F_GETFL, 0);
    if (fl < 0) {
        opal_output(0, "%s-%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return ORTE_SUCCESS;
    }

    if (fcntl(peer->sd, F_SETFL, fl | O_NONBLOCK) < 0) {
        opal_output(0, "%s-%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
    }

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
|
||||
* Try connecting to a peer
|
||||
*/
|
||||
void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer = op->peer;
|
||||
int rc;
|
||||
opal_socklen_t addrlen = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
rc = usock_peer_create_socket(peer);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
/* FIXME: we cannot create a USOCK socket - report
|
||||
* back to the component that this peer is
|
||||
* unreachable so it can remove the peer
|
||||
* from its list and report back to the base
|
||||
* NOTE: this could be a reconnect attempt,
|
||||
* so we also need to mark any queued messages
|
||||
* and return them as "unreachable"
|
||||
*/
|
||||
opal_output(0, "%s CANNOT CREATE SOCKET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
addrlen = sizeof(struct sockaddr_un);
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s - %d retries",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->retries);
|
||||
|
||||
retry_connect:
|
||||
peer->retries++;
|
||||
if (connect(peer->sd, (struct sockaddr *) &mca_oob_usock_component.address, addrlen) < 0) {
|
||||
/* non-blocking so wait for completion */
|
||||
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect completion to %s - activating send event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* just ensure the send_event is active */
|
||||
if (!peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Some kernels (Linux 2.6) will automatically software
|
||||
abort a connection that was ECONNREFUSED on the last
|
||||
attempt, without even trying to establish the
|
||||
connection. Handle that case in a semi-rational
|
||||
way by trying twice before giving up */
|
||||
if (ECONNABORTED == opal_socket_errno) {
|
||||
if (peer->retries < mca_oob_usock_component.max_retries) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connection aborted by OS to %s - retrying",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
goto retry_connect;
|
||||
} else {
|
||||
/* We were unsuccessful in establishing this connection, and are
|
||||
* not likely to suddenly become successful,
|
||||
*/
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across unix domain socket to local proc %s failed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
/* let the USOCK component know that this module failed to make
|
||||
* the connection so it can try other modules, and/or fail back
|
||||
* to the OOB level so another component can try. This will activate
|
||||
* an event in the component event base, and so it will fire async
|
||||
* from us if we are in our own progress thread
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_failed_to_connect);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* connection succeeded */
|
||||
peer->retries = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across to proc %s succeeded",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* setup our recv to catch the return ack call */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
|
||||
/* send our globally unique process identifier to the peer */
|
||||
if (ORTE_SUCCESS == (rc = usock_peer_send_connect_ack(peer))) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"usock_peer_send_connect_ack to proc %s failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
opal_strerror(rc), rc);
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Send the connection handshake to the peer: a header identifying us,
 * followed by our NUL-terminated version string and our security credential.
 *
 * Returns ORTE_SUCCESS when the whole message was sent, the error from the
 * security framework when no credential could be obtained,
 * ORTE_ERR_OUT_OF_RESOURCE on allocation failure, and ORTE_ERR_UNREACH when
 * the send fails.
 */
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer)
{
    char *msg;
    mca_oob_usock_hdr_t hdr;
    int rc;
    size_t sdsize;
    size_t verlen;
    char *cred;
    size_t credsize;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s SEND CONNECT ACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* send a handshake that includes our process identifier
     * to ensure we are talking to another OMPI process.  Zero the
     * header first so no uninitialized stack bytes are sent to the peer.
     */
    memset(&hdr, 0, sizeof(hdr));
    hdr.origin = *ORTE_PROC_MY_NAME;
    hdr.dst = peer->name;
    hdr.type = MCA_OOB_USOCK_IDENT;
    hdr.tag = 0;
    hdr.channel = 0xffffffff;
    hdr.seq_num = 0;

    /* get our security credential */
    if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(peer->auth_method,
                                                         ORTE_PROC_MY_NAME, &cred, &credsize))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* length of the version string including its terminating NUL */
    verlen = strlen(orte_version_string) + 1;

    /* set the number of bytes to be read beyond the header */
    hdr.nbytes = verlen + credsize;

    /* create a zeroed space for our message */
    sdsize = sizeof(hdr) + verlen + credsize;
    if (NULL == (msg = (char*)calloc(1, sdsize))) {
        /* the credential is ours to free, so do not leak it on this path */
        free(cred);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* load the message: header, version string (with NUL), credential */
    memcpy(msg, &hdr, sizeof(hdr));
    memcpy(msg + sizeof(hdr), orte_version_string, verlen);
    memcpy(msg + sizeof(hdr) + verlen, cred, credsize);
    free(cred);

    rc = usock_peer_send_blocking(peer, peer->sd, msg, sdsize);
    free(msg);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(ORTE_ERR_UNREACH);
        return ORTE_ERR_UNREACH;
    }
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Initialize events to be used by the peer instance for USOCK select/poll callbacks.
|
||||
*/
|
||||
static void usock_peer_event_init(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
if (peer->sd >= 0) {
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->recv_event,
|
||||
peer->sd,
|
||||
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_recv_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->recv_event, ORTE_MSG_PRI);
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->send_event,
|
||||
peer->sd,
|
||||
OPAL_EV_WRITE|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_send_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->send_event, ORTE_MSG_PRI);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the status of the connection. If the connection failed, will retry
|
||||
* later. Otherwise, send this processes identifier to the peer on the
|
||||
* newly connected socket.
|
||||
*/
|
||||
void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
int so_error = 0, rc;
|
||||
opal_socklen_t so_length = sizeof(so_error);
|
||||
orte_oob_base_peer_t *bpr;
|
||||
uint64_t ui64;
|
||||
mca_oob_usock_peer_t *pr;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:complete_connect called for peer %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
|
||||
/* check connect completion status */
|
||||
if (getsockopt(peer->sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||
opal_output(0, "%s usock_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (so_error == EINPROGRESS) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:send:handler still in progress",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
} else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: connection failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(so_error),
|
||||
so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
} else if (so_error != 0) {
|
||||
/* No need to worry about the return code here - we return regardless
|
||||
at this point, and if an error did occur a message has already been
|
||||
printed for the user */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: "
|
||||
"connection failed with error %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"sending ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) == ORTE_SUCCESS) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"setting read event on connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
} else {
|
||||
opal_output(0, "%s usock_peer_complete_connect: unable to send connect ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
}
|
||||
|
||||
/* make sure the OOB knows that we are handling this peer - we
|
||||
* are in the same event base as the OOB base, so we can
|
||||
* directly access its storage
|
||||
*/
|
||||
memcpy(&ui64, (char*)&(peer->name), sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||
ui64, (void**)&bpr) || NULL == bpr) {
|
||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||
}
|
||||
opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx);
|
||||
bpr->component = &mca_oob_usock_component.super;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, bpr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
/* record it locally too */
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = peer->name;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking send on a non-blocking socket. Used to send the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static int usock_peer_send_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
int retval;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s send blocking of %"PRIsize_t" bytes to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
size, sd);
|
||||
|
||||
while (cnt < size) {
|
||||
retval = send(sd, (char*)ptr+cnt, size-cnt, 0);
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd,
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s blocking send complete to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Receive the peers globally unique process identification from a newly
|
||||
* connected socket and verify the expected response. If so, move the
|
||||
* socket to a connected state.
|
||||
*/
|
||||
int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* pr, int sd,
|
||||
mca_oob_usock_hdr_t *dhdr)
|
||||
{
|
||||
char *msg;
|
||||
char *version;
|
||||
int rc, cmpval;
|
||||
char *cred;
|
||||
size_t credsize;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
uint64_t ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECV CONNECT ACK FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == pr) ? "UNKNOWN" : ORTE_NAME_PRINT(&pr->name), sd);
|
||||
|
||||
peer = pr;
|
||||
/* ensure all is zero'd */
|
||||
memset(&hdr, 0, sizeof(mca_oob_usock_hdr_t));
|
||||
|
||||
if (usock_peer_recv_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t))) {
|
||||
if (NULL != peer) {
|
||||
/* If the peer state is CONNECT_ACK, then we were waiting for
|
||||
* the connection to be ack'd
|
||||
*/
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* handshake broke down - abort this connection */
|
||||
opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), sd);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
/* if the requestor wanted the header returned, then do so now */
|
||||
if (NULL != dhdr) {
|
||||
*dhdr = hdr;
|
||||
}
|
||||
|
||||
if (MCA_OOB_USOCK_PROBE == hdr.type) {
|
||||
/* send a header back */
|
||||
hdr.type = MCA_OOB_USOCK_PROBE;
|
||||
hdr.dst = hdr.origin;
|
||||
hdr.origin = *ORTE_PROC_MY_NAME;
|
||||
usock_peer_send_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t));
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (hdr.type != MCA_OOB_USOCK_IDENT) {
|
||||
opal_output(0, "usock_peer_recv_connect_ack: invalid header type: %d\n", hdr.type);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack recvd from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if we don't already have it, get the peer */
|
||||
if (NULL == peer) {
|
||||
peer = mca_oob_usock_peer_lookup(&hdr.origin);
|
||||
if (NULL == peer) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s mca_oob_usock_recv_connect: connection from new peer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
peer->name = hdr.origin;
|
||||
peer->state = MCA_OOB_USOCK_ACCEPTING;
|
||||
peer->sd = sd;
|
||||
memcpy(&ui64, &peer->name, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, peer)) {
|
||||
OBJ_RELEASE(peer);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
} else {
|
||||
/* check for a race condition - if I was in the process of
|
||||
* creating a connection to the peer, or have already established
|
||||
* such a connection, then we need to reject this connection. We will
|
||||
* let the higher ranked process retry - if I'm the lower ranked
|
||||
* process, I'll simply defer until I receive the request
|
||||
*/
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECTING == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s SIMUL CONNECTION WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr.origin));
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
if (0 < peer->sd) {
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
peer->sd = -1;
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
peer->retries = 0;
|
||||
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &hdr.origin, ORTE_PROC_MY_NAME);
|
||||
if (OPAL_VALUE1_GREATER == cmpval) {
|
||||
/* force the other end to retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_UNCONNECTED;
|
||||
return ORTE_ERR_UNREACH;
|
||||
} else {
|
||||
/* retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* compare the peers name to the expected value */
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &hdr.origin)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received unexpected process identifier %s from %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(hdr.origin)),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack header from %s is okay",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* get the authentication and version payload */
|
||||
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (!usock_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
/* check that this is from a matching version */
|
||||
version = (char*)(msg);
|
||||
if (0 != strcmp(version, orte_version_string)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received different version from %s: %s instead of %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
version, orte_version_string);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack version from %s matches ours",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* check security token */
|
||||
cred = (char*)(msg + strlen(version) + 1);
|
||||
credsize = hdr.nbytes - strlen(version) - 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, &peer->auth_method))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
free(msg);
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack %s authenticated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if the requestor wanted the header returned, then they
|
||||
* will complete their processing
|
||||
*/
|
||||
if (NULL != dhdr) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
/* connected */
|
||||
usock_peer_connected(peer);
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "connected");
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup peer state to reflect that connection has been established,
|
||||
* and start any pending sends.
|
||||
*/
|
||||
static void usock_peer_connected(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_connected on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
|
||||
/* initiate send of first message on queue */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any event registrations associated with the socket
|
||||
* and update the peer state to reflect the connection has
|
||||
* been closed.
|
||||
*/
|
||||
void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
mca_oob_usock_send_t *snd;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_close for %s sd %d state %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->sd, mca_oob_usock_state_print(peer->state));
|
||||
|
||||
peer->state = MCA_OOB_USOCK_CLOSED;
|
||||
|
||||
/* release the socket */
|
||||
close(peer->sd);
|
||||
peer->sd = -1;
|
||||
|
||||
/* inform the component-level that we have lost a connection so
|
||||
* it can decide what to do about it.
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_lost_connection);
|
||||
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
/* nothing more to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/* FIXME: push any queued messages back onto the OOB for retry - note that
|
||||
* this must be done after the prior call to ensure that the component
|
||||
* processes the "lost connection" notice before the OOB begins to
|
||||
* handle these recycled messages. This prevents us from unintentionally
|
||||
* attempting to send the message again across the now-failed interface
|
||||
*/
|
||||
if (NULL != peer->send_msg) {
|
||||
}
|
||||
while (NULL != (snd = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue))) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking recv on a non-blocking socket. Used to receive the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect ack from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
while (cnt < size) {
|
||||
int retval = recv(sd, (char *)ptr+cnt, size-cnt, 0);
|
||||
|
||||
/* remote closed connection */
|
||||
if (retval == 0) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_recv_blocking: "
|
||||
"peer closed connection: peer state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
(NULL == peer) ? 0 : peer->state);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* socket is non-blocking so handle errors */
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR &&
|
||||
opal_socket_errno != EAGAIN &&
|
||||
opal_socket_errno != EWOULDBLOCK) {
|
||||
if (peer->state == MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* If we overflow the listen backlog, it's
|
||||
possible that even though we finished the three
|
||||
way handshake, the remote host was unable to
|
||||
transition the connection from half connected
|
||||
(received the initial SYN) to fully connected
|
||||
(in the listen backlog). We likely won't see
|
||||
the failure until we try to receive, due to
|
||||
timing and the like. The first thing we'll get
|
||||
in that case is a RST packet, which receive
|
||||
will turn into a connection reset by peer
|
||||
errno. In that case, leave the socket in
|
||||
CONNECT_ACK and propogate the error up to
|
||||
recv_connect_ack, who will try to establish the
|
||||
connection again */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received error %s from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(opal_socket_errno),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return false;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s usock_peer_recv_blocking: "
|
||||
"recv() failed for %s: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Routine for debugging to print the connection state and socket options
|
||||
*/
|
||||
void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg)
|
||||
{
|
||||
char buff[255];
|
||||
int nodelay,flags;
|
||||
|
||||
if ((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#if defined(USOCK_NODELAY)
|
||||
optlen = sizeof(nodelay);
|
||||
if (getsockopt(peer->sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) {
|
||||
opal_output(0, "usock_peer_dump: USOCK_NODELAY option: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#else
|
||||
nodelay = 0;
|
||||
#endif
|
||||
|
||||
snprintf(buff, sizeof(buff), "%s-%s %s: nodelay %d flags %08x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg, nodelay, flags);
|
||||
opal_output(0, "%s", buff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accept incoming connection - if not already connected
|
||||
*/
|
||||
|
||||
bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept called for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECTED) {
|
||||
|
||||
usock_peer_event_init(peer);
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) != ORTE_SUCCESS) {
|
||||
opal_output(0, "%s-%s usock_peer_accept: "
|
||||
"usock_peer_send_connect_ack failed\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
usock_peer_connected(peer);
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
/* if a message is waiting to be sent, ensure the send event is active */
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "accepted");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept ignored for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
return false;
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_CONNECTION_H_
|
||||
#define _MCA_OOB_USOCK_CONNECTION_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_peer.h"
|
||||
|
||||
/* State machine for connection operations */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
opal_event_t ev;
|
||||
} mca_oob_usock_conn_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_conn_op_t);
|
||||
|
||||
/* Shut down both directions of a socket and then close its descriptor.
 * The argument is evaluated twice, so pass a plain variable rather
 * than an expression with side effects. */
#define CLOSE_THE_SOCKET(socket)            \
    do {                                    \
        shutdown((socket), SHUT_RDWR);      \
        close((socket));                    \
    } while(0)
|
||||
|
||||
/* Run cbfunc from the usock module's event base for peer p. A
 * mca_oob_usock_conn_op_t carrying the peer is allocated and handed to
 * cbfunc as its callback data; the event is not tied to a descriptor
 * (fd -1) and is activated immediately.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_ACTIVATE_USOCK_CONN_STATE(p, cbfunc)                       \
    do {                                                                \
        mca_oob_usock_conn_op_t *cop;                                   \
        opal_output_verbose(5, orte_oob_base_framework.framework_output, \
                            "%s:[%s:%d] connect to %s",                 \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
                            __FILE__, __LINE__,                         \
                            ORTE_NAME_PRINT((&(p)->name)));             \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                         \
        cop->peer = (p);                                                \
        opal_event_set(mca_oob_usock_module.ev_base, &cop->ev, -1,      \
                       OPAL_EV_WRITE, (cbfunc), cop);                   \
        opal_event_set_priority(&cop->ev, ORTE_MSG_PRI);                \
        opal_event_active(&cop->ev, OPAL_EV_WRITE, 1);                  \
    } while(0)
|
||||
|
||||
/* Register cbfunc to be called from the usock module's event base when
 * socket s becomes readable. A fresh mca_oob_usock_conn_op_t is passed
 * to cbfunc as its callback data. The a argument is accepted for the
 * benefit of existing callers but is not used by the expansion.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_ACTIVATE_USOCK_ACCEPT_STATE(s, a, cbfunc)                  \
    do {                                                                \
        mca_oob_usock_conn_op_t *cop;                                   \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                         \
        opal_event_set(mca_oob_usock_module.ev_base, &cop->ev, (s),     \
                       OPAL_EV_READ, (cbfunc), cop);                    \
        opal_event_set_priority(&cop->ev, ORTE_MSG_PRI);                \
        opal_event_add(&cop->ev, 0);                                    \
    } while(0)
|
||||
|
||||
/* Schedule cbfunc to run for peer p after the delay tv, using a timer
 * event on the usock module's event base. A mca_oob_usock_conn_op_t
 * carrying the peer is allocated and handed to cbfunc as its callback
 * data.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_RETRY_USOCK_CONN_STATE(p, cbfunc, tv)                      \
    do {                                                                \
        mca_oob_usock_conn_op_t *cop;                                   \
        opal_output_verbose(5, orte_oob_base_framework.framework_output, \
                            "%s:[%s:%d] retry connect to %s",           \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
                            __FILE__, __LINE__,                         \
                            ORTE_NAME_PRINT((&(p)->name)));             \
        cop = OBJ_NEW(mca_oob_usock_conn_op_t);                         \
        cop->peer = (p);                                                \
        opal_event_evtimer_set(mca_oob_usock_module.ev_base,            \
                               &cop->ev,                                \
                               (cbfunc), cop);                          \
        opal_event_evtimer_add(&cop->ev, (tv));                         \
    } while(0)
|
||||
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg);
|
||||
ORTE_MODULE_DECLSPEC bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* peer,
|
||||
int sd, mca_oob_usock_hdr_t *hdr);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_CONNECTION_H_ */
|
@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_HDR_H_
|
||||
#define _MCA_OOB_USOCK_HDR_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
/* Message types carried in the usock header. The first three are
 * internal to this component and serve its own handshake operations;
 * the last marks a message that came from a source external to the
 * component.
 */
typedef enum {
    MCA_OOB_USOCK_IDENT,
    MCA_OOB_USOCK_PROBE,
    MCA_OOB_USOCK_PING,
    MCA_OOB_USOCK_USER
} mca_oob_usock_msg_type_t;
|
||||
|
||||
/* header for usock msgs */
|
||||
typedef struct {
|
||||
/* the original sender */
|
||||
orte_process_name_t origin;
|
||||
/* the intended final recipient */
|
||||
orte_process_name_t dst;
|
||||
/* type of message */
|
||||
mca_oob_usock_msg_type_t type;
|
||||
/* the rml tag where this message is headed */
|
||||
orte_rml_tag_t tag;
|
||||
/* the rml channel to which this message is headed */
|
||||
orte_rml_channel_num_t channel;
|
||||
/* msg seq number on the src channel */
|
||||
uint32_t seq_num;
|
||||
/* number of bytes in message */
|
||||
uint32_t nbytes;
|
||||
} mca_oob_usock_hdr_t;
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_HDR_H_ */
|
@ -1,85 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PEER_H_
|
||||
#define _MCA_OOB_USOCK_PEER_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
/* object for tracking peers */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
/* although not required, there is enough debug
|
||||
* value that retaining the name makes sense
|
||||
*/
|
||||
orte_process_name_t name;
|
||||
char *auth_method; // how the peer authenticated themselves to use
|
||||
int sd;
|
||||
int retries; // number of times we have tried to connect to this address
|
||||
mca_oob_usock_state_t state;
|
||||
opal_event_t op_event; // used for connecting and operations other than read/write
|
||||
opal_event_t send_event; /**< registration with event thread for send events */
|
||||
bool send_ev_active;
|
||||
opal_event_t recv_event; /**< registration with event thread for recv events */
|
||||
bool recv_ev_active;
|
||||
opal_event_t timer_event; /**< timer for retrying connection failures */
|
||||
bool timer_ev_active;
|
||||
opal_list_t send_queue; /**< list of messages to send */
|
||||
mca_oob_usock_send_t *send_msg; /**< current send in progress */
|
||||
mca_oob_usock_recv_t *recv_msg; /**< current recv in progress */
|
||||
} mca_oob_usock_peer_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_t);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
} mca_oob_usock_peer_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_op_t);
|
||||
|
||||
/* Run cbfunc for peer p from an event base. A mca_oob_usock_peer_op_t
 * carrying the peer is allocated and handed to cbfunc as its callback
 * data; the event is not tied to a descriptor (fd -1) and is activated
 * immediately.
 *
 * NOTE(review): this expansion references mca_usock_component.ev_base,
 * while the other usock macros use mca_oob_usock_module.ev_base -
 * confirm that symbol exists before relying on this macro.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_ACTIVATE_USOCK_PEER_OP(p, cbfunc)                          \
    do {                                                                \
        mca_oob_usock_peer_op_t *op;                                    \
        op = OBJ_NEW(mca_oob_usock_peer_op_t);                          \
        op->peer = (p);                                                 \
        opal_event_set(mca_usock_component.ev_base, &op->ev, -1,        \
                       OPAL_EV_WRITE, (cbfunc), op);                    \
        opal_event_set_priority(&op->ev, ORTE_MSG_PRI);                 \
        opal_event_active(&op->ev, OPAL_EV_WRITE, 1);                   \
    } while(0)
|
||||
|
||||
/* Run the component-level callback cbfunc for peer p from
 * orte_event_base. A mca_oob_usock_peer_op_t carrying the peer is
 * allocated and handed to cbfunc as its callback data; the event is
 * not tied to a descriptor (fd -1) and is activated immediately.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_ACTIVATE_USOCK_CMP_OP(p, cbfunc)                           \
    do {                                                                \
        mca_oob_usock_peer_op_t *pop;                                   \
        pop = OBJ_NEW(mca_oob_usock_peer_op_t);                         \
        pop->peer = (p);                                                \
        opal_event_set(orte_event_base, &pop->ev, -1,                   \
                       OPAL_EV_WRITE, (cbfunc), pop);                   \
        opal_event_set_priority(&pop->ev, ORTE_MSG_PRI);                \
        opal_event_active(&pop->ev, OPAL_EV_WRITE, 1);                  \
    } while(0)
|
||||
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PEER_H_ */
|
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PING_H_
|
||||
#define _MCA_OOB_USOCK_PING_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_process_name_t peer;
|
||||
} mca_oob_usock_ping_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_ping_t);
|
||||
|
||||
/* Run cbfunc from the usock module's event base to ping the process
 * named by p (a pointer to a process name). The jobid and vpid are
 * copied into a new mca_oob_usock_ping_t that is handed to cbfunc as
 * its callback data; the event is not tied to a descriptor (fd -1) and
 * is activated immediately.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement, e.g. inside an unbraced if/else. */
#define ORTE_ACTIVATE_USOCK_PING(p, cbfunc)                             \
    do {                                                                \
        mca_oob_usock_ping_t *pop;                                      \
        pop = OBJ_NEW(mca_oob_usock_ping_t);                            \
        pop->peer.jobid = (p)->jobid;                                   \
        pop->peer.vpid = (p)->vpid;                                     \
        opal_event_set(mca_oob_usock_module.ev_base, &pop->ev, -1,      \
                       OPAL_EV_WRITE, (cbfunc), pop);                   \
        opal_event_set_priority(&pop->ev, ORTE_MSG_PRI);                \
        opal_event_active(&pop->ev, OPAL_EV_WRITE, 1);                  \
    } while(0)
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PING_H_ */
|
@ -1,610 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
* instead of things like EAGAIN, EINPROGRESS, etc. It has been
* verified that this will not conflict with other error codes that
* are returned by these functions under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/*
 * Write the outstanding bytes of the peer's current send message to
 * its socket, advancing the message's send pointer/count as data goes
 * out.
 *
 * Returns ORTE_SUCCESS once the whole block has been written,
 * ORTE_ERR_RESOURCE_BUSY on EAGAIN, ORTE_ERR_WOULD_BLOCK on
 * EWOULDBLOCK (in both cases the caller keeps the message active and
 * tries again later), or ORTE_ERR_COMM_FAILURE on any other write
 * error.
 */
static int send_bytes(mca_oob_usock_peer_t* peer)
{
    mca_oob_usock_send_t* snd = peer->send_msg;
    int nwritten;

    while (0 < snd->sdbytes) {
        nwritten = write(peer->sd, snd->sdptr, snd->sdbytes);
        if (0 <= nwritten) {
            /* move past whatever went out and keep going */
            snd->sdbytes -= nwritten;
            snd->sdptr += nwritten;
            continue;
        }

        /* interrupted - simply retry the write */
        if (EINTR == opal_socket_errno) {
            continue;
        }
        /* socket is busy: keep the message active, but hand control
         * back so the event lib can progress other messages
         */
        if (EAGAIN == opal_socket_errno) {
            return ORTE_ERR_RESOURCE_BUSY;
        }
        if (EWOULDBLOCK == opal_socket_errno) {
            return ORTE_ERR_WOULD_BLOCK;
        }

        /* a real error - this message cannot be progressed */
        opal_output(0, "%s->%s mca_oob_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno,
                    peer->sd);
        return ORTE_ERR_COMM_FAILURE;
    }

    /* the full data block has been sent */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* A file descriptor is available/ready for send. Check the state
|
||||
* of the socket and take the appropriate action.
|
||||
*/
|
||||
void mca_oob_usock_send_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
mca_oob_usock_send_t* msg = peer->send_msg;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler called to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECTING:
|
||||
case MCA_OOB_USOCK_CLOSED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
mca_oob_usock_state_print(peer->state));
|
||||
mca_oob_usock_peer_complete_connect(peer);
|
||||
/* de-activate the send event until the connection
|
||||
* handshake completes
|
||||
*/
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler SENDING TO %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer->send_msg) ? "NULL" : ORTE_NAME_PRINT(&peer->name));
|
||||
if (NULL != msg) {
|
||||
/* if the header hasn't been completely sent, send it */
|
||||
if (!msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* header is completely sent */
|
||||
msg->hdr_sent = true;
|
||||
/* setup to send the data */
|
||||
if (NULL == msg->msg) {
|
||||
/* this was a zero-byte msg - nothing more to do */
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
} else if (NULL != msg->msg->buffer) {
|
||||
/* send the buffer data as a single block */
|
||||
msg->sdptr = msg->msg->buffer->base_ptr;
|
||||
msg->sdbytes = msg->msg->buffer->bytes_used;
|
||||
} else if (NULL != msg->msg->iov) {
|
||||
/* start with the first iovec */
|
||||
msg->sdptr = msg->msg->iov[0].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[0].iov_len;
|
||||
msg->iovnum = 0;
|
||||
} else {
|
||||
msg->sdptr = msg->msg->data;
|
||||
msg->sdbytes = msg->msg->count;
|
||||
}
|
||||
/* fall thru and let the send progress */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send header",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
/* progress the data transmission */
|
||||
if (msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* this block is complete */
|
||||
if (NULL != msg->msg->buffer) {
|
||||
/* we are done - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else if (NULL != msg->msg->data) {
|
||||
/* this was a relay message - nothing more to do */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else {
|
||||
/* rotate to the next iovec */
|
||||
msg->iovnum++;
|
||||
if (msg->iovnum < msg->msg->count) {
|
||||
msg->sdptr = msg->msg->iov[msg->iovnum].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[msg->iovnum].iov_len;
|
||||
/* exit this event to give the event lib
|
||||
* a chance to progress any other pending
|
||||
* actions
|
||||
*/
|
||||
return;
|
||||
} else {
|
||||
/* this message is complete - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
}
|
||||
}
|
||||
/* fall thru to queue the next message */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send message ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
next:
|
||||
/* if current message completed - progress any pending sends by
|
||||
* moving the next in the queue into the "on-deck" position. Note
|
||||
* that this doesn't mean we send the message right now - we will
|
||||
* wait for another send_event to fire before doing so. This gives
|
||||
* us a chance to service any pending recvs.
|
||||
*/
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
/* if nothing else to do unregister for send event notifications */
|
||||
if (NULL == peer->send_msg && peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: invalid connection state (%d) on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state, peer->sd);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read the bytes still outstanding for the peer's current receive message.
 *
 * Reads from peer->sd into peer->recv_msg->rdptr until
 * peer->recv_msg->rdbytes drops to zero, advancing the pointer and
 * decrementing the count after every successful read().
 *
 * Returns:
 *   ORTE_SUCCESS           - the whole block was read
 *   ORTE_ERR_RESOURCE_BUSY - read() failed with EAGAIN
 *   ORTE_ERR_WOULD_BLOCK   - read() failed with EWOULDBLOCK, OR the remote
 *                            peer closed the connection (in that case all
 *                            peer events are stopped, recv_msg is released
 *                            and the peer is closed before returning)
 *   ORTE_ERR_COMM_FAILURE  - any other read() error
 */
static int read_bytes(mca_oob_usock_peer_t* peer)
{
    /* read() returns ssize_t; keep the full width rather than truncating */
    ssize_t rc;
    int err;

    /* read until all bytes recvd or error */
    while (0 < peer->recv_msg->rdbytes) {
        rc = read(peer->sd, peer->recv_msg->rdptr, peer->recv_msg->rdbytes);
        if (rc < 0) {
            /* capture the error code once, before any other call can
             * overwrite it
             */
            err = opal_socket_errno;
            if (EINTR == err) {
                continue;
            } else if (EAGAIN == err) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_RESOURCE_BUSY;
            } else if (EWOULDBLOCK == err) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_WOULD_BLOCK;
            }
            /* we hit an error and cannot progress this message - report
             * the error back to the RML and let the caller know
             * to abort this message
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)),
                                strerror(err),
                                err);
            return ORTE_ERR_COMM_FAILURE;
        } else if (rc == 0) {
            /* the remote peer closed the connection - report that condition
             * and let the caller know
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: peer closed connection",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)));
            /* stop all events */
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }
            if (NULL != peer->recv_msg) {
                /* the recv class has no destructor, so a partially
                 * filled payload buffer would leak here. The data
                 * pointer is only assigned once the header has been
                 * received, so only touch it in that case
                 */
                if (peer->recv_msg->hdr_recvd) {
                    free(peer->recv_msg->data);
                    peer->recv_msg->data = NULL;
                }
                OBJ_RELEASE(peer->recv_msg);
                peer->recv_msg = NULL;
            }
            mca_oob_usock_peer_close(peer);
            /* returned as "would block" so the caller simply exits the event */
            return ORTE_ERR_WOULD_BLOCK;
        }
        /* we were able to read something, so adjust counters and location */
        peer->recv_msg->rdbytes -= rc;
        peer->recv_msg->rdptr += rc;
    }

    /* we read the full data block */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Dispatch to the appropriate action routine based on the state
|
||||
* of the connection with the peer.
|
||||
*/
|
||||
|
||||
void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
int rc;
|
||||
orte_rml_send_t *snd;
|
||||
|
||||
if (orte_abnormal_term_ordered) {
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECT_ACK:
|
||||
if (ORTE_SUCCESS == (rc = mca_oob_usock_peer_recv_connect_ack(peer, peer->sd, NULL))) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler starting send/recv events",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* we connected! Start the send/recv events */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
/* if there is a message waiting to be sent, queue it */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
/* update our state */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler CONNECTED",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* allocate a new message and setup for recv */
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate new recv msg",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer->recv_msg = OBJ_NEW(mca_oob_usock_recv_t);
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to allocate recv message\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
return;
|
||||
}
|
||||
/* start by reading the header */
|
||||
peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
|
||||
peer->recv_msg->rdbytes = sizeof(mca_oob_usock_hdr_t);
|
||||
}
|
||||
/* if the header hasn't been completely read, read it */
|
||||
if (!peer->recv_msg->hdr_recvd) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler read hdr",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* completed reading the header */
|
||||
peer->recv_msg->hdr_recvd = true;
|
||||
/* if this is a zero-byte message, then we are done */
|
||||
if (0 == peer->recv_msg->hdr.nbytes) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
|
||||
peer->recv_msg->data = NULL; // make sure
|
||||
peer->recv_msg->rdptr = NULL;
|
||||
peer->recv_msg->rdbytes = 0;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate data region of size %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
|
||||
/* allocate the data region */
|
||||
peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
|
||||
/* point to it */
|
||||
peer->recv_msg->rdptr = peer->recv_msg->data;
|
||||
peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
|
||||
}
|
||||
/* fall thru and attempt to read the data */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
/* close the connection */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler error reading bytes - closing connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (peer->recv_msg->hdr_recvd) {
|
||||
/* continue to read the data block - we start from
|
||||
* wherever we left off, which could be at the
|
||||
* beginning or somewhere in the message
|
||||
*/
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* we recvd all of the message */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD COMPLETE MESSAGE FROM %s OF %d BYTES FOR DEST %s TAG %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
|
||||
(int)peer->recv_msg->hdr.nbytes,
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
|
||||
peer->recv_msg->hdr.tag);
|
||||
/* am I the intended recipient? */
|
||||
if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* yes - post it to the RML for delivery */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s DELIVERING TO RML",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
|
||||
peer->recv_msg->hdr.seq_num,
|
||||
peer->recv_msg->data,
|
||||
peer->recv_msg->hdr.nbytes);
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
} else {
|
||||
/* no - we don't route things, so we promote this
|
||||
* back to the OOB and let another transport move
|
||||
* it along. If we are a daemon and it is intended
|
||||
* for another of our local procs, it will just come
|
||||
* back to us and be handled then
|
||||
*/
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = peer->recv_msg->hdr.dst;
|
||||
snd->origin = peer->recv_msg->hdr.origin;
|
||||
snd->tag = peer->recv_msg->hdr.tag;
|
||||
snd->data = peer->recv_msg->data;
|
||||
snd->seq_num = peer->recv_msg->hdr.seq_num;
|
||||
snd->count = peer->recv_msg->hdr.nbytes;
|
||||
snd->cbfunc.iov = NULL;
|
||||
snd->cbdata = NULL;
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
/* protect the data */
|
||||
peer->recv_msg->data = NULL;
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
return;
|
||||
}
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to recv message",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
/* turn off the recv event */
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
// mca_oob_usock_peer_close(peer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Constructor for mca_oob_usock_send_t: a fresh send object carries no
 * RML message, no relay data, and an empty send cursor.
 */
static void snd_cons(mca_oob_usock_send_t *ptr)
{
    /* send cursor */
    ptr->sdbytes = 0;
    ptr->sdptr = NULL;
    ptr->iovnum = 0;
    ptr->hdr_sent = false;
    /* payload references */
    ptr->data = NULL;
    ptr->msg = NULL;
}

/* Destructor for mca_oob_usock_send_t.
 *
 * Any RML msg attached to the send is left alone because the RML owns
 * that memory. Relay data, on the other hand, belongs to this object
 * and is free'd here.
 */
static void snd_des(mca_oob_usock_send_t *ptr)
{
    /* free(NULL) is a no-op, so no explicit NULL test is required */
    free(ptr->data);
}

OBJ_CLASS_INSTANCE(mca_oob_usock_send_t, opal_list_item_t, snd_cons, snd_des);
|
||||
|
||||
static void rcv_cons(mca_oob_usock_recv_t *ptr)
|
||||
{
|
||||
ptr->hdr_recvd = false;
|
||||
ptr->rdptr = NULL;
|
||||
ptr->rdbytes = 0;
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_recv_t,
|
||||
opal_list_item_t,
|
||||
rcv_cons, NULL);
|
||||
|
||||
static void err_cons(mca_oob_usock_msg_error_t *ptr)
|
||||
{
|
||||
ptr->rmsg = NULL;
|
||||
ptr->snd = NULL;
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_msg_error_t,
|
||||
opal_object_t,
|
||||
err_cons, NULL);
|
||||
|
@ -1,254 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_SENDRECV_H_
|
||||
#define _MCA_OOB_USOCK_SENDRECV_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_hdr.h"
|
||||
|
||||
/* usock structure for sending a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
orte_rml_send_t *msg;
|
||||
char *data;
|
||||
bool hdr_sent;
|
||||
int iovnum;
|
||||
char *sdptr;
|
||||
size_t sdbytes;
|
||||
} mca_oob_usock_send_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_send_t);
|
||||
|
||||
/* usock structure for recving a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
bool hdr_recvd;
|
||||
char *data;
|
||||
char *rdptr;
|
||||
size_t rdbytes;
|
||||
} mca_oob_usock_recv_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t);
|
||||
|
||||
/* Queue a message for transmission to the given peer.
 *
 * When the peer already has a message in the on-deck ("ready") slot,
 * the new message is appended to the peer's send queue; otherwise it
 * takes the on-deck slot itself.
 *
 * When the boolean is true the macro additionally makes sure progress
 * will happen: a peer that is not yet connected is moved to the
 * CONNECTING state and a connection attempt is activated, while a
 * connected peer has its send event added if it is not already active.
 * Passing false leaves the message "pending" for later transmission -
 * e.g., after the connection procedure is completed.
 *
 * p => pointer to mca_oob_usock_peer_t
 * s => pointer to mca_oob_usock_send_t
 * f => true if send event is to be activated
 *
 * NOTE: the expansion ends in "while(0);" - including the semicolon -
 * so it must not be used as the unbraced body of an if that has an else.
 */
#define MCA_OOB_USOCK_QUEUE_MSG(p, s, f)                                    \
    do {                                                                    \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] queue msg to %s",                   \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((s)->hdr.dst)));              \
        if (NULL != (p)->send_msg) {                                        \
            /* something is already on-deck, so wait in the queue */        \
            opal_list_append(&(p)->send_queue, &(s)->super);                \
        } else {                                                            \
            /* the on-deck slot is free - take it */                        \
            (p)->send_msg = (s);                                            \
        }                                                                   \
        if ((f)) {                                                          \
            if (MCA_OOB_USOCK_CONNECTED == (p)->state) {                    \
                /* connected: ensure the send event is active */            \
                if (!(p)->send_ev_active) {                                 \
                    opal_event_add(&(p)->send_event, 0);                    \
                    (p)->send_ev_active = true;                             \
                }                                                           \
            } else {                                                        \
                /* not connected: start connecting */                       \
                (p)->state = MCA_OOB_USOCK_CONNECTING;                      \
                ORTE_ACTIVATE_USOCK_CONN_STATE((p),                         \
                                               mca_oob_usock_peer_try_connect); \
            }                                                               \
        }                                                                   \
    }while(0);
|
||||
|
||||
/* Internal helper shared by MCA_OOB_USOCK_QUEUE_SEND and
 * MCA_OOB_USOCK_QUEUE_PENDING - not intended for direct use.
 *
 * Creates a mca_oob_usock_send_t for the RML message, fills in the
 * header (origin, dst, type, tag, seq_num and total byte count), points
 * the send cursor at the header, and hands the result to
 * MCA_OOB_USOCK_QUEUE_MSG.
 *
 * The byte count is taken from the buffer when the message has one,
 * otherwise summed over the iovec array when the message has one,
 * otherwise taken directly from the message's count field.
 *
 * The locals use deliberately unlikely names so that an argument
 * expression mentioning e.g. "msg" or "i" is not captured.
 *
 * m     - the RML message to be sent
 * p     - the final recipient
 * f     - passed through to MCA_OOB_USOCK_QUEUE_MSG as its activate flag
 * label - string literal naming the operation in the debug output
 */
#define MCA_OOB_USOCK_QUEUE_COMMON(m, p, f, label)                          \
    do {                                                                    \
        mca_oob_usock_send_t *usock_qsnd_;                                  \
        int usock_qidx_;                                                    \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] queue " label " to %s",             \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((m)->dst)));                  \
        usock_qsnd_ = OBJ_NEW(mca_oob_usock_send_t);                        \
        /* setup the header */                                              \
        usock_qsnd_->hdr.origin = (m)->origin;                              \
        usock_qsnd_->hdr.dst = (m)->dst;                                    \
        usock_qsnd_->hdr.type = MCA_OOB_USOCK_USER;                         \
        usock_qsnd_->hdr.tag = (m)->tag;                                    \
        usock_qsnd_->hdr.seq_num = (m)->seq_num;                            \
        /* point to the actual message */                                   \
        usock_qsnd_->msg = (m);                                             \
        /* set the total number of bytes to be sent */                      \
        if (NULL != (m)->buffer) {                                          \
            usock_qsnd_->hdr.nbytes = (m)->buffer->bytes_used;              \
        } else if (NULL != (m)->iov) {                                      \
            usock_qsnd_->hdr.nbytes = 0;                                    \
            for (usock_qidx_ = 0; usock_qidx_ < (m)->count; usock_qidx_++) { \
                usock_qsnd_->hdr.nbytes += (m)->iov[usock_qidx_].iov_len;   \
            }                                                               \
        } else {                                                            \
            usock_qsnd_->hdr.nbytes = (m)->count;                           \
        }                                                                   \
        /* start the send with the header */                                \
        usock_qsnd_->sdptr = (char*)&usock_qsnd_->hdr;                      \
        usock_qsnd_->sdbytes = sizeof(mca_oob_usock_hdr_t);                 \
        /* add to the msg queue for this peer */                            \
        MCA_OOB_USOCK_QUEUE_MSG((p), usock_qsnd_, (f));                     \
    } while(0)

/* queue a message to be sent by one of our modules - must
 * provide the following params:
 *
 * m - the RML message to be sent
 * p - the final recipient
 *
 * The message is queued with the activate flag set to true.
 * The expansion keeps its historical trailing semicolon.
 */
#define MCA_OOB_USOCK_QUEUE_SEND(m, p)                                      \
    MCA_OOB_USOCK_QUEUE_COMMON(m, p, true, "send");

/* queue a message to be sent by one of our modules upon completing
 * the connection process - must provide the following params:
 *
 * m - the RML message to be sent
 * p - the final recipient
 *
 * The message is queued with the activate flag set to false.
 * The expansion keeps its historical trailing semicolon.
 */
#define MCA_OOB_USOCK_QUEUE_PENDING(m, p)                                   \
    MCA_OOB_USOCK_QUEUE_COMMON(m, p, false, "pending");
|
||||
|
||||
/* State machine for processing message */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
int reps;
|
||||
orte_rml_send_t *msg;
|
||||
} mca_oob_usock_msg_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_op_t);
|
||||
|
||||
#define ORTE_ACTIVATE_USOCK_POST_SEND(ms, cbfunc) \
|
||||
do { \
|
||||
mca_oob_usock_msg_op_t *mop; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] post send to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT(&((ms)->dst))); \
|
||||
mop = OBJ_NEW(mca_oob_usock_msg_op_t); \
|
||||
mop->msg = (ms); \
|
||||
opal_event_set(mca_oob_usock_module.ev_base, &mop->ev, -1, \
|
||||
OPAL_EV_WRITE, (cbfunc), mop); \
|
||||
opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t *rmsg;
|
||||
mca_oob_usock_send_t *snd;
|
||||
orte_process_name_t hop;
|
||||
} mca_oob_usock_msg_error_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_error_t);
|
||||
|
||||
/* macro for reporting delivery errors back to the
|
||||
* component for error handling
|
||||
*
|
||||
* s -> mca_oob_usock_send_t that failed (can be NULL)
|
||||
* r -> orte_rml_send_t that failed (can be NULL)
|
||||
* h -> process name for the next recipient
|
||||
* cbfunc -> function to handle the callback
|
||||
*/
|
||||
#define ORTE_ACTIVATE_USOCK_MSG_ERROR(s, r, h, cbfunc) \
|
||||
do { \
|
||||
mca_oob_usock_msg_error_t *mop; \
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
|
||||
"%s:[%s:%d] post msg error to %s", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
ORTE_NAME_PRINT((h))); \
|
||||
mop = OBJ_NEW(mca_oob_usock_msg_error_t); \
|
||||
if (NULL != (s)) { \
|
||||
mop->snd = (s); \
|
||||
} else if (NULL != (r)) { \
|
||||
/* use a proxy so we can pass NULL into the macro */ \
|
||||
mop->rmsg = (r); \
|
||||
} \
|
||||
mop->hop.jobid = (h)->jobid; \
|
||||
mop->hop.vpid = (h)->vpid; \
|
||||
opal_event_set(orte_event_base, &mop->ev, -1, \
|
||||
OPAL_EV_WRITE, (cbfunc), mop); \
|
||||
opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_SENDRECV_H_ */
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: INTEL
|
||||
status: maintenance
|
@ -27,7 +27,7 @@
|
||||
*
|
||||
* RML Framework maintenance interface
|
||||
*
|
||||
* Interface for starting / stopping / controlling the RML framework,
|
||||
* Interface for starting / stopping / controlling the RML framework,
|
||||
* as well as support for modifying RML datatypes.
|
||||
*
|
||||
* @note The only RML datatype exposed to the user is the RML tag.
|
||||
@ -91,16 +91,17 @@ ORTE_DECLSPEC void orte_rml_base_comm_stop(void);
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
int pri;
|
||||
orte_rml_base_module_t *module;
|
||||
mca_base_component_t *component;
|
||||
orte_rml_component_t *component;
|
||||
} orte_rml_base_active_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_base_active_t);
|
||||
|
||||
/* a global struct containing framework-level values */
|
||||
typedef struct {
|
||||
opal_list_t actives; /* list to hold the active plugins */
|
||||
opal_list_t actives; /* list to hold the active components */
|
||||
opal_pointer_array_t conduits; /* array to hold the open conduits */
|
||||
opal_list_t posted_recvs;
|
||||
opal_list_t unmatched_msgs;
|
||||
orte_rml_conduit_t def_conduit_id;
|
||||
#if OPAL_ENABLE_TIMING
|
||||
bool timing;
|
||||
#endif
|
||||
@ -108,17 +109,6 @@ typedef struct {
|
||||
ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base;
|
||||
|
||||
|
||||
/**
|
||||
* List of components that are available to the RML
|
||||
*
|
||||
* List of components that are currently available to the RML
|
||||
* framework. Useable between calls to orte_rml_base_open() and
|
||||
* orte_rml_base_close().
|
||||
*
|
||||
* @note This list should not be used by code outside the RML base.
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_rml_base_components;
|
||||
|
||||
/* structure to send RML messages - used internally */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
@ -153,6 +143,8 @@ typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t send;
|
||||
/* conduit_id */
|
||||
orte_rml_conduit_t conduit_id;
|
||||
} orte_rml_send_request_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_send_request_t);
|
||||
|
||||
@ -222,21 +214,6 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
|
||||
opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
/*
|
||||
reactivates rcv msg on the unposted rcvd list when a match occurs
|
||||
need a different path as the QoS recv processing was already done
|
||||
for this process
|
||||
*/
|
||||
#define ORTE_RML_REACTIVATE_MESSAGE(m) \
|
||||
do { \
|
||||
/* setup the event */ \
|
||||
opal_event_set(orte_event_base, &(m)->ev, -1, \
|
||||
OPAL_EV_WRITE, \
|
||||
orte_rml_base_reprocess_msg, (m)); \
|
||||
opal_event_set_priority(&(m)->ev, ORTE_MSG_PRI); \
|
||||
opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
#define ORTE_RML_SEND_COMPLETE(m) \
|
||||
do { \
|
||||
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
|
||||
@ -264,47 +241,60 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
|
||||
/* common implementations */
|
||||
ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg);
|
||||
|
||||
|
||||
/* Stub API interfaces to cycle through active plugins and call highest priority */
|
||||
ORTE_DECLSPEC int orte_rml_API_enable_comm(void);
|
||||
ORTE_DECLSPEC void orte_rml_API_finalize(void);
|
||||
ORTE_DECLSPEC char* orte_rml_API_get_contact_info(void);
|
||||
ORTE_DECLSPEC void orte_rml_API_set_contact_info(const char *contact_info);
|
||||
ORTE_DECLSPEC int orte_rml_API_ping(const char* contact_info, const struct timeval* tv);
|
||||
ORTE_DECLSPEC int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
ORTE_DECLSPEC int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
/* Stub API interfaces to cycle through active plugins */
|
||||
char* orte_rml_API_get_contact_info(void);
|
||||
void orte_rml_API_set_contact_info(const char *contact_info);
|
||||
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
int orte_rml_API_ping_conduit(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
int orte_rml_API_send_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
|
||||
int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
int orte_rml_API_send_buffer_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc);
|
||||
void orte_rml_API_purge(orte_process_name_t *peer);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc);
|
||||
int orte_rml_API_query_transports(opal_list_t *providers);
|
||||
|
||||
ORTE_DECLSPEC int orte_rml_API_ft_event(int state);
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_API_purge(orte_process_name_t *peer);
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_RML_BASE_H */
|
||||
#endif /* MCA_RML_BASE_H */
|
||||
|
@ -139,7 +139,7 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data)
|
||||
}
|
||||
|
||||
/* if we changed it, then we better update the routing
|
||||
* plan so daemon collectives work correctly
|
||||
* plan so daemon collectives work correctly.
|
||||
*/
|
||||
orte_routed.update_routing_plan();
|
||||
}
|
||||
|
@ -38,21 +38,22 @@
|
||||
|
||||
|
||||
/* Initialising stub fns in the global var used by other modules */
|
||||
orte_rml_base_module_t orte_rml = {
|
||||
orte_rml_API_enable_comm,
|
||||
orte_rml_API_finalize,
|
||||
orte_rml_API_get_contact_info,
|
||||
orte_rml_API_set_contact_info,
|
||||
orte_rml_API_ping,
|
||||
orte_rml_API_send_nb,
|
||||
orte_rml_API_send_buffer_nb,
|
||||
orte_rml_API_recv_nb,
|
||||
orte_rml_API_recv_buffer_nb,
|
||||
orte_rml_API_recv_cancel,
|
||||
orte_rml_API_add_exception_handler,
|
||||
orte_rml_API_del_exception_handler,
|
||||
orte_rml_API_ft_event,
|
||||
orte_rml_API_purge
|
||||
orte_rml_base_API_t orte_rml = {
|
||||
.get_contact_info = orte_rml_API_get_contact_info,
|
||||
.set_contact_info = orte_rml_API_set_contact_info,
|
||||
.ping = orte_rml_API_ping,
|
||||
.ping_conduit = orte_rml_API_ping_conduit,
|
||||
.send_nb = orte_rml_API_send_nb,
|
||||
.send_buffer_nb = orte_rml_API_send_buffer_nb,
|
||||
.send_nb_conduit = orte_rml_API_send_nb_conduit,
|
||||
.send_buffer_nb_conduit = orte_rml_API_send_buffer_nb_conduit,
|
||||
.recv_nb = orte_rml_API_recv_nb,
|
||||
.recv_buffer_nb = orte_rml_API_recv_buffer_nb,
|
||||
.recv_cancel = orte_rml_API_recv_cancel,
|
||||
.purge = orte_rml_API_purge,
|
||||
.query_transports = orte_rml_API_query_transports,
|
||||
.open_conduit = orte_rml_API_open_conduit,
|
||||
.close_conduit = orte_rml_API_close_conduit
|
||||
};
|
||||
|
||||
orte_rml_base_t orte_rml_base = {{{0}}};
|
||||
@ -89,16 +90,24 @@ static void cleanup(int sd, short args, void *cbdata)
|
||||
static int orte_rml_base_close(void)
|
||||
{
|
||||
volatile bool active;
|
||||
int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits);
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
orte_rml_base_active_t *active_module;
|
||||
|
||||
/*close the active modules */
|
||||
OPAL_LIST_FOREACH(active_module, &orte_rml_base.actives, orte_rml_base_active_t)
|
||||
/* cycle thru the conduits opened and call each module's finalize */
|
||||
/* The components finalise/close() will be responsible for freeing the module pointers */
|
||||
for (idx = 0; idx < total_conduits ; idx++)
|
||||
{
|
||||
if (NULL != active_module->module->finalize) {
|
||||
active_module->module->finalize();
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) {
|
||||
/* close the conduit */
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
OPAL_LIST_DESTRUCT(&orte_rml_base.actives)
|
||||
|
||||
/* because the RML posted recvs list is in a separate
|
||||
@ -130,6 +139,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
|
||||
OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1);
|
||||
|
||||
OPAL_TIMING_INIT(&tm_rml);
|
||||
/* Open up all available components */
|
||||
@ -145,18 +156,15 @@ OBJ_CLASS_INSTANCE(orte_rml_base_active_t,
|
||||
NULL, NULL);
|
||||
|
||||
/**
|
||||
* Function for selecting one component(plugin) from all those that are
|
||||
* available.
|
||||
* Function for ordering the component(plugin) by priority
|
||||
*/
|
||||
int orte_rml_base_select(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli=NULL;
|
||||
mca_base_component_t *component=NULL;
|
||||
mca_base_module_t *module=NULL;
|
||||
orte_rml_base_module_t *nmodule;
|
||||
orte_rml_component_t *component=NULL;
|
||||
orte_rml_base_active_t *newmodule, *mod;
|
||||
int priority;
|
||||
bool inserted;
|
||||
opal_list_t conduit_attr;
|
||||
|
||||
if (selected) {
|
||||
return ORTE_SUCCESS;
|
||||
@ -164,59 +172,65 @@ int orte_rml_base_select(void)
|
||||
selected = true;
|
||||
|
||||
OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) {
|
||||
component = (mca_base_component_t *) cli->cli_component;
|
||||
component = (orte_rml_component_t*) cli->cli_component;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: Initializing %s component %s",
|
||||
component->mca_type_name,
|
||||
component->mca_component_name);
|
||||
component->base.mca_type_name,
|
||||
component->base.mca_component_name);
|
||||
|
||||
if (NULL == ((orte_rml_component_t *)component)->rml_init) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: no init function; ignoring component [%s]",component->mca_component_name);
|
||||
} else {
|
||||
module = (mca_base_module_t *) ((orte_rml_component_t *)component)->rml_init(&priority);
|
||||
if (NULL == module) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: init returned failure [%s]",component->mca_component_name);
|
||||
continue;
|
||||
}
|
||||
/* add to the list of available components */
|
||||
newmodule = OBJ_NEW(orte_rml_base_active_t);
|
||||
newmodule->pri = component->priority;
|
||||
newmodule->component = component;
|
||||
|
||||
/* based on priority add it to the actives list */
|
||||
nmodule = (orte_rml_base_module_t*) module;
|
||||
/* add to the list of selected modules */
|
||||
newmodule = OBJ_NEW(orte_rml_base_active_t);
|
||||
newmodule->pri = priority;
|
||||
newmodule->module = nmodule;
|
||||
newmodule->component = component;
|
||||
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (priority > mod->pri) {
|
||||
opal_list_insert_pos(&orte_rml_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_rml_base.actives, &newmodule->super);
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (newmodule->pri > mod->pri) {
|
||||
opal_list_insert_pos(&orte_rml_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_rml_base.actives, &newmodule->super);
|
||||
}
|
||||
}
|
||||
|
||||
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
|
||||
opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* show the prioritized list */
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->mca_component_name, mod->pri);
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Open the default oob conduit */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Opening the default conduit - oob component",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
OBJ_CONSTRUCT(&conduit_attr, opal_list_t);
|
||||
orte_set_attribute(&conduit_attr, ORTE_RML_INCLUDE_COMP_ATTRIB, ORTE_ATTR_LOCAL,"oob",OPAL_STRING);
|
||||
orte_rml_base.def_conduit_id = orte_rml_API_open_conduit(&conduit_attr);
|
||||
OPAL_LIST_DESTRUCT(&conduit_attr);
|
||||
if (0 <= orte_rml_base.def_conduit_id) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Default conduit (oob) opened with conduit id = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_rml_base.def_conduit_id);
|
||||
} else {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Default conduit (oob) could not be opened",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void orte_rml_send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
@ -315,3 +329,21 @@ static void prq_des(orte_rml_recv_request_t *ptr)
|
||||
OBJ_CLASS_INSTANCE(orte_rml_recv_request_t,
|
||||
opal_object_t,
|
||||
prq_cons, prq_des);
|
||||
|
||||
/* Constructor for orte_rml_pathway_t: no component name yet, and two
 * empty lists for the pathway's attributes and transports. */
static void pthcons(orte_rml_pathway_t *p)
{
    OBJ_CONSTRUCT(&p->attributes, opal_list_t);
    OBJ_CONSTRUCT(&p->transports, opal_list_t);
    p->component = NULL;
}

/* Destructor for orte_rml_pathway_t: frees the component pointer and
 * tears down both lists. */
static void pthdes(orte_rml_pathway_t *p)
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(p->component);
    OPAL_LIST_DESTRUCT(&p->attributes);
    OPAL_LIST_DESTRUCT(&p->transports);
}

/* orte_rml_pathway_t is a list item, so pathways can be placed on an opal_list_t */
OBJ_CLASS_INSTANCE(orte_rml_pathway_t,
                   opal_list_item_t,
                   pthcons, pthdes);
|
||||
|
@ -210,7 +210,7 @@ static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all)
|
||||
*/
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &rcv->peer) &&
|
||||
msg->tag == rcv->tag) {
|
||||
ORTE_RML_REACTIVATE_MESSAGE(msg);
|
||||
ORTE_RML_ACTIVATE_MESSAGE(msg);
|
||||
opal_list_remove_item(&orte_rml_base.unmatched_msgs, item);
|
||||
if (!get_all) {
|
||||
break;
|
||||
@ -233,19 +233,3 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
|
||||
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
|
||||
orte_rml_base_complete_recv_msg(&msg);
|
||||
}
|
||||
|
||||
void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata)
|
||||
{
|
||||
orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
|
||||
"%s reprocessing msg received from %s for tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&msg->sender),
|
||||
msg->tag));
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
|
||||
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
|
||||
orte_rml_base_complete_recv_msg ( &msg);
|
||||
/* the msg should be matched and released in this path
|
||||
add an assert (msg!= NULL) ?? */
|
||||
}
|
||||
|
@ -37,72 +37,98 @@
|
||||
* The stub API interface functions
|
||||
*/
|
||||
|
||||
/** Enable communication once a process name has been assigned */
|
||||
int orte_rml_API_enable_comm(void)
|
||||
/** Open a conduit - check if the ORTE_RML_INCLUDE_COMP attribute is provided, this is */
|
||||
/* a comma separated list of components, try to open the conduit in this order. */
|
||||
/* if the ORTE_RML_INCLUDE_COMP is not provided or this list was not able to open conduit */
|
||||
/* call the open_conduit() of the component in priority order to see if they can use the */
|
||||
/* attribute to open a conduit. */
|
||||
/* Note: The component takes care of checking for duplicate and returning the previously */
|
||||
/* opened module* in case of duplicates. Currently we are saving it in a new conduit_id */
|
||||
/* even if it is duplicate. [ToDo] compare the module* received from component to see if */
|
||||
/* already present in array and return the prev conduit_id instead of adding it again to array */
|
||||
/* @param[in] attributes The attributes is a list of opal_value_t of type OPAL_STRING */
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
|
||||
{
|
||||
orte_rml_base_active_t *active, *next;
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_component_t *comp;
|
||||
orte_rml_base_module_t *mod, *ourmod=NULL;
|
||||
int rc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:enable_comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:open_conduit",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and let each one enable their comm */
|
||||
OPAL_LIST_FOREACH_SAFE(active, next, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->enable_comm) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->enable_comm())) {
|
||||
/* bozo check - you cannot specify both include and exclude */
|
||||
if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) &&
|
||||
orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) {
|
||||
// orte_show_help();
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* cycle thru the actives in priority order and let each one see if they can support this request */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
comp = (orte_rml_component_t *)active->component;
|
||||
if (NULL != comp->open_conduit) {
|
||||
if (NULL != (mod = comp->open_conduit(attributes))) {
|
||||
opal_output_verbose(2, orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:enable_comm Component %s was unable to enable comm",
|
||||
"%s rml:base:open_conduit Component %s provided a conduit",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
active->component->mca_component_name);
|
||||
/* remove this component from our actives */
|
||||
opal_list_remove_item(&orte_rml_base.actives, &active->super);
|
||||
/* give the module a chance to finalize */
|
||||
if (NULL != active->module->finalize) {
|
||||
active->module->finalize();
|
||||
active->component->base.mca_component_name);
|
||||
/* retain this answer */
|
||||
if (NULL != ourmod) {
|
||||
free(ourmod);
|
||||
}
|
||||
OBJ_RELEASE(active);
|
||||
ourmod = mod;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* ensure we still have someone */
|
||||
if (0 < opal_list_get_size(&orte_rml_base.actives)) {
|
||||
return ORTE_SUCCESS;
|
||||
if (NULL != ourmod) {
|
||||
/* we got an answer - store this conduit in our array */
|
||||
rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod);
|
||||
return rc;
|
||||
}
|
||||
return ORTE_ERR_UNREACH;
|
||||
/* we get here if nobody could support it */
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Shutdown the communication system and clean up resources */
|
||||
void orte_rml_API_finalize(void)
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:finalize()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:close_conduit(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id);
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->finalize) {
|
||||
active->module->finalize();
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) {
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL);
|
||||
free(mod);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Get contact information for local process */
|
||||
char* orte_rml_API_get_contact_info(void)
|
||||
{
|
||||
char **rc = NULL, *tmp;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:get_contact_info()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
/* cycle thru the actives and get their contact info */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->get_contact_info) {
|
||||
tmp = active->module->get_contact_info();
|
||||
if (NULL != active->component->get_contact_info) {
|
||||
tmp = active->component->get_contact_info();
|
||||
if (NULL != tmp) {
|
||||
opal_argv_append_nosize(&rc, tmp);
|
||||
free(tmp);
|
||||
@ -122,44 +148,104 @@ void orte_rml_API_set_contact_info(const char *contact_info)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:set_contact_info()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* cycle thru the actives and let all modules parse the info
|
||||
* to extract their relevant portions */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->set_contact_info) {
|
||||
active->module->set_contact_info(contact_info);
|
||||
if (NULL != active->component->set_contact_info) {
|
||||
active->component->set_contact_info(contact_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
int orte_rml_API_ping_conduit(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ping()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ping(conduit-%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id);
|
||||
|
||||
/* cycle thru the actives and see if anyone can confirm connection */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->ping) {
|
||||
rc = active->module->ping(contact_info, tv);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* at least someone can reach this target */
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->ping) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->ping((struct orte_rml_base_module_t*)mod, contact_info, tv);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
int orte_rml_API_ping(const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
return orte_rml_API_ping_conduit(orte_rml_base.def_conduit_id, contact_info, tv);
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message through a specific conduit*/
|
||||
int orte_rml_API_send_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_nb_conduit() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_nb((struct orte_rml_base_module_t*)mod, peer, msg, count, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
int orte_rml_API_send_buffer_nb_conduit(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_buffer_nb_conduit() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_buffer_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_buffer_nb((struct orte_rml_base_module_t*)mod, peer, buffer, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking iovec message through the default conduit */
|
||||
int orte_rml_API_send_nb(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
@ -167,25 +253,7 @@ int orte_rml_API_send_nb(orte_process_name_t* peer,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_nb() to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->send_nb) {
|
||||
rc = active->module->send_nb(peer, msg, count, tag, cbfunc, cbdata);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* someone was able to send it */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
return orte_rml_API_send_nb_conduit(orte_rml_base.def_conduit_id, peer, msg, count, tag, cbfunc, cbdata);
|
||||
}
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
@ -195,48 +263,26 @@ int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_buffer_nb()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->send_buffer_nb) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->send_buffer_nb(peer, buffer, tag, cbfunc, cbdata))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
return orte_rml_API_send_buffer_nb_conduit(orte_rml_base.def_conduit_id, peer, buffer, tag, cbfunc, cbdata);
|
||||
}
|
||||
|
||||
|
||||
/** post a receive for an IOV message */
|
||||
/** post a receive for an IOV message - this is done
|
||||
* strictly in the base, and so it does not go to a module */
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_nb) {
|
||||
active->module->recv_nb(peer, tag, persistent, cbfunc, cbdata);
|
||||
}
|
||||
}
|
||||
/* now push the request into the event base so we can add
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = false;
|
||||
@ -260,23 +306,14 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_buffer_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_buffer_nb) {
|
||||
active->module->recv_buffer_nb(peer, tag, persistent, cbfunc, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
/* now push the request into the event base so we can add
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = true;
|
||||
@ -296,23 +333,14 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
/** Cancel posted non-blocking receive */
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_cancel for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* cycle thru the actives and give each module a chance
|
||||
* to do whatever module-specific things they need to do */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->recv_cancel) {
|
||||
active->module->recv_cancel(peer,tag);
|
||||
}
|
||||
}
|
||||
|
||||
/* now push the request into the event base so we can remove
|
||||
/* push the request into the event base so we can remove
|
||||
* the receive from our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->cancel = true;
|
||||
@ -326,83 +354,46 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
}
|
||||
|
||||
/** Add callback for communication exception */
|
||||
int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:add_exception_handler()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->add_exception_handler) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->add_exception_handler(cbfunc))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Delete callback for communication exception */
|
||||
int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:del_exception_handler()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and see who can send it */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->del_exception_handler) {
|
||||
if (ORTE_SUCCESS == (rc = active->module->del_exception_handler(cbfunc))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Fault tolerance handler */
|
||||
int orte_rml_API_ft_event(int state)
|
||||
{
|
||||
int rc = ORTE_ERROR;
|
||||
orte_rml_base_active_t *active;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ft_event()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and let them all handle this event */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->ft_event) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/** Purge information */
|
||||
void orte_rml_API_purge(orte_process_name_t *peer)
|
||||
{
|
||||
orte_rml_base_active_t *active;
|
||||
orte_rml_base_module_t *mod;
|
||||
int i;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:purge() - calling the respective plugin that implements this",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* cycle thru the actives and let everyone purge related info */
|
||||
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (NULL != active->module->purge) {
|
||||
active->module->purge(peer);
|
||||
for (i=0; i < orte_rml_base.conduits.size; i++) {
|
||||
/* get the module */
|
||||
if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, i))) {
|
||||
if (NULL != mod->purge) {
|
||||
mod->purge(peer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Collect the transport pathways reported by the active RML components.
 *
 * Walks orte_rml_base.actives; for every component that provides a
 * query_transports function, calls it and, when it returns a non-NULL
 * pathway, retains that object and appends it to the caller's list.
 *
 * @param providers  list that receives the orte_rml_pathway_t items
 * @return ORTE_SUCCESS (always)
 */
int orte_rml_API_query_transports(opal_list_t *providers)
{
    orte_rml_base_active_t *entry;
    orte_rml_pathway_t *pathway;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:orte_rml_API_query_transports()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        /* components without a query function have nothing to report */
        if (NULL == entry->component->query_transports) {
            continue;
        }

        opal_output_verbose(10,orte_rml_base_framework.framework_output,
                            "\n calling module: %s->query_transports() \n",
                            entry->component->base.mca_component_name);

        pathway = entry->component->query_transports();
        if (NULL != pathway) {
            /* take a reference before placing the pathway on the caller's list */
            OBJ_RETAIN(pathway);
            opal_list_append(providers, &pathway->super);
        }
    }

    return ORTE_SUCCESS;
}
|
||||
|
0
orte/mca/rml/ofi/.opal_ignore
Обычный файл
0
orte/mca/rml/ofi/.opal_ignore
Обычный файл
@ -10,9 +10,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved
|
||||
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -21,36 +19,35 @@
|
||||
#
|
||||
|
||||
sources = \
|
||||
oob_usock_component.h \
|
||||
oob_usock.h \
|
||||
oob_usock_component.c \
|
||||
oob_usock_connection.h \
|
||||
oob_usock_sendrecv.h \
|
||||
oob_usock_hdr.h \
|
||||
oob_usock_peer.h \
|
||||
oob_usock_ping.h \
|
||||
oob_usock.c \
|
||||
oob_usock_connection.c \
|
||||
oob_usock_sendrecv.c
|
||||
rml_ofi.h \
|
||||
rml_ofi_request.h \
|
||||
rml_ofi_component.c \
|
||||
rml_ofi_send.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_oob_usock_DSO
|
||||
if MCA_BUILD_orte_rml_ofi_DSO
|
||||
component_noinst =
|
||||
component_install = mca_oob_usock.la
|
||||
component_install = mca_rml_ofi.la
|
||||
else
|
||||
component_noinst = libmca_oob_usock.la
|
||||
component_noinst = libmca_rml_ofi.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_oob_usock_la_SOURCES = $(sources)
|
||||
mca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
mca_rml_ofi_la_SOURCES = $(sources)
|
||||
mca_rml_ofi_la_LDFLAGS = \
|
||||
$(orte_rml_ofi_LDFLAGS) \
|
||||
-module -avoid-version
|
||||
mca_rml_ofi_la_LIBADD = $(orte_rml_ofi_LIBS) \
|
||||
$(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_oob_usock_la_SOURCES = $(sources)
|
||||
libmca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
libmca_rml_ofi_la_SOURCES = $(sources)
|
||||
libmca_rml_ofi_la_LDFLAGS = \
|
||||
$(orte_rml_ofi_LDFLAGS) \
|
||||
-module -avoid-version
|
||||
libmca_rml_ofi_la_LIBADD = $(orte_rml_ofi_LIBS)
|
29
orte/mca/rml/ofi/configure.m4
Обычный файл
29
orte/mca/rml/ofi/configure.m4
Обычный файл
@ -0,0 +1,29 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
#
|
||||
# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_rml_ofi_POST_CONFIG(will_build)
|
||||
# ----------------------------------------
|
||||
# Only require the tag if we're actually going to be built
|
||||
|
||||
# MCA_orte_rml_ofi_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_rml_ofi_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/rml/ofi/Makefile])
|
||||
|
||||
# ensure we already ran the common libfabric config
|
||||
AC_REQUIRE([MCA_opal_common_libfabric_CONFIG])
|
||||
|
||||
AS_IF([test "$opal_common_libfabric_happy" = "yes"],
|
||||
[$1],
|
||||
[$2])
|
||||
])dnl
|
191
orte/mca/rml/ofi/rml_ofi.h
Обычный файл
191
orte/mca/rml/ofi/rml_ofi.h
Обычный файл
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_RML_OFI_RML_OFI_H
|
||||
#define MCA_RML_OFI_RML_OFI_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "rml_ofi_request.h"
|
||||
|
||||
|
||||
/** RML/OFI modex key values **/

/* (char*) OFI address of the node the process is running on, used when the
 * fabric reports the FI_SOCKADDR_IN address format */
#define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"

/* (char*) OFI address of the node the process is running on, used when the
 * fabric reports the FI_ADDR_PSMX address format */
#define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"

/*
 * MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be.
 * To use the FI_MULTI_RECV feature efficiently the recv buffer has to be
 * large, so that it does not need to be reposted often to fetch the
 * remaining data once it fills up.
 */
#define MULTI_BUF_SIZE_FACTOR 128

/* Size in bytes of one packet (header + data); the sender fragments any
 * message whose payload does not fit in this size minus the header. */
#define MIN_MULTI_BUF_SIZE (1024 * 1024)
|
||||
|
||||
/*
 * Close an OFI object through its embedded `fid' member and reset the handle.
 *
 * fd must be a modifiable pointer lvalue (it is evaluated more than once).
 * A NULL handle is skipped. Otherwise fi_close() is called and the handle is
 * set to NULL whether or not the close succeeded; a non-zero return code from
 * fi_close() is only reported through opal_output_verbose() at level 10.
 *
 * The value logged is the macro's own _ret. The previous version logged an
 * identifier named `ret', which either failed to compile or silently printed
 * an unrelated variable of the caller.
 *
 * NOTE: the expansion keeps its historical trailing semicolon so existing
 * call sites are unaffected; do not use the macro as the unbraced body of an
 * `if' that has an `else' branch.
 */
#define CLOSE_FID(fd)                                                          \
    do {                                                                       \
        if (NULL != (fd)) {                                                    \
            int _ret = fi_close(&(fd)->fid);                                   \
            (fd) = NULL;                                                       \
            if (0 != _ret) {                                                   \
                opal_output_verbose(10, orte_rml_base_framework.framework_output, \
                                    " %s - fi_close failed with error- %d",    \
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), _ret); \
            }                                                                  \
        }                                                                      \
    } while (0);
|
||||
|
||||
|
||||
/*
 * Evaluate FUNC over and over for as long as it yields -FI_EAGAIN.
 *
 * The result of every evaluation is stored in a variable named `ret' that
 * the caller must have declared; on exit `ret' holds the first result that
 * was not -FI_EAGAIN (0 on success, any other value on error).
 *
 * The expansion ends with a semicolon of its own.
 */
#define RML_OFI_RETRY_UNTIL_DONE(FUNC)  \
    do {                                \
        ret = FUNC;                     \
    } while (-FI_EAGAIN == ret);
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct orte_rml_ofi_module_t;
|
||||
|
||||
/** This structure will hold the ep and all ofi objects for each transport
|
||||
and also the corresponding fi_info
|
||||
**/
|
||||
typedef struct {
|
||||
|
||||
/** OFI conduit ID **/
|
||||
uint8_t conduit_id;
|
||||
|
||||
/** fi_info for this transport */
|
||||
struct fi_info *fabric_info;
|
||||
|
||||
/** Fabric Domain handle */
|
||||
struct fid_fabric *fabric;
|
||||
|
||||
/** Access Domain handle */
|
||||
struct fid_domain *domain;
|
||||
|
||||
/** Address vector handle */
|
||||
struct fid_av *av;
|
||||
|
||||
/** Completion queue handle */
|
||||
struct fid_cq *cq;
|
||||
|
||||
/** Endpoint to communicate on */
|
||||
struct fid_ep *ep;
|
||||
|
||||
/** Endpoint name */
|
||||
char ep_name[FI_NAME_MAX];
|
||||
|
||||
/** Endpoint name length */
|
||||
size_t epnamelen;
|
||||
|
||||
/** OFI memory region */
|
||||
struct fid_mr *mr_multi_recv;
|
||||
|
||||
/** buffer for tx and rx */
|
||||
void *rxbuf;
|
||||
|
||||
uint64_t rxbuf_size;
|
||||
|
||||
/* event,fd associated with the cq */
|
||||
int fd;
|
||||
|
||||
/*event associated with progress fn */
|
||||
opal_event_t progress_event;
|
||||
bool progress_ev_active;
|
||||
|
||||
struct fi_context rx_ctx1;
|
||||
|
||||
/* module associated with this conduit_id returned to rml
|
||||
from open_conduit call */
|
||||
struct orte_rml_ofi_module_t *ofi_module;
|
||||
|
||||
} ofi_transport_conduit_t;
|
||||
|
||||
|
||||
struct orte_rml_ofi_module_t {
|
||||
orte_rml_base_module_t api;
|
||||
|
||||
/** current ofi transport id the component is using, this will be initialised
|
||||
** in the open_conduit() call **/
|
||||
int cur_transport_id;
|
||||
|
||||
/** Fabric info structure of all supported transports in system **/
|
||||
struct fi_info *fi_info_list;
|
||||
|
||||
/** OFI ep and corr fi_info for all the transports (conduit) **/
|
||||
ofi_transport_conduit_t ofi_conduits[MAX_CONDUIT];
|
||||
|
||||
size_t min_ofi_recv_buf_sz;
|
||||
|
||||
/** "Any source" address */
|
||||
fi_addr_t any_addr;
|
||||
|
||||
/** number of conduits currently opened **/
|
||||
uint8_t conduit_open_num;
|
||||
|
||||
/** Unique message id for every message that is fragmented to be sent over OFI **/
|
||||
uint32_t cur_msgid;
|
||||
|
||||
opal_list_t recv_msg_queue_list;
|
||||
opal_list_t queued_routing_messages;
|
||||
opal_event_t *timer_event;
|
||||
struct timeval timeout;
|
||||
} ;
|
||||
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
|
||||
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component;
|
||||
|
||||
int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/****************** INTERNAL OFI Functions*************/
|
||||
void free_conduit_resources( int conduit_id);
|
||||
void print_provider_list_info (struct fi_info *fi );
|
||||
|
||||
/** Send callback */
|
||||
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
|
||||
orte_rml_ofi_request_t*);
|
||||
|
||||
/** Error callback */
|
||||
int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
|
||||
orte_rml_ofi_request_t*);
|
||||
|
||||
/* OFI Recv handler */
|
||||
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
1088
orte/mca/rml/ofi/rml_ofi_component.c
Обычный файл
1088
orte/mca/rml/ofi/rml_ofi_component.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
127
orte/mca/rml/ofi/rml_ofi_request.h
Обычный файл
127
orte/mca/rml/ofi/rml_ofi_request.h
Обычный файл
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_RML_OFI_REQUEST_H
|
||||
#define ORTE_RML_OFI_REQUEST_H
|
||||
|
||||
|
||||
/* Map a pointer to the `ctx' member of an orte_rml_ofi_request_t back to
 * the request that contains it. */
#define TO_OFI_REQ(_ptr_ctx) container_of((_ptr_ctx), orte_rml_ofi_request_t, ctx)
|
||||
|
||||
/* Kind of operation an OFI request stands for. */
typedef enum {
    ORTE_RML_OFI_SEND  = 0,
    ORTE_RML_OFI_RECV  = 1,
    ORTE_RML_OFI_ACK   = 2,
    ORTE_RML_OFI_PROBE = 3
} orte_rml_ofi_request_type_t;
|
||||
/* orte_rml_ofi_msg_header_t contains the header information for the message being sent.
|
||||
The header and data is passed on to the destination. The destination will re-construct the
|
||||
orte_rml_sent_t struct once it receives this header and data.This header has the required information
|
||||
to construct the orte_rml_sent_t struct and also if the message is split into packets,
|
||||
then the packet information - total number of packets and the current packet number.
|
||||
*/
|
||||
struct orte_rml_ofi_msg_header_t{
|
||||
opal_process_name_t origin; // originator process id from the send message
|
||||
opal_process_name_t dst; // Destination process id from the send message
|
||||
uint32_t seq_num; // seq_num from the send message
|
||||
orte_rml_tag_t tag; // tag from the send message
|
||||
uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs
|
||||
uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin
|
||||
uint32_t cur_pkt_num; // current packet number
|
||||
};
|
||||
typedef struct orte_rml_ofi_msg_header_t orte_rml_ofi_msg_header_t;
|
||||
|
||||
/*
|
||||
orte_rml_ofi_pkts_t defines the packets in the message. Each packet contains header information
|
||||
and the data. Create a list of packets to hold the entire message.
|
||||
*/
|
||||
typedef struct {
|
||||
//list_item_t
|
||||
opal_list_item_t super;
|
||||
/* header + data size */
|
||||
size_t pkt_size;
|
||||
//header + data
|
||||
void *data;
|
||||
}orte_rml_ofi_send_pkt_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_send_pkt_t);
|
||||
|
||||
/*
|
||||
orte_rml_ofi_recv_pkt_t defines the packets in the receiving end of message.
|
||||
Each packet contains the packet number and the data.
|
||||
Create a list of packets to hold the entire message.
|
||||
*/
|
||||
typedef struct {
|
||||
//list_item_t
|
||||
opal_list_item_t super;
|
||||
/* current packet number */
|
||||
uint32_t cur_pkt_num;
|
||||
/*data size */
|
||||
size_t pkt_size;
|
||||
//data
|
||||
void *data;
|
||||
}orte_rml_ofi_recv_pkt_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_recv_pkt_t);
|
||||
|
||||
/*
|
||||
orte_rml_ofi_request_t holds the send request (orte_rml_send_t)
|
||||
*/
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
|
||||
/** OFI context */
|
||||
struct fi_context ctx;
|
||||
|
||||
orte_rml_send_t *send;
|
||||
|
||||
/** OFI conduit_id the request will use - this is
|
||||
* the reference to element into the orte_rml_ofi.ofi_conduits[] **/
|
||||
uint8_t conduit_id;
|
||||
|
||||
/** OFI Request type */
|
||||
orte_rml_ofi_request_type_t type;
|
||||
|
||||
/** Completion count used by blocking and/or synchronous operations */
|
||||
volatile int completion_count;
|
||||
|
||||
/** Reference to the RML used to lookup */
|
||||
/* source of an ANY_SOURCE Recv */
|
||||
struct orte_rml_base_module_t* rml;
|
||||
|
||||
/** header being sent **/
|
||||
orte_rml_ofi_msg_header_t hdr;
|
||||
|
||||
/** Pack buffer */
|
||||
void *data_blob;
|
||||
|
||||
/** Pack buffer size */
|
||||
size_t length;
|
||||
|
||||
/** Header and data in a list of Packets orte_rml_ofi_send_pkt_t */
|
||||
opal_list_t pkt_list;
|
||||
|
||||
} orte_rml_ofi_request_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_ofi_request_t);
|
||||
|
||||
|
||||
/* This will hold all the pckts received at the destination.
|
||||
Each entry will be indexed by [sender,msgid] and will have
|
||||
all the packets for that msgid and sender.
|
||||
*/
|
||||
typedef struct {
|
||||
|
||||
opal_list_item_t super; //list_item_t
|
||||
uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs
|
||||
opal_process_name_t sender; // originator process id from the send message
|
||||
uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin
|
||||
uint32_t pkt_recd; // current packet number
|
||||
opal_list_t pkt_list; // list holding Packets in this msg of type orte_rml_ofi_recv_pkt_t
|
||||
} ofi_recv_msg_queue_t;
|
||||
OBJ_CLASS_DECLARATION( ofi_recv_msg_queue_t);
|
||||
|
||||
#endif
|
665
orte/mca/rml/ofi/rml_ofi_send.c
Обычный файл
665
orte/mca/rml/ofi/rml_ofi_send.c
Обычный файл
@ -0,0 +1,665 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "rml_ofi.h"
|
||||
|
||||
|
||||
/* Constructor: set up the request's packet list. */
static void ofi_req_cons(orte_rml_ofi_request_t *req)
{
    OBJ_CONSTRUCT(&req->pkt_list, opal_list_t);
}

/* Destructor: tear down the request's packet list. */
static void ofi_req_des(orte_rml_ofi_request_t *req)
{
    OPAL_LIST_DESTRUCT(&req->pkt_list);
}

OBJ_CLASS_INSTANCE(orte_rml_ofi_request_t, opal_object_t,
                   ofi_req_cons, ofi_req_des);
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_rml_ofi_send_pkt_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_rml_ofi_recv_pkt_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
/* Constructor: a new entry has msgid 0, expects one packet, has received
 * none, and starts with an empty packet list. */
static void ofi_recv_msg_queue_cons(ofi_recv_msg_queue_t *entry)
{
    entry->msgid = 0;
    entry->tot_pkts = 1;
    entry->pkt_recd = 0;
    OBJ_CONSTRUCT(&entry->pkt_list, opal_list_t);
}

/* Destructor: tear down the entry's packet list. */
static void ofi_recv_msg_queue_des(ofi_recv_msg_queue_t *entry)
{
    OPAL_LIST_DESTRUCT(&entry->pkt_list);
}

OBJ_CLASS_INSTANCE(ofi_recv_msg_queue_t, opal_list_item_t,
                   ofi_recv_msg_queue_cons, ofi_recv_msg_queue_des);
|
||||
|
||||
/** Send callback */
|
||||
/* [Desc] This is called from the progress fn when a send completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry
|
||||
** ofi_send_req [in]: ofi send request with the send msg and callback
|
||||
*/
|
||||
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
|
||||
orte_rml_ofi_request_t* ofi_req)
|
||||
{
|
||||
orte_rml_ofi_send_pkt_t *ofi_send_pkt, *next;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_send_callback called, completion count = %d, msgid = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->completion_count, ofi_req->hdr.msgid);
|
||||
assert(ofi_req->completion_count > 0);
|
||||
ofi_req->completion_count--;
|
||||
if ( 0 == ofi_req->completion_count ) {
|
||||
// call the callback fn of the sender
|
||||
ofi_req->send->status = ORTE_SUCCESS;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s calling ORTE_RML_SEND_COMPLETE macro for msgid = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->hdr.msgid);
|
||||
ORTE_RML_SEND_COMPLETE(ofi_req->send);
|
||||
OPAL_LIST_FOREACH_SAFE(ofi_send_pkt, next, &ofi_req->pkt_list, orte_rml_ofi_send_pkt_t) {
|
||||
free( ofi_send_pkt->data);
|
||||
ofi_send_pkt->pkt_size=0;
|
||||
opal_list_remove_item(&ofi_req->pkt_list, &ofi_send_pkt->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(ofi_send_pkt);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
}
|
||||
free(ofi_req->data_blob);
|
||||
OBJ_RELEASE(ofi_req);
|
||||
}
|
||||
|
||||
// [TODO] need to check for error before returning success
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/** Error callback */
|
||||
/* [Desc] This is called from the progress fn when a send completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry
|
||||
** ofi_send_req [in]: ofi send request with the send msg and callback
|
||||
*/
|
||||
int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
|
||||
orte_rml_ofi_request_t* ofi_req)
|
||||
{
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_error_callback called ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
switch(error->err) {
|
||||
default:
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
ofi_req->send->status = ORTE_ERR_CONDUIT_SEND_FAIL;
|
||||
ORTE_RML_SEND_COMPLETE(ofi_req->send);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/** Recv handler */
|
||||
/* [Desc] This is called from the progress fn when a recv completion
|
||||
** is received in the cq
|
||||
** wc [in] : the completion queue data entry */
|
||||
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id)
|
||||
{
|
||||
orte_rml_ofi_msg_header_t msg_hdr;
|
||||
uint32_t msglen, datalen = 0;
|
||||
char *data, *totdata, *nextpkt;
|
||||
ofi_recv_msg_queue_t *recv_msg_queue, *new_msg;
|
||||
orte_rml_ofi_recv_pkt_t *ofi_recv_pkt, *new_pkt, *next;
|
||||
bool msg_in_queue = false;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s orte_rml_ofi_recv_handler called ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
/*copy the header and data from buffer and pass it on
|
||||
** since this is the conduit recv buffer don't want it to be released as
|
||||
** considering re-using it, so for now copying to newly allocated *data
|
||||
** the *data will be released by orte_rml_base functions */
|
||||
|
||||
memcpy(&msg_hdr,wc->buf,sizeof(orte_rml_ofi_msg_header_t));
|
||||
msglen = wc->len - sizeof(orte_rml_ofi_msg_header_t);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Received packet -> msg id = %d wc->len = %d, msglen = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid, wc->len, msglen );
|
||||
data = (char *)malloc(msglen);
|
||||
memcpy(data,(wc->buf+sizeof(orte_rml_ofi_msg_header_t)),msglen);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s header info of received packet -> cur_pkt_num = %d, tot_pkts = %d ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.cur_pkt_num, msg_hdr.tot_pkts );
|
||||
/* To accomodate message bigger than recv buffer size,
|
||||
check if current message is in multiple blocks and append them before sending it to RML */
|
||||
if ( msg_hdr.tot_pkts == 1) {
|
||||
/* Since OFI is point-to-point, no need to check if the intended destination is me
|
||||
send to RML */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Posting Recv for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,data,msglen);
|
||||
} else {
|
||||
msg_in_queue = false;
|
||||
new_pkt = OBJ_NEW(orte_rml_ofi_recv_pkt_t);
|
||||
new_pkt->cur_pkt_num = msg_hdr.cur_pkt_num;
|
||||
new_pkt->pkt_size = msglen;
|
||||
new_pkt->data = data;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Just beofe checking if this message-pkt is already in queue. msgid-%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
/* check if the queue has the [msgid,sender] entry */
|
||||
OPAL_LIST_FOREACH(recv_msg_queue, &orte_rml_ofi.recv_msg_queue_list, ofi_recv_msg_queue_t) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Checking msgid-%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid );
|
||||
if( (recv_msg_queue->msgid == msg_hdr.msgid) && (recv_msg_queue->sender.jobid == msg_hdr.origin.jobid)
|
||||
&& (recv_msg_queue->sender.vpid == msg_hdr.origin.vpid) ) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Found Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid, recv_msg_queue->sender.vpid);
|
||||
msg_in_queue = true;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s msgid %d, tot_pkts=%d, opal_list_get_size()=%d,total pkt_recd=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts,
|
||||
opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd );
|
||||
if( recv_msg_queue->tot_pkts == (recv_msg_queue->pkt_recd +1) ) {
|
||||
/* all packets received for this message - post message to rml and remove this from queue */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s All packets recd for msgid %d, tot_pkts=%d, opal_list_get_size()=%d,total pkt_recd=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts,
|
||||
opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd );
|
||||
totdata = NULL;
|
||||
datalen = 0;
|
||||
OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding data for packet %d, pktlength = %d, cumulative datalen so far = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num, ofi_recv_pkt->pkt_size, datalen );
|
||||
if (0 == datalen) {
|
||||
totdata = (char *)malloc(ofi_recv_pkt->pkt_size);
|
||||
if( totdata == NULL) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: malloc failed for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),recv_msg_queue->msgid );
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
memcpy(totdata,ofi_recv_pkt->data,ofi_recv_pkt->pkt_size);
|
||||
|
||||
} else {
|
||||
totdata = realloc(totdata,datalen+ofi_recv_pkt->pkt_size);
|
||||
if (NULL != totdata ) {
|
||||
memcpy((totdata+datalen),ofi_recv_pkt->data,ofi_recv_pkt->pkt_size);
|
||||
} else {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid,
|
||||
recv_msg_queue->sender.vpid);
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
}
|
||||
datalen += ofi_recv_pkt->pkt_size;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s packet %d done, datalen = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num,datalen);
|
||||
}
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding leftover data recd, datalen = %d, new_pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
//add the last packet
|
||||
totdata =realloc(totdata,datalen+new_pkt->pkt_size);
|
||||
if( NULL != totdata ) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Realloc completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
nextpkt = totdata+datalen;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s totdata = %p,nextpkt = %p ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), totdata, nextpkt);
|
||||
memcpy(nextpkt,new_pkt->data,new_pkt->pkt_size);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s memcpy completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size);
|
||||
datalen += new_pkt->pkt_size;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Posting Recv for msgid %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,totdata,datalen);\
|
||||
|
||||
// free the pkts
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s msgid %d - posting recv completed, freeing packets",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid );
|
||||
OPAL_LIST_FOREACH_SAFE(ofi_recv_pkt, next, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
free( ofi_recv_pkt->data);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s freed data for packet %d",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num );
|
||||
ofi_recv_pkt->pkt_size=0;
|
||||
opal_list_remove_item(&recv_msg_queue->pkt_list, &ofi_recv_pkt->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(ofi_recv_pkt);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
}
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s freeing packets completed",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
//free the msg from the queue-list
|
||||
opal_list_remove_item(&orte_rml_ofi.recv_msg_queue_list,&recv_msg_queue->super);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Successfully removed msg from queue",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
OBJ_RELEASE(recv_msg_queue);
|
||||
} else {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid,
|
||||
recv_msg_queue->sender.vpid);
|
||||
return 1; //[TODO: error-handling needs to be implemented
|
||||
}
|
||||
} else {
|
||||
/* add this packet to the msg in the queue ordered by cur_pkt_num */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding packet to list, msgid %d, pkt - %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, msg_hdr.cur_pkt_num );
|
||||
|
||||
bool pkt_added = false;
|
||||
OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) {
|
||||
if( msg_hdr.cur_pkt_num < ofi_recv_pkt->cur_pkt_num ) {
|
||||
opal_list_insert_pos(&recv_msg_queue->pkt_list, (opal_list_item_t*)ofi_recv_pkt, &new_pkt->super);
|
||||
recv_msg_queue->pkt_recd++;
|
||||
pkt_added = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!pkt_added) {
|
||||
opal_list_append(&recv_msg_queue->pkt_list,&new_pkt->super);
|
||||
recv_msg_queue->pkt_recd++;
|
||||
}
|
||||
}
|
||||
}
|
||||
break; //we found the msg or added it so exit out of the msg_queue loop
|
||||
}
|
||||
if( !msg_in_queue ) {
|
||||
/*add to the queue as this is the first packet for [msgid,sender] */
|
||||
new_msg = OBJ_NEW(ofi_recv_msg_queue_t);
|
||||
new_msg->msgid = msg_hdr.msgid;
|
||||
new_msg->sender = msg_hdr.origin;
|
||||
new_msg->tot_pkts = msg_hdr.tot_pkts;
|
||||
new_msg->pkt_recd = 1;
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s Adding first Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_msg->msgid, new_msg->sender.jobid, new_msg->sender.vpid);
|
||||
opal_list_append(&new_msg->pkt_list, &new_pkt->super);
|
||||
opal_list_append(&orte_rml_ofi.recv_msg_queue_list, &new_msg->super);
|
||||
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void send_msg(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
|
||||
orte_process_name_t *peer = &(req->send.dst);
|
||||
orte_rml_tag_t tag = req->send.tag;
|
||||
char *dest_ep_name;
|
||||
size_t dest_ep_namelen = 0;
|
||||
int ret = OPAL_ERROR;
|
||||
uint32_t total_packets;
|
||||
fi_addr_t dest_fi_addr;
|
||||
orte_rml_send_t *snd;
|
||||
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
|
||||
uint8_t conduit_id = req->conduit_id;
|
||||
orte_rml_ofi_send_pkt_t* ofi_msg_pkt;
|
||||
size_t datalen_per_pkt, hdrsize, data_in_pkt; // the length of data in per packet excluding the header size
|
||||
|
||||
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = *peer;
|
||||
snd->origin = *ORTE_PROC_MY_NAME;
|
||||
snd->tag = tag;
|
||||
if (NULL != req->send.iov) {
|
||||
snd->iov = req->send.iov;
|
||||
snd->count = req->send.count;
|
||||
snd->cbfunc.iov = req->send.cbfunc.iov;
|
||||
} else {
|
||||
snd->buffer = req->send.buffer;
|
||||
snd->cbfunc.buffer = req->send.cbfunc.buffer;
|
||||
}
|
||||
snd->cbdata = req->send.cbdata;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s send_msg_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
/* get the peer address by doing modex_receive */
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
switch ( orte_rml_ofi.ofi_conduits[conduit_id].fabric_info->addr_format)
|
||||
{
|
||||
case FI_SOCKADDR_IN :
|
||||
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_SOCKADDR_IN, peer , (char **) &dest_ep_name, &dest_ep_namelen);
|
||||
/*print the sockaddr - port and s_addr */
|
||||
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*) dest_ep_name;
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s obtained for peer %s port = 0x%printinx, InternetAddr = %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),ntohs(ep_sockaddr->sin_port),
|
||||
inet_ntoa(ep_sockaddr->sin_addr));
|
||||
break;
|
||||
case FI_ADDR_PSMX :
|
||||
OPAL_MODEX_RECV_STRING(ret, OPAL_RML_OFI_FI_ADDR_PSMX, peer , (char **) &dest_ep_name, &dest_ep_namelen);
|
||||
break;
|
||||
default:
|
||||
/* we shouldn't be getting here as only above are supported and address sent
|
||||
* to PMIX (OPAL_MODEX_SEND) in orte_component_init() */
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Error: Unhandled address format type in ofi_send_msg", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
return;
|
||||
}
|
||||
opal_output_verbose(50, orte_rml_base_framework.framework_output,
|
||||
"%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen);
|
||||
|
||||
|
||||
if ( OPAL_SUCCESS == ret) {
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), dest_ep_namelen);
|
||||
ret = fi_av_insert(orte_rml_ofi.ofi_conduits[conduit_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
|
||||
if( ret != 1) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s fi_av_insert failed in send_msg() returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV failed to obtain %s peer ep name ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer));
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
return;
|
||||
}
|
||||
|
||||
ofi_send_req->send = snd;
|
||||
ofi_send_req->completion_count = 1;
|
||||
|
||||
/* [DESC] we want to send the pid,seqnum,tag in addition to the data
|
||||
* copy all of this to header of message from the ofi_send_t* send
|
||||
*/
|
||||
ofi_send_req->hdr.dst = ofi_send_req->send->dst;
|
||||
ofi_send_req->hdr.origin = ofi_send_req->send->origin;
|
||||
ofi_send_req->hdr.seq_num = ofi_send_req->send->seq_num;
|
||||
ofi_send_req->hdr.tag = ofi_send_req->send->tag;
|
||||
|
||||
/*
|
||||
* also insert ofi plugin specific header details -
|
||||
* the unique msgid, for now initalise total_packets to 1
|
||||
*/
|
||||
ofi_send_req->hdr.msgid = orte_rml_ofi.cur_msgid;
|
||||
orte_rml_ofi.cur_msgid += 1;
|
||||
total_packets = 1;
|
||||
|
||||
/* copy the buffer/iov/data to the ofi_send_req->datablob and update ofi_send_req->length*/
|
||||
ofi_send_req->length = 0;
|
||||
if( NULL != ofi_send_req->send->buffer) {
|
||||
ofi_send_req->length = ofi_send_req->send->buffer->bytes_used;
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
memcpy(ofi_send_req->data_blob ,
|
||||
ofi_send_req->send->buffer->base_ptr,
|
||||
ofi_send_req->send->buffer->bytes_used);
|
||||
} else if ( NULL != ofi_send_req->send->iov) {
|
||||
for (int i=0; i < ofi_send_req->send->count; i++) {
|
||||
ofi_send_req->length += ofi_send_req->send->iov[i].iov_len;
|
||||
}
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
int iovlen=0;
|
||||
for (int i=0; i < ofi_send_req->send->count; i++) {
|
||||
memcpy((ofi_send_req->data_blob + iovlen ),
|
||||
ofi_send_req->send->iov[i].iov_base,
|
||||
ofi_send_req->send->iov[i].iov_len);
|
||||
iovlen += ofi_send_req->send->iov[i].iov_len;
|
||||
}
|
||||
} else {
|
||||
//just send the data
|
||||
ofi_send_req->length = ofi_send_req->send->count;
|
||||
ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length);
|
||||
memcpy(ofi_send_req->data_blob ,
|
||||
ofi_send_req->send->data,
|
||||
ofi_send_req->send->count);
|
||||
}
|
||||
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Completed copying all data into ofi_send_req->data_blob, total data - %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->length );
|
||||
|
||||
/* Each packet will have header information, so the data length in each packet is datalen_per_packet.
|
||||
* check if the ofi_send_req->send->buffer->bytes_used is greater than the data per packet datalen_per_packet(recv buffer)
|
||||
* if so fragment and add info to header and send it in a loop back-to-back */
|
||||
hdrsize = sizeof(orte_rml_ofi_msg_header_t);
|
||||
datalen_per_pkt = MIN_MULTI_BUF_SIZE - hdrsize;
|
||||
if (ofi_send_req->length > datalen_per_pkt )
|
||||
{
|
||||
total_packets = ( ofi_send_req->length / datalen_per_pkt ) + 1 ;
|
||||
}
|
||||
ofi_send_req->hdr.tot_pkts = total_packets;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s datalen_per_pkt = %d, ofi_send_req->length= %d, total packets = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen_per_pkt, ofi_send_req->length, total_packets );
|
||||
|
||||
/* in a loop send create and send the packets */
|
||||
for(size_t pkt_num=1,sent_data=0; sent_data < ofi_send_req->length; pkt_num++) {
|
||||
ofi_send_req->hdr.cur_pkt_num = pkt_num;
|
||||
/* create the packet */
|
||||
ofi_msg_pkt = OBJ_NEW(orte_rml_ofi_send_pkt_t);
|
||||
data_in_pkt = ((ofi_send_req->length - sent_data) >= datalen_per_pkt) ?
|
||||
datalen_per_pkt : (ofi_send_req->length - sent_data);
|
||||
ofi_msg_pkt->pkt_size = hdrsize + data_in_pkt;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Packet %d -> data_in_pkt= %d, header_size= %d, pkt_size=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pkt_num,data_in_pkt,hdrsize,ofi_msg_pkt->pkt_size );
|
||||
/* copy the header and data for this pkt */
|
||||
ofi_msg_pkt->data = malloc( ofi_msg_pkt->pkt_size);
|
||||
memcpy(ofi_msg_pkt->data, &ofi_send_req->hdr, hdrsize );
|
||||
memcpy( (ofi_msg_pkt->data + hdrsize ),
|
||||
(ofi_send_req->data_blob + sent_data),
|
||||
data_in_pkt);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Copying header, data into packets completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
/* add it to list */
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Before adding packet %d to list. List addr -> 0x%x, ofi_msg_pkt->super is 0x%x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pkt_num,&(ofi_send_req->pkt_list), &ofi_msg_pkt->super );
|
||||
opal_list_append(&(ofi_send_req->pkt_list), &ofi_msg_pkt->super);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s adding packet %d to list done successful",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pkt_num );
|
||||
sent_data += data_in_pkt;
|
||||
}
|
||||
|
||||
if( ofi_send_req->hdr.tot_pkts != ofi_send_req->hdr.cur_pkt_num ) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Error: Total packets calculated [%d] does not match total created-%d pkts to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->hdr.tot_pkts, ofi_send_req->hdr.cur_pkt_num,
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
}
|
||||
/* do the fi_send() for all the pkts */
|
||||
ofi_send_req->completion_count= ofi_send_req->hdr.tot_pkts;
|
||||
OPAL_LIST_FOREACH(ofi_msg_pkt, &ofi_send_req->pkt_list, orte_rml_ofi_send_pkt_t) {
|
||||
/* debug purpose - copying the header from packet to verify if it is correct */
|
||||
struct orte_rml_ofi_msg_header_t *cur_hdr;
|
||||
cur_hdr = (struct orte_rml_ofi_msg_header_t* ) ofi_msg_pkt->data;
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s Sending Pkt[%d] of total %d pkts for msgid:%d to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_hdr->cur_pkt_num, ofi_send_req->completion_count,
|
||||
cur_hdr->msgid, ORTE_NAME_PRINT(peer), tag);
|
||||
/* end debug*/
|
||||
|
||||
RML_OFI_RETRY_UNTIL_DONE(fi_send(orte_rml_ofi.ofi_conduits[conduit_id].ep,
|
||||
ofi_msg_pkt->data,
|
||||
ofi_msg_pkt->pkt_size,
|
||||
fi_mr_desc(orte_rml_ofi.ofi_conduits[conduit_id].mr_multi_recv),
|
||||
dest_fi_addr,
|
||||
(void *)&ofi_send_req->ctx));
|
||||
|
||||
}
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s End of send_msg_transport. fi_send completed to peer %s with tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
free(dest_ep_name);
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
|
||||
int orte_rml_ofi_send_nb(void* mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req;
|
||||
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;
|
||||
int conduit_id = ofi_mod->cur_transport_id;
|
||||
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml_ofi_send_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
|
||||
/* Invalid conduit ID provided */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (ORTE_RML_TAG_INVALID == tag) {
|
||||
/* cannot send to an invalid tag */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (NULL == peer ||
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
|
||||
/* cannot send to an invalid peer */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
/* get ourselves into an event to protect against
|
||||
* race conditions and threads
|
||||
*/
|
||||
req = OBJ_NEW(orte_rml_send_request_t);
|
||||
req->conduit_id = conduit_id;
|
||||
req->send.dst = *peer;
|
||||
req->send.iov = iov;
|
||||
req->send.count = count;
|
||||
req->send.tag = tag;
|
||||
req->send.cbfunc.iov = cbfunc;
|
||||
req->send.cbdata = cbdata;
|
||||
|
||||
/* setup the event for the send callback */
|
||||
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
|
||||
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int orte_rml_ofi_send_buffer_nb(void* mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_send_request_t *req;
|
||||
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;
|
||||
int conduit_id = ofi_mod->cur_transport_id;
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml_ofi_send_buffer_transport to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
if( (0 > conduit_id) || ( conduit_id >= orte_rml_ofi.conduit_open_num ) ) {
|
||||
/* Invalid conduit ID provided */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (ORTE_RML_TAG_INVALID == tag) {
|
||||
/* cannot send to an invalid tag */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (NULL == peer ||
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
|
||||
/* cannot send to an invalid peer */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
/* get ourselves into an event to protect against
|
||||
* race conditions and threads
|
||||
*/
|
||||
req = OBJ_NEW(orte_rml_send_request_t);
|
||||
req->conduit_id = conduit_id;
|
||||
req->send.dst = *peer;
|
||||
req->send.buffer = buffer;
|
||||
req->send.tag = tag;
|
||||
req->send.cbfunc.buffer = cbfunc;
|
||||
req->send.cbdata = cbdata;
|
||||
|
||||
/* setup the event for the send callback */
|
||||
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
|
||||
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
|
||||
opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -21,9 +21,6 @@
|
||||
sources = \
|
||||
rml_oob.h \
|
||||
rml_oob_component.c \
|
||||
rml_oob_contact.c \
|
||||
rml_oob_exception.c \
|
||||
rml_oob_ping.c \
|
||||
rml_oob_send.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
|
@ -37,47 +37,35 @@
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
struct orte_rml_base_module_t super;
|
||||
opal_list_t exceptions;
|
||||
opal_list_t queued_routing_messages;
|
||||
orte_rml_base_module_t api;
|
||||
opal_list_t queued_routing_messages;
|
||||
opal_event_t *timer_event;
|
||||
struct timeval timeout;
|
||||
struct timeval timeout;
|
||||
} orte_rml_oob_module_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
|
||||
extern orte_rml_oob_module_t orte_rml_oob_module;
|
||||
|
||||
int orte_rml_oob_init(void);
|
||||
void orte_rml_oob_fini(void);
|
||||
int orte_rml_oob_ft_event(int state);
|
||||
void orte_rml_oob_fini(struct orte_rml_base_module_t *mod);
|
||||
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_ping(const char* uri,
|
||||
int orte_rml_oob_ping(struct orte_rml_base_module_t *mod,
|
||||
const char* uri,
|
||||
const struct timeval* tv);
|
||||
|
||||
char* orte_rml_oob_get_uri(void);
|
||||
void orte_rml_oob_set_uri(const char*);
|
||||
|
||||
int orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc);
|
||||
int orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc);
|
||||
void orte_rml_oob_exception_callback(orte_process_name_t *peer,
|
||||
orte_rml_exception_t exception);
|
||||
|
||||
|
||||
void orte_rml_oob_purge(orte_process_name_t *peer);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -51,10 +51,13 @@
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "rml_oob.h"
|
||||
|
||||
static orte_rml_base_module_t* rml_oob_init(int* priority);
|
||||
static int rml_oob_open(void);
|
||||
static int rml_oob_close(void);
|
||||
|
||||
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes);
|
||||
static orte_rml_pathway_t* query_transports(void);
|
||||
static char* get_contact_info(void);
|
||||
static void set_contact_info(const char *uri);
|
||||
static void close_conduit(orte_rml_base_module_t *mod);
|
||||
/**
|
||||
* component definition
|
||||
*/
|
||||
@ -62,156 +65,159 @@ orte_rml_component_t mca_rml_oob_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
.rml_version = {
|
||||
ORTE_RML_BASE_VERSION_2_0_0,
|
||||
.base = {
|
||||
ORTE_RML_BASE_VERSION_3_0_0,
|
||||
|
||||
.mca_component_name = "oob",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = rml_oob_open,
|
||||
.mca_close_component = rml_oob_close,
|
||||
|
||||
},
|
||||
.rml_data = {
|
||||
.data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.rml_init = rml_oob_init,
|
||||
};
|
||||
|
||||
orte_rml_oob_module_t orte_rml_oob_module = {
|
||||
{
|
||||
.finalize = orte_rml_oob_fini,
|
||||
|
||||
.get_contact_info = orte_rml_oob_get_uri,
|
||||
.set_contact_info = orte_rml_oob_set_uri,
|
||||
|
||||
.ping = orte_rml_oob_ping,
|
||||
|
||||
.send_nb = orte_rml_oob_send_nb,
|
||||
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
|
||||
|
||||
.add_exception_handler = orte_rml_oob_add_exception,
|
||||
.del_exception_handler = orte_rml_oob_del_exception,
|
||||
.ft_event = orte_rml_oob_ft_event,
|
||||
.purge = orte_rml_oob_purge
|
||||
}
|
||||
.priority = 5,
|
||||
.open_conduit = open_conduit,
|
||||
.query_transports = query_transports,
|
||||
.get_contact_info = get_contact_info,
|
||||
.set_contact_info = set_contact_info,
|
||||
.close_conduit = close_conduit
|
||||
};
|
||||
|
||||
/* Local variables */
|
||||
static bool init_done = false;
|
||||
static orte_rml_pathway_t pathway;
|
||||
static orte_rml_base_module_t base_module = {
|
||||
.component = (struct orte_rml_component_t*)&mca_rml_oob_component,
|
||||
.ping = NULL,
|
||||
.send_nb = orte_rml_oob_send_nb,
|
||||
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
|
||||
.purge = NULL
|
||||
};
|
||||
|
||||
static int
|
||||
rml_oob_open(void)
|
||||
static int rml_oob_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
rml_oob_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static orte_rml_base_module_t*
|
||||
rml_oob_init(int* priority)
|
||||
{
|
||||
if (init_done) {
|
||||
*priority = 1;
|
||||
return &orte_rml_oob_module.super;
|
||||
}
|
||||
|
||||
*priority = 1;
|
||||
|
||||
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
|
||||
|
||||
init_done = true;
|
||||
return &orte_rml_oob_module.super;
|
||||
}
|
||||
|
||||
int
|
||||
orte_rml_oob_init(void)
|
||||
{
|
||||
/* enable the base receive to get updates on contact info */
|
||||
orte_rml_base_comm_start();
|
||||
/* ask our OOB transports for their info */
|
||||
OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t);
|
||||
pathway.component = strdup("oob");
|
||||
ORTE_OOB_GET_TRANSPORTS(&pathway.transports);
|
||||
/* add any component attributes of our own */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
orte_rml_oob_fini(void)
|
||||
static int rml_oob_close(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&pathway);
|
||||
|
||||
while (NULL !=
|
||||
(item = opal_list_remove_first(&orte_rml_oob_module.exceptions))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions);
|
||||
|
||||
/* clear the base receive */
|
||||
orte_rml_base_comm_stop();
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
int
|
||||
orte_rml_oob_ft_event(int state) {
|
||||
int exit_status = ORTE_SUCCESS;
|
||||
int ret;
|
||||
static orte_rml_base_module_t* make_module(void)
|
||||
{
|
||||
orte_rml_oob_module_t *mod;
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CHECKPOINT);
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CONTINUE);
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_RESTART);
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
/* create a new module */
|
||||
mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t));
|
||||
if (NULL == mod) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* copy the APIs over to it */
|
||||
memcpy(mod, &base_module, sizeof(base_module));
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
(void) mca_base_framework_close(&orte_oob_base_framework);
|
||||
/* initialize its internal storage */
|
||||
OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t);
|
||||
mod->timer_event = NULL;
|
||||
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
/* return the result */
|
||||
return (orte_rml_base_module_t*)mod;
|
||||
}
|
||||
|
||||
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
||||
{
|
||||
char *comp_attrib = NULL;
|
||||
char **comps;
|
||||
int i;
|
||||
orte_attribute_t *attr;
|
||||
|
||||
opal_output_verbose(20,orte_rml_base_framework.framework_output,
|
||||
"%s - Entering rml_oob_open_conduit()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* someone may require this specific component, so look for "oob" */
|
||||
if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) &&
|
||||
NULL != comp_attrib) {
|
||||
/* they specified specific components - could be multiple */
|
||||
comps = opal_argv_split(comp_attrib, ',');
|
||||
for (i=0; NULL != comps[i]; i++) {
|
||||
if (0 == strcmp(comps[i], "oob")) {
|
||||
/* we are a candidate */
|
||||
opal_argv_free(comps);
|
||||
return make_module();
|
||||
}
|
||||
}
|
||||
|
||||
if( ORTE_SUCCESS != (ret = orte_oob_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
/* we are not a candidate */
|
||||
opal_argv_free(comps);
|
||||
return NULL;
|
||||
} else if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) &&
|
||||
NULL != comp_attrib) {
|
||||
/* see if we are on the list */
|
||||
comps = opal_argv_split(comp_attrib, ',');
|
||||
for (i=0; NULL != comps[i]; i++) {
|
||||
if (0 == strcmp(comps[i], "oob")) {
|
||||
/* we cannot be a candidate */
|
||||
opal_argv_free(comps);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
|
||||
/* Alternatively, check the attributes to see if we qualify - we only handle
|
||||
* "routed", "Ethernet", and "TCP" */
|
||||
OPAL_LIST_FOREACH(attr, attributes, orte_attribute_t) {
|
||||
|
||||
}
|
||||
|
||||
cleanup:
|
||||
return exit_status;
|
||||
/* if we get here, we cannot handle it */
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
int
|
||||
orte_rml_oob_ft_event(int state) {
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
static orte_rml_pathway_t* query_transports(void)
|
||||
{
|
||||
/* if we have any available transports, make them available */
|
||||
if (0 < opal_list_get_size(&pathway.transports)) {
|
||||
return &pathway;
|
||||
}
|
||||
/* if not, then return NULL */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Tear down the per-conduit state owned by this component: the module's
 * queued_routing_messages list is destructed.
 *
 * The module itself is not freed here; per the original note, the
 * rml_base_stub clears the base receive and frees the module.
 */
static void close_conduit(orte_rml_base_module_t *md)
{
    orte_rml_oob_module_t *oob = (orte_rml_oob_module_t*)md;

    /* release the list of queued messages */
    OBJ_DESTRUCT(&oob->queued_routing_messages);
}
|
||||
|
||||
/*
 * Obtain the contact URI through ORTE_OOB_GET_URI and return it to the
 * caller.
 */
static char* get_contact_info(void)
{
    char *uri;

    /* the macro stores its result through the supplied pointer */
    ORTE_OOB_GET_URI(&uri);

    return uri;
}
|
||||
|
||||
/*
 * Forward a contact URI string to the OOB layer via ORTE_OOB_SET_URI.
 */
static void set_contact_info(const char *uri)
{
    /* nothing is kept locally; the OOB macro does all the work */
    ORTE_OOB_SET_URI(uri);
}
|
||||
#endif
|
||||
|
@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
/*
 * Obtain the contact URI through ORTE_OOB_GET_URI and return it to the
 * caller.
 */
char* orte_rml_oob_get_uri(void)
{
    char *uri;

    /* the macro stores its result through the supplied pointer */
    ORTE_OOB_GET_URI(&uri);

    return uri;
}
|
||||
|
||||
|
||||
/*
 * Forward a contact URI string to the OOB layer via ORTE_OOB_SET_URI.
 */
void orte_rml_oob_set_uri(const char* uri)
{
    /* nothing is kept locally; the OOB macro does all the work */
    ORTE_OOB_SET_URI(uri);
}
|
||||
|
||||
|
||||
/*
 * Purge queued routing messages associated with a peer.
 *
 * The whole implementation is compiled out with "#if 0", so this
 * function currently does nothing and ignores its argument.  The disabled
 * code walks queued_routing_messages and drops every entry whose header
 * destination equals either the peer or the next hop returned by
 * orte_routed.get_route().
 */
void orte_rml_oob_purge(orte_process_name_t *peer)
{
#if 0
    opal_list_t *queue = &orte_rml_oob_module.queued_routing_messages;
    orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
    opal_list_item_t *item, *next;

    /* clear our message queue */
    for (item = opal_list_get_first(queue);
         item != opal_list_get_end(queue);
         item = next) {
        orte_rml_oob_queued_msg_t *qmsg = (orte_rml_oob_queued_msg_t*)item;
        orte_rml_oob_msg_header_t *hdr =
            (orte_rml_oob_msg_header_t*) qmsg->payload[0].iov_base;
        orte_process_name_t step = orte_routed.get_route(&hdr->destination);

        /* fetch the successor first: the item may be released below */
        next = opal_list_get_next(item);

        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, peer, &hdr->destination) ||
            OPAL_EQUAL == orte_util_compare_name_fields(mask, &step, &hdr->destination)) {
            opal_list_remove_item(queue, item);
            OBJ_RELEASE(item);
        }
    }
#endif
}
|
@ -1,84 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
struct orte_rml_oob_exception_t {
|
||||
opal_list_item_t super;
|
||||
orte_rml_exception_callback_t cbfunc;
|
||||
};
|
||||
typedef struct orte_rml_oob_exception_t orte_rml_oob_exception_t;
|
||||
static OBJ_CLASS_INSTANCE(orte_rml_oob_exception_t, opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
|
||||
void
|
||||
orte_rml_oob_exception_callback(orte_process_name_t *peer,
|
||||
orte_rml_exception_t exception)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
|
||||
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
|
||||
ex->cbfunc(peer, exception);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
orte_rml_oob_exception_t *ex = OBJ_NEW(orte_rml_oob_exception_t);
|
||||
|
||||
if (NULL == ex) return ORTE_ERROR;
|
||||
|
||||
ex->cbfunc = cbfunc;
|
||||
opal_list_append(&orte_rml_oob_module.exceptions, &ex->super);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
|
||||
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
|
||||
|
||||
if (cbfunc == ex->cbfunc) {
|
||||
opal_list_remove_item(&orte_rml_oob_module.exceptions, item);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "rml_oob.h"
|
||||
|
||||
int
|
||||
orte_rml_oob_ping(const char* uri,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
@ -203,7 +203,8 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -246,7 +247,8 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
|
@ -55,10 +55,10 @@ BEGIN_C_DECLS
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
struct opal_buffer_t;
|
||||
/* forward declare */
|
||||
struct orte_rml_base_module_t;
|
||||
struct orte_rml_API_module_t;
|
||||
struct orte_rml_component_t;
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
orte_process_name_t name;
|
||||
@ -79,60 +79,8 @@ ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sende
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
|
||||
ORTE_DECLSPEC void orte_rml_open_channel_recv_callback(int status,
|
||||
orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status,
|
||||
orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/**
|
||||
* RML component initialization
|
||||
*
|
||||
* Create an instance (module) of the given RML component. Upon
|
||||
* returning, the module data structure should be fully populated and
|
||||
* all functions should be usable. Non-blocking receive calls may be
|
||||
* posted upon return from this function, although communication need
|
||||
* not be enabled until enable_comm() call is called on the module.
|
||||
*
|
||||
* @return Exactly one module created by the call to the component's
|
||||
* initialization function should be returned. The module structure
|
||||
* should be fully populated, and the priority should be set to a
|
||||
* reasonable value.
|
||||
*
|
||||
* @param[out] priority Selection priority for the given component
|
||||
*
|
||||
* @retval NULL An error occurred and initialization did not occur
|
||||
* @retval non-NULL The module was successfully initialized
|
||||
*/
|
||||
typedef struct orte_rml_base_module_t* (*orte_rml_component_init_fn_t)(int *priority);
|
||||
|
||||
/**
|
||||
* RML component interface
|
||||
*
|
||||
* Component interface for the RML framework. A public instance of
|
||||
* this structure, called mca_rml_[component name]_component, must
|
||||
* exist in any RML component.
|
||||
*/
|
||||
struct orte_rml_component_2_0_0_t {
|
||||
/* Base component description */
|
||||
mca_base_component_t rml_version;
|
||||
/* Base component data block */
|
||||
mca_base_component_data_t rml_data;
|
||||
/* Component intialization function */
|
||||
orte_rml_component_init_fn_t rml_init;
|
||||
};
|
||||
/** Convienence typedef */
|
||||
typedef struct orte_rml_component_2_0_0_t orte_rml_component_t;
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
/* RML CALLBACK FUNCTION DEFINITIONS */
|
||||
|
||||
/**
|
||||
* Function prototype for callback from non-blocking iovec send and recv
|
||||
@ -205,77 +153,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer,
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/**
|
||||
* Enable communication using the RML module
|
||||
*
|
||||
* Enable communication using the RML module. Before this call, only
|
||||
* the non-blocking receive and ping interfaces may be used. After
|
||||
* this call returns, the module must be fully functional, capable of
|
||||
* sending and receiving data. This function will be called after the
|
||||
* process has been assigned a process identifier.
|
||||
*
|
||||
* @note While the ping interface may be used between the call to the
|
||||
* component's initialization function and this call, care must be
|
||||
* taken when doing so. The remote process must have already called
|
||||
* enable_comm() or the remote process will not reply to the ping.
|
||||
* As the ping interface is generally used by MPI processes to find a
|
||||
* daemon to contact, this should not be a major limitation.
|
||||
*
|
||||
* @retval ORTE_SUCCESS Communications successfully enabled
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_enable_comm_fn_t)(void);
|
||||
|
||||
/**
|
||||
* Finalize the RML module
|
||||
*
|
||||
* Finalize the RML module, ending all communication and cleaning up
|
||||
* all resources associated with the module. After the finalize
|
||||
* function is called, all interface functions (and the module
|
||||
* structure itself) are not available for use.
|
||||
*/
|
||||
typedef void (*orte_rml_module_finalize_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Get a "contact info" string for the local process
|
||||
*
|
||||
* Get a "contact info" string that can be used by other processes to
|
||||
* share the contact information for the given process. The "contact
|
||||
* info" string includes the process identifier for the given process
|
||||
* and uses only basic ascii characters. It should be quoted when
|
||||
* evaluated by a shell, although no special escaping is necessary.
|
||||
*
|
||||
* @note The function may return a contact info string which contains
|
||||
* multiple addresses.
|
||||
*
|
||||
* @retval non-NULL The contact information for this process
|
||||
* @retval NULL An error occurred when trying to get the current
|
||||
* process contact info
|
||||
*/
|
||||
typedef char* (*orte_rml_module_get_contact_info_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Update the RML with a remote process's contact info
|
||||
*
|
||||
* Update the RML with a remote process's contact information, as
|
||||
* returned from the get_contact_info() function on the remote
|
||||
* process. Before a send can be initiated to a remote process,
|
||||
* either this function must be called for that process or that
|
||||
* process must have already established a connection to the local
|
||||
* process.
|
||||
*
|
||||
* @note The user may not always explicitly call this function
|
||||
* directly, but may instead cause it to be called through one of the
|
||||
* contact setup functions available in
|
||||
* orte/mca/rml/base/rml_contact.h.
|
||||
*
|
||||
* @param[in] contact_info The contact information string of a peer
|
||||
*/
|
||||
typedef void (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info);
|
||||
/* RML INTERNAL MODULE API DEFINITION */
|
||||
|
||||
|
||||
/**
|
||||
@ -294,7 +172,8 @@ typedef void (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info);
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
|
||||
typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
@ -321,7 +200,8 @@ typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -351,83 +231,13 @@ typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
|
||||
/**
|
||||
* Register or deregister an exception callback function
|
||||
*
|
||||
* Register or deregister a callback when an asynchronous
|
||||
* communication exception occurs.
|
||||
*
|
||||
* @param[in] cbfunc User callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The operation completed successfully
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_exception_fn_t)(orte_rml_exception_callback_t cbfunc);
|
||||
|
||||
|
||||
/**
|
||||
* Handle fault tolerance updates
|
||||
*
|
||||
* Handle fault tolerance updates
|
||||
*
|
||||
* @param[in] state Fault tolerance state update
|
||||
*
|
||||
* @retval ORTE_SUCCESS The operation completed successfully
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_ft_event_fn_t)(int state);
|
||||
|
||||
/**
|
||||
* Purge the RML/OOB of contact info and pending messages
|
||||
* to/from a specified process. Used when a process aborts
|
||||
@ -435,80 +245,128 @@ typedef int (*orte_rml_module_ft_event_fn_t)(int state);
|
||||
*/
|
||||
typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
/********* NEW RML QOS MESSAGING APIS *****************/
|
||||
/***** Questions *****/
|
||||
/*
|
||||
1 : Should we provide a func for the user to get qos attributes of a channel? (do we allow for sets?? )
|
||||
2 : Should open channel - have a channel error callback function?
|
||||
*/
|
||||
typedef void (*orte_rml_channel_callback_fn_t) (int status,
|
||||
orte_rml_channel_num_t channel_num,
|
||||
orte_process_name_t * peer,
|
||||
opal_list_t *qos_attributes,
|
||||
void * cbdata);
|
||||
/**
|
||||
* Function prototype for callback from non-blocking iovec send on a channel
|
||||
*
|
||||
* Function prototype for callback from non-blocking iovec send on a channel
|
||||
* On send, the iovec pointer will be the same pointer passed to
|
||||
* send_nb and count will equal the count given to send.
|
||||
*
|
||||
*
|
||||
* @note The in/out parameter annotations are relative to the user's callback
|
||||
* function.
|
||||
*
|
||||
* @param[in] status Completion status
|
||||
* @param[in] channel Opaque channel number on which the msg was sent (input to rml_send_channel)
|
||||
* @param[in] msg Pointer to the array of iovec that was sent
|
||||
* or to a single iovec that has been recvd
|
||||
* @param[in] count Number of iovecs in the array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbdata User data passed to send_nb()
|
||||
*/
|
||||
typedef void (*orte_rml_send_channel_callback_fn_t)(int status,
|
||||
orte_rml_channel_num_t channel,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
/**
|
||||
* Function prototype for callback from non-blocking buffer send on a channel
|
||||
*
|
||||
* Function prototype for callback from non-blocking buffer send on a
|
||||
* channel. On send, the buffer will be the same pointer passed to
|
||||
* send_buffer_nb.
|
||||
*
|
||||
* @note The in/out parameter annotations are relative to the user's callback
|
||||
* function.
|
||||
*
|
||||
* @param[in] status Completion status
|
||||
* @param[in] channel channel number on which the msg was sent
|
||||
* @param[in] buffer Message buffer
|
||||
* @param[in] tag User defined tag for matching send
|
||||
* @param[in] cbdata User data passed to send_buffer_nb()
|
||||
*/
|
||||
typedef void (*orte_rml_send_buffer_channel_callback_fn_t)(int status,
|
||||
orte_rml_channel_num_t channel,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* * Open a messaging channel with specified QoS to a specific peer
|
||||
*
|
||||
* @param[in] peer End point Peer to which the channel needs to be opened
|
||||
* @param[in] qos_attributes List of Quality of Service Attributes for the channel
|
||||
* @param[in] cbfunc Callback function on channel create (open) completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS - the channel was successfully created at the source and a request was sent to the dest.
|
||||
* @retval ORTE_ERROR - unknown error
|
||||
* @retval ORTE_ERROR_UNSUPPORTED_QOS - the requested QoS cannot be provided.
|
||||
* RML internal module interface - these will be implemented by all RML components
|
||||
*/
|
||||
typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
opal_list_t *qos_attributes,
|
||||
orte_rml_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef struct {
|
||||
/* pointer to the parent component for this module */
|
||||
struct orte_rml_component_t *component;
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_module_ping_fn_t ping;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_module_send_nb_fn_t send_nb;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_module_purge_fn_t purge;
|
||||
} orte_rml_base_module_t;
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML PUBLIC MODULE API DEFINITION */
|
||||
|
||||
/** Open conduit - call each component and see if they can provide a
|
||||
* conduit that can satisfy all these attributes - return the conduit id
|
||||
* (a negative value indicates error)
|
||||
*/
|
||||
typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Close a conduit - allow the component to cleanup.
|
||||
*/
|
||||
typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
* @param[out] transports List of providers and their attributes.
|
||||
*/
|
||||
typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports);
|
||||
|
||||
/**
|
||||
* Get a "contact info" string for the local process
|
||||
*
|
||||
* Get a "contact info" string that can be used by other processes to
|
||||
* share the contact information for the given process. The "contact
|
||||
* info" string includes the process identifier for the given process
|
||||
* and uses only basic ascii characters. It should be quoted when
|
||||
* evaluated by a shell, although no special escaping is necessary.
|
||||
*
|
||||
* @note The function may return a contact info string which contains
|
||||
* multiple addresses.
|
||||
*
|
||||
* @retval non-NULL The contact information for this process
|
||||
* @retval NULL An error occurred when trying to get the current
|
||||
* process contact info
|
||||
*/
|
||||
typedef char* (*orte_rml_API_get_contact_info_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Update the RML with a remote process's contact info
|
||||
*
|
||||
* Update the RML with a remote process's contact information, as
|
||||
* returned from the get_contact_info() function on the remote
|
||||
* process. Before a send can be initiated to a remote process,
|
||||
* either this function must be called for that process or that
|
||||
* process must have already established a connection to the local
|
||||
* process.
|
||||
*
|
||||
* @note The user may not always explicitly call this function
|
||||
* directly, but may instead cause it to be called through one of the
|
||||
* contact setup functions available in
|
||||
* orte/mca/rml/base/rml_contact.h.
|
||||
*
|
||||
* @param[in] contact_info The contact information string of a peer
|
||||
*/
|
||||
typedef void (*orte_rml_API_set_contact_info_fn_t)(const char *contact_info);
|
||||
|
||||
|
||||
/**
|
||||
* "Ping" another process to determine availability
|
||||
*
|
||||
* Ping another process to determine if it is available. This
|
||||
* function only verifies that the process is alive and will allow a
|
||||
* connection to the local process. It does *not* qualify as
|
||||
* establishing communication with the remote process, as required by
|
||||
* the note for set_contact_info().
|
||||
*
|
||||
* @param[in] contact_info The contact info string for the remote process
|
||||
* @param[in] tv Timeout after which the ping should be failed
|
||||
*
|
||||
* @retval ORTE_SUCCESS The process is available and will allow connections
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_API_ping_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
/**
|
||||
* "Ping" another process to determine availability using the default conduit_id
|
||||
*
|
||||
* Ping another process to determine if it is available. This
|
||||
* function only verifies that the process is alive and will allow a
|
||||
* connection to the local process. It does *not* qualify as
|
||||
* establishing communication with the remote process, as required by
|
||||
* the note for set_contact_info().
|
||||
*
|
||||
* @param[in] contact_info The contact info string for the remote process
|
||||
* @param[in] tv Timeout after which the ping should be failed
|
||||
*
|
||||
* @retval ORTE_SUCCESS The process is available and will allow connections
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_API_ping_fn_t)(const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
/**
|
||||
* Send an iovec non-blocking message
|
||||
@ -520,7 +378,7 @@ typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.)
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] msg Pointer to an array of iovecs to be sent
|
||||
* @param[in] count Number of iovecs in array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
@ -529,120 +387,284 @@ typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer,
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist.
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_channel_nb_fn_t)(orte_rml_channel_num_t channel,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_send_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef int (*orte_rml_API_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Send a buffer non-blocking message
|
||||
*
|
||||
* Send a buffer on a specific pre-established channel. The call
|
||||
* Send a buffer to the specified peer. The call
|
||||
* will return immediately, although the buffer may not be modified
|
||||
* until the completion callback is triggered. The buffer *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.)
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist.
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
|
||||
typedef int (*orte_rml_module_send_buffer_channel_nb_fn_t) (orte_rml_channel_num_t channel,
|
||||
struct opal_buffer_t * buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_send_buffer_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* * close a messaging channel with specified QoS to a specific peer
|
||||
* Send an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer End point Peer to which the channel needs to be opened
|
||||
* @param[in] channel_num The channel number returned in the channel open completion callback function.
|
||||
* @param[in] cbfunc Callback function on channel close completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
* Send an array of iovecs to the specified peer. The call
|
||||
* will return immediately, although the iovecs may not be modified
|
||||
* until the completion callback is triggered. The iovecs *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @retval ORTE_SUCCESS - the channel was successfully closed at the source and a request was sent to the dest.
|
||||
* @retval ORTE_ERROR - unknown error
|
||||
* @retval ORTE_ERROR_UNKNOWN_CHANNEL - cannot find the specified QoS channel
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] msg Pointer to an array of iovecs to be sent
|
||||
* @param[in] count Number of iovecs in array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_close_channel_fn_t)( orte_rml_channel_num_t channel_num,
|
||||
orte_rml_channel_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/* ******************************************************************** */
|
||||
typedef int (*orte_rml_API_send_nb_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* RML module interface
|
||||
* Send a buffer non-blocking message
|
||||
*
|
||||
* Send a buffer to the specified peer. The call
|
||||
* will return immediately, although the buffer may not be modified
|
||||
* until the completion callback is triggered. The buffer *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
struct orte_rml_base_module_t {
|
||||
/** Enable communication once a process name has been assigned */
|
||||
orte_rml_module_enable_comm_fn_t enable_comm;
|
||||
/** Shutdown the communication system and clean up resources */
|
||||
orte_rml_module_finalize_fn_t finalize;
|
||||
typedef int (*orte_rml_API_send_buffer_nb_conduit_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Purge the RML/OOB of contact info and pending messages
|
||||
* to/from a specified process. Used when a process aborts
|
||||
* and is to be restarted
|
||||
*/
|
||||
typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
/**
|
||||
* RML API interface
|
||||
*/
|
||||
typedef struct {
|
||||
/** Open Conduit **/
|
||||
orte_rml_API_open_conduit_fn_t open_conduit;
|
||||
|
||||
/** Shutdown the conduit and clean up resources */
|
||||
orte_rml_API_close_conduit_fn_t close_conduit;
|
||||
|
||||
/** Get contact information for local process */
|
||||
orte_rml_module_get_contact_info_fn_t get_contact_info;
|
||||
orte_rml_API_get_contact_info_fn_t get_contact_info;
|
||||
/** Set contact information for remote process */
|
||||
orte_rml_module_set_contact_info_fn_t set_contact_info;
|
||||
orte_rml_API_set_contact_info_fn_t set_contact_info;
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_module_ping_fn_t ping;
|
||||
orte_rml_API_ping_fn_t ping;
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_API_ping_conduit_fn_t ping_conduit;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_module_send_nb_fn_t send_nb;
|
||||
orte_rml_API_send_nb_fn_t send_nb;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
|
||||
orte_rml_API_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_API_send_nb_conduit_fn_t send_nb_conduit;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_API_send_buffer_nb_conduit_fn_t send_buffer_nb_conduit;
|
||||
|
||||
/** Receive non-blocking iovec message */
|
||||
orte_rml_module_recv_nb_fn_t recv_nb;
|
||||
orte_rml_API_recv_nb_fn_t recv_nb;
|
||||
|
||||
/** Receive non-blocking buffer message */
|
||||
orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
|
||||
/** Cancel posted non-blocking receive */
|
||||
orte_rml_module_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Add callback for communication exception */
|
||||
orte_rml_module_exception_fn_t add_exception_handler;
|
||||
/** Delete callback for communication exception */
|
||||
orte_rml_module_exception_fn_t del_exception_handler;
|
||||
|
||||
/** Fault tolerance handler */
|
||||
orte_rml_module_ft_event_fn_t ft_event;
|
||||
orte_rml_API_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_module_purge_fn_t purge;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rml_base_module_t orte_rml_base_module_t;
|
||||
orte_rml_API_purge_fn_t purge;
|
||||
|
||||
/** Query information of transport in system */
|
||||
orte_rml_API_query_transports_fn_t query_transports;
|
||||
|
||||
} orte_rml_base_API_t;
|
||||
|
||||
/** Interface for RML communication */
|
||||
ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
|
||||
ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml;
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML COMPONENT DEFINITION */
|
||||
|
||||
/**
|
||||
* RML open_conduit
|
||||
*
|
||||
* Create an instance (module) of the given RML component. Upon
|
||||
* returning, the module data structure should be fully populated and
|
||||
* all functions should be usable and will have the conduit information.
|
||||
*
|
||||
* @param[in] attributes opal_list_t of all attributes requested for the conduit.
|
||||
* Each attribute will be key-value.
|
||||
* [TODO] put in examples of the key-value here.
|
||||
* @return Exactly one module created by the call to the component's
|
||||
* initialization function should be returned. The module structure
|
||||
* should be fully populated, and the priority should be set to a
|
||||
* reasonable value.
|
||||
*
|
||||
* @retval NULL An error occurred and initialization did not occur
|
||||
* @retval non-NULL The module was successfully initialized
|
||||
*/
|
||||
typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
*/
|
||||
typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void);
|
||||
|
||||
/* Get the contact info for this component */
|
||||
typedef char* (*orte_rml_component_get_contact_info_fn_t)(void);
|
||||
|
||||
/* Set contact info */
|
||||
typedef void (*orte_rml_component_set_contact_info_fn_t)(const char *uri);
|
||||
|
||||
/** Close conduit - allow the specific component to
|
||||
* cleanup the module for this conduit
|
||||
*/
|
||||
typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod);
|
||||
|
||||
/**
|
||||
* RML component interface
|
||||
*
|
||||
* Component interface for the RML framework. A public instance of
|
||||
* this structure, called mca_rml_[component name]_component, must
|
||||
* exist in any RML component.
|
||||
*/
|
||||
struct orte_rml_component_3_0_0_t {
|
||||
/* Base component description */
|
||||
mca_base_component_t base;
|
||||
/* Base component data block */
|
||||
mca_base_component_data_t data;
|
||||
/* Component priority */
|
||||
int priority;
|
||||
/* Component interface functions */
|
||||
orte_rml_component_open_conduit_fn_t open_conduit;
|
||||
orte_rml_component_query_transports_fn_t query_transports;
|
||||
orte_rml_component_get_contact_info_fn_t get_contact_info;
|
||||
orte_rml_component_set_contact_info_fn_t set_contact_info;
|
||||
orte_rml_module_close_conduit_fn_t close_conduit;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rml_component_3_0_0_t orte_rml_component_t;
|
||||
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
/** Macro for use in components that are of type rml */
|
||||
#define ORTE_RML_BASE_VERSION_2_0_0 \
|
||||
ORTE_MCA_BASE_VERSION_2_1_0("rml", 2, 0, 0)
|
||||
#define ORTE_RML_BASE_VERSION_3_0_0 \
|
||||
ORTE_MCA_BASE_VERSION_2_1_0("rml", 3, 0, 0)
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
@ -171,9 +171,15 @@ BEGIN_C_DECLS
|
||||
|
||||
#define ORTE_RML_TAG_MAX 100
|
||||
|
||||
|
||||
#define ORTE_RML_TAG_NTOH(t) ntohl(t)
|
||||
#define ORTE_RML_TAG_HTON(t) htonl(t)
|
||||
|
||||
/*** length of the tag. change this when type of orte_rml_tag_t is changed ***/
|
||||
/*** max value in uint32_t is 0xFFFF_FFFF when converted to char this is 8 **
|
||||
#define ORTE_RML_TAG_T_CHAR_LEN 8
|
||||
#define ORTE_RML_TAG_T_SPRINT "%8x" */
|
||||
|
||||
/**
|
||||
* Message matching tag
|
||||
*
|
||||
@ -184,11 +190,17 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
typedef uint32_t orte_rml_tag_t;
|
||||
|
||||
/**
|
||||
* Channel number
|
||||
* Reference to a rml channel
|
||||
*/
|
||||
typedef uint32_t orte_rml_channel_num_t;
|
||||
/* Conduit ID */
|
||||
typedef uint16_t orte_rml_conduit_t;
|
||||
|
||||
/* define an object for reporting transports */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *component;
|
||||
opal_list_t attributes;
|
||||
opal_list_t transports;
|
||||
} orte_rml_pathway_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_pathway_t);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \
|
||||
orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \
|
||||
orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \
|
||||
mapper reducer opal_hotel orte_dfs ulfm pmixtool orte_notify
|
||||
mapper reducer opal_hotel orte_dfs ulfm pmixtool
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
|
119
orte/test/system/ofi_big_msg.c
Обычный файл
119
orte/test/system/ofi_big_msg.c
Обычный файл
@ -0,0 +1,119 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int sock_conduit_id = 1; //use the first one
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
//maxpower = (double)(j%7);
|
||||
maxpower = (double)(j%8);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
//msgsize += 1401000;
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
|
||||
orte_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
193
orte/test/system/ofi_conduit_stress.c
Обычный файл
193
orte/test/system/ofi_conduit_stress.c
Обычный файл
@ -0,0 +1,193 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/util/attr.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
//debug routine to print the opal_value_t returned by query interface
|
||||
void print_transports_query()
|
||||
{
|
||||
opal_value_t *providers=NULL;
|
||||
char* prov_name = NULL;
|
||||
int ret;
|
||||
int32_t *protocol_ptr, protocol;
|
||||
int8_t conduit_id;
|
||||
int8_t *prov_num=&conduit_id;
|
||||
|
||||
protocol_ptr = &protocol;
|
||||
opal_output(0, "\n Current conduits loaded in rml-ofi ==>");
|
||||
/*opal_output(0,"\n print_transports_query() Begin- %s:%d",__FILE__,__LINE__);
|
||||
opal_output(0,"\n calling the orte_rml_ofi_query_transports() ");*/
|
||||
if( ORTE_SUCCESS == orte_rml.query_transports(&providers)) {
|
||||
//opal_output(0,"\n query_transports() completed, printing details\n");
|
||||
while (providers) {
|
||||
//get the first opal_list_t;
|
||||
opal_list_t temp;
|
||||
opal_list_t *prov = &temp;
|
||||
|
||||
ret = opal_value_unload(providers,(void **)&prov,OPAL_PTR);
|
||||
if (ret == OPAL_SUCCESS) {
|
||||
//opal_output(0,"\n %s:%d opal_value_unload() succeeded, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
if( orte_get_attribute( prov, ORTE_CONDUIT_ID, (void **)&prov_num,OPAL_UINT8)) {
|
||||
opal_output(0," Provider conduit_id : %d",*prov_num);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROTOCOL, (void **)&protocol_ptr,OPAL_UINT32)) {
|
||||
opal_output(0," Protocol : %d",*protocol_ptr);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROV_NAME, (void **)&prov_name ,OPAL_STRING)) {
|
||||
opal_output(0," Provider name : %s",prov_name);
|
||||
} else {
|
||||
opal_output(0," Error in getting Provider name");
|
||||
}
|
||||
} else {
|
||||
opal_output(0," %s:%d opal_value_unload() failed, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
}
|
||||
providers = (opal_value_t *)providers->super.opal_list_next;
|
||||
// opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d -
|
||||
// Moving on to next provider provders=%x",__FILE__,__LINE__,providers);
|
||||
}
|
||||
} else {
|
||||
opal_output(0,"\n query_transports() returned Error ");
|
||||
}
|
||||
//opal_output(0,"\n End of print_transports_query() from ofi_query_test.c \n");
|
||||
|
||||
//need to free all the providers here
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int conduit_id = 0; //use the first available conduit
|
||||
struct timeval start, end;
|
||||
opal_list_t *conduit_attr;
|
||||
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
print_transports_query();
|
||||
conduit_attr = OBJ_NEW(opal_list_t);
|
||||
if( ORTE_SUCCESS ==
|
||||
( orte_set_attribute( conduit_attr, ORTE_RML_OFI_PROV_NAME_ATTRIB, ORTE_ATTR_GLOBAL,"sockets",OPAL_STRING))) {
|
||||
if( ORTE_SUCCESS ==
|
||||
( orte_set_attribute( conduit_attr, ORTE_RML_INCLUDE_COMP_ATTRIB, ORTE_ATTR_GLOBAL,"ofi",OPAL_STRING))) {
|
||||
opal_output(0, "%s calling open_conduit with ORTE_RML_INCLUDE_COMP_ATTRIB and ORTE_RML_OFI_PROV_NAME_ATTRIB",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
conduit_id = orte_rml_API_open_conduit(conduit_attr);
|
||||
if (0 > conduit_id ) {
|
||||
opal_output(0, "Conduit could not be opened for OFI, exiting");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
opal_output(0, "Using conduit-id %d ", conduit_id);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
maxpower = (double)(j%7);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_nb_conduit(conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_nb_conduit(conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
orte_finalize();
|
||||
printf("start: %d secs, %d usecs\n",start.tv_sec,start.tv_usec);
|
||||
printf("end: %d secs, %d usecs\n",end.tv_sec,end.tv_usec);
|
||||
printf("Total minutes = %d, Total seconds = %d", (end.tv_sec - start.tv_sec)/60, (end.tv_sec - start.tv_sec) );
|
||||
return 0;
|
||||
}
|
137
orte/test/system/ofi_query_test.c
Обычный файл
137
orte/test/system/ofi_query_test.c
Обычный файл
@ -0,0 +1,137 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <rdma/fabric.h>
|
||||
#include <rdma/fi_cm.h>
|
||||
#include <rdma/fi_domain.h>
|
||||
#include <rdma/fi_endpoint.h>
|
||||
#include <rdma/fi_errno.h>
|
||||
#include <rdma/fi_tagged.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
//debug routine to print the opal_value_t returned by query interface
|
||||
void print_transports_query()
|
||||
{
|
||||
opal_value_t *providers=NULL;
|
||||
char* prov_name = NULL;
|
||||
int ret;
|
||||
int32_t *protocol_ptr, protocol;
|
||||
int8_t conduit_id;
|
||||
int8_t *prov_num=&conduit_id;
|
||||
|
||||
protocol_ptr = &protocol;
|
||||
|
||||
opal_output(0,"\n print_transports_query() Begin- %s:%d",__FILE__,__LINE__);
|
||||
opal_output(0,"\n calling the orte_rml_ofi_query_transports() ");
|
||||
if( ORTE_SUCCESS == orte_rml.query_transports(&providers)) {
|
||||
opal_output(0,"\n query_transports() completed, printing details\n");
|
||||
while (providers) {
|
||||
//get the first opal_list_t;
|
||||
opal_list_t temp;
|
||||
opal_list_t *prov = &temp;
|
||||
|
||||
ret = opal_value_unload(providers,(void **)&prov,OPAL_PTR);
|
||||
if (ret == OPAL_SUCCESS) {
|
||||
opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d opal_value_unload() succeeded, opal_list* prov = %x",
|
||||
__FILE__,__LINE__,prov);
|
||||
if (orte_get_attribute( prov, ORTE_CONDUIT_ID, (void **)&prov_num,OPAL_UINT8)) {
|
||||
opal_output(0," Provider conduit_id : %d",*prov_num);
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROTOCOL, (void **)&protocol_ptr,OPAL_UINT32)) {
|
||||
opal_output(0," Protocol : %s",fi_tostr(protocol_ptr,FI_TYPE_PROTOCOL));
|
||||
}
|
||||
if( orte_get_attribute( prov, ORTE_PROV_NAME, (void **)&prov_name ,OPAL_STRING)) {
|
||||
opal_output(0," Provider name : %s",prov_name);
|
||||
} else {
|
||||
opal_output(0," Error in getting Provider name");
|
||||
}
|
||||
} else {
|
||||
opal_output(0," %s:%d opal_value_unload() failed, opal_list* prov = %x",__FILE__,__LINE__,prov);
|
||||
}
|
||||
providers = (opal_value_t *)providers->super.opal_list_next;
|
||||
// opal_output_verbose(1,orte_rml_base_framework.framework_output,"\n %s:%d -
|
||||
// Moving on to next provider provders=%x",__FILE__,__LINE__,providers);
|
||||
}
|
||||
} else {
|
||||
opal_output(0,"\n query_transports() returned Error ");
|
||||
}
|
||||
opal_output(0,"\n End of print_transports_query() from ofi_query_test.c \n");
|
||||
|
||||
//need to free all the providers here
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
|
||||
|
||||
opal_output(0, "%s pid = %d ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
// orte_init(&argc, &argv, ORTE_PROC_MPI);
|
||||
|
||||
/*
|
||||
* Runtime Messaging Layer - added this as RML was not being initialised in the app process,
|
||||
* but now ompimaster has code added to call this automatically
|
||||
*/
|
||||
/*
|
||||
if (ORTE_SUCCESS == ( mca_base_framework_open(&orte_rml_base_framework, 0))) {
|
||||
opal_output(0, "%s RML framework opened successfully ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
if (ORTE_SUCCESS == orte_rml_base_select()) {
|
||||
opal_output(0, "%s RML framework base_select completed successfully ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), getpid());
|
||||
print_transports_query();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
print_transports_query();
|
||||
opal_output(0, "%s calling orte_finalize() from ofi_query_test.c ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
121
orte/test/system/ofi_stress.c
Обычный файл
121
orte/test/system/ofi_stress.c
Обычный файл
@ -0,0 +1,121 @@
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#define MY_TAG 12345
|
||||
#define MAX_COUNT 3
|
||||
|
||||
static bool msg_recvd;
|
||||
static volatile bool msg_active;
|
||||
|
||||
static void send_callback(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
if (ORTE_SUCCESS != status) {
|
||||
exit(1);
|
||||
}
|
||||
msg_active = false;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]){
|
||||
int count;
|
||||
int msgsize;
|
||||
uint8_t *msg;
|
||||
int i, j, rc;
|
||||
orte_process_name_t peer;
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
int sock_conduit_id = 0; //use the first conduit
|
||||
struct timeval start, end;
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
|
||||
|
||||
if (argc > 1) {
|
||||
count = atoi(argv[1]);
|
||||
if (count < 0) {
|
||||
count = INT_MAX-1;
|
||||
}
|
||||
} else {
|
||||
count = MAX_COUNT;
|
||||
}
|
||||
|
||||
peer.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
if (peer.vpid == orte_process_info.num_procs) {
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
/* setup the initiating buffer - put random sized message in it */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
maxpower = (double)(j%7);
|
||||
msgsize = (int)pow(10.0, maxpower);
|
||||
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
|
||||
msg = (uint8_t*)malloc(msgsize);
|
||||
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
|
||||
free(msg);
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
|
||||
|
||||
/* wait for it to come around */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
|
||||
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
} else {
|
||||
/* wait for msg */
|
||||
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
|
||||
blob.active = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
OBJ_DESTRUCT(&blob);
|
||||
msg_active = true;
|
||||
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
orte_finalize();
|
||||
printf("start: %d secs, %d usecs\n",start.tv_sec,start.tv_usec);
|
||||
printf("end: %d secs, %d usecs\n",end.tv_sec,end.tv_usec);
|
||||
printf("Total minutes = %d, Total seconds = %d", (end.tv_sec - start.tv_sec)/60, (end.tv_sec - start.tv_sec) );
|
||||
return 0;
|
||||
}
|
@ -3,6 +3,7 @@
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
|
||||
@ -44,7 +45,7 @@ main(int argc, char *argv[]){
|
||||
double maxpower;
|
||||
opal_buffer_t *buf;
|
||||
orte_rml_recv_cb_t blob;
|
||||
|
||||
struct timeval start, end;
|
||||
/*
|
||||
* Init
|
||||
*/
|
||||
@ -65,6 +66,7 @@ main(int argc, char *argv[]){
|
||||
peer.vpid = 0;
|
||||
}
|
||||
|
||||
gettimeofday(&start,NULL);
|
||||
for (j=1; j < count+1; j++) {
|
||||
/* rank0 starts ring */
|
||||
if (ORTE_PROC_MY_NAME->vpid == 0) {
|
||||
@ -98,8 +100,6 @@ main(int argc, char *argv[]){
|
||||
orte_rml_recv_callback, &blob);
|
||||
ORTE_WAIT_FOR_COMPLETION(blob.active);
|
||||
|
||||
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
|
||||
|
||||
/* send it along */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.copy_payload(buf, &blob.data);
|
||||
@ -109,6 +109,8 @@ main(int argc, char *argv[]){
|
||||
ORTE_WAIT_FOR_COMPLETION(msg_active);
|
||||
}
|
||||
}
|
||||
gettimeofday(&end,NULL);
|
||||
printf("Total minutes = %d, Total seconds = %d\n",(end.tv_sec - start.tv_sec)/60,(end.tv_sec - start.tv_sec));
|
||||
|
||||
orte_finalize();
|
||||
|
||||
|
@ -312,6 +312,25 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
|
||||
case ORTE_PROC_NBEATS:
|
||||
return "PROC-NBEATS";
|
||||
|
||||
case ORTE_RML_TRANSPORT_TYPE:
|
||||
return "RML-TRANSPORT-TYPE";
|
||||
case ORTE_RML_PROTOCOL_TYPE:
|
||||
return "RML-PROTOCOL-TYPE";
|
||||
case ORTE_RML_CONDUIT_ID:
|
||||
return "RML-CONDUIT-ID";
|
||||
case ORTE_RML_INCLUDE_COMP_ATTRIB:
|
||||
return "RML-INCLUDE";
|
||||
case ORTE_RML_EXCLUDE_COMP_ATTRIB:
|
||||
return "RML-EXCLUDE";
|
||||
case ORTE_RML_TRANSPORT_ATTRIB:
|
||||
return "RML-TRANSPORT";
|
||||
case ORTE_RML_QUALIFIER_ATTRIB:
|
||||
return "RML-QUALIFIER";
|
||||
case ORTE_RML_PROVIDER_ATTRIB:
|
||||
return "RML-DESIRED-PROVIDERS";
|
||||
case ORTE_RML_PROTOCOL_ATTRIB:
|
||||
return "RML-DESIRED-PROTOCOLS";
|
||||
|
||||
default:
|
||||
return "UNKNOWN-KEY";
|
||||
}
|
||||
@ -339,9 +358,24 @@ static int orte_attr_load(orte_attribute_t *kv,
|
||||
struct timeval *tv;
|
||||
|
||||
kv->type = type;
|
||||
if (NULL == data && OPAL_STRING != type && OPAL_BYTE_OBJECT != type) {
|
||||
/* just set the fields to zero */
|
||||
memset(&kv->data, 0, sizeof(kv->data));
|
||||
if (NULL == data) {
|
||||
/* if the type is BOOL, then the user wanted to
|
||||
* use the presence of the attribute to indicate
|
||||
* "true" - so let's mark it that way just in
|
||||
* case a subsequent test looks for the value */
|
||||
if (OPAL_BOOL == type) {
|
||||
kv->data.flag = true;
|
||||
} else {
|
||||
/* otherwise, check to see if this type has storage
|
||||
* that is already allocated, and free it if so */
|
||||
if (OPAL_STRING == type && NULL != kv->data.string) {
|
||||
free(kv->data.string);
|
||||
} else if (OPAL_BYTE_OBJECT == type && NULL != kv->data.bo.bytes) {
|
||||
free(kv->data.bo.bytes);
|
||||
}
|
||||
/* just set the fields to zero */
|
||||
memset(&kv->data, 0, sizeof(kv->data));
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -181,15 +181,18 @@ typedef uint16_t orte_proc_flags_t;
|
||||
|
||||
#define ORTE_PROC_MAX_KEY 400
|
||||
|
||||
/*** MESSAGING QOS ATTRIBUTE KEYS ***/
|
||||
#define ORTE_QOS_START_KEY ORTE_PROC_MAX_KEY
|
||||
#define ORTE_QOS_TYPE (ORTE_QOS_START_KEY + 1) //uint8- defining what type of qos - refer to orte_qos_type enum for values
|
||||
#define ORTE_QOS_WINDOW_SIZE (ORTE_QOS_START_KEY + 2) // uint32 - number of messages in the window (stream)
|
||||
#define ORTE_QOS_ACK_NACK_TIMEOUT (ORTE_QOS_START_KEY + 3) //uint32 - timeout value in secs for msg/window ack nack
|
||||
#define ORTE_QOS_MSG_RETRY (ORTE_QOS_START_KEY + 4) // bool- resend message upon ACK fail or NACK or timeout.
|
||||
#define ORTE_QOS_NUM_RETRIES (ORTE_QOS_START_KEY + 5) // uint32 - number of retries.
|
||||
/*** RML ATTRIBUTE keys ***/
|
||||
#define ORTE_RML_START_KEY ORTE_PROC_MAX_KEY
|
||||
#define ORTE_RML_TRANSPORT_TYPE (ORTE_RML_START_KEY + 1) // string - null terminated string containing transport type
|
||||
#define ORTE_RML_PROTOCOL_TYPE (ORTE_RML_START_KEY + 2) // string - protocol type (e.g., as returned by fi_info)
|
||||
#define ORTE_RML_CONDUIT_ID (ORTE_RML_START_KEY + 3) // orte_rml_conduit_t - conduit_id for this transport
|
||||
#define ORTE_RML_INCLUDE_COMP_ATTRIB (ORTE_RML_START_KEY + 4) // string - comma delimited list of RML component names to be considered
|
||||
#define ORTE_RML_EXCLUDE_COMP_ATTRIB (ORTE_RML_START_KEY + 5) // string - comma delimited list of RML component names to be excluded
|
||||
#define ORTE_RML_TRANSPORT_ATTRIB (ORTE_RML_START_KEY + 6) // string - comma delimited list of transport types to be considered (e.g., "fabric,ethernet")
|
||||
#define ORTE_RML_QUALIFIER_ATTRIB (ORTE_RML_START_KEY + 7) // string - comma delimited list of qualifiers (e.g., routed=direct,bandwidth=xxx)
|
||||
#define ORTE_RML_PROVIDER_ATTRIB (ORTE_RML_START_KEY + 8) // string - comma delimited list of provider names to be considered
|
||||
#define ORTE_RML_PROTOCOL_ATTRIB (ORTE_RML_START_KEY + 9) // string - comma delimited list of protocols to be considered (e.g., tcp,udp)
|
||||
|
||||
#define ORTE_QOS_MAX_KEY 500
|
||||
|
||||
#define ORTE_ATTR_KEY_MAX 1000
|
||||
|
||||
|
@ -234,6 +234,9 @@ int orte_err2str(int errnum, const char **errmsg)
|
||||
case ORTE_ERR_JOB_CANCELLED:
|
||||
retval = "Job cancelled";
|
||||
break;
|
||||
case ORTE_ERR_CONDUIT_SEND_FAIL:
|
||||
retval = " Transport Conduit returned send error";
|
||||
break;
|
||||
case ORTE_ERR_DEBUGGER_RELEASE:
|
||||
retval = "Debugger release";
|
||||
break;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user