39a6057fc6
* General TCP cleanup for OPAL / ORTE * Simplifying the OOB by moving much of the logic into the RML * Allowing the OOB RML component to do routing of messages * Adding a component framework for handling routing tables * Moving the xcast functionality from the OOB base to its own framework Includes merge from tmp/bwb-oob-rml-merge revisions: r15506, r15507, r15508, r15510, r15511, r15512, r15513 This commit was SVN r15528. The following SVN revisions from the original message are invalid or inconsistent and therefore were not cross-referenced: r15506 r15507 r15508 r15510 r15511 r15512 r15513
337 строки
8.7 KiB
C
337 строки
8.7 KiB
C
/*
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "rml_oob.h"
|
|
|
|
#include "opal/util/output.h"
|
|
#include "opal/mca/base/base.h"
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
#include "orte/orte_constants.h"
|
|
#include "orte/mca/rml/base/base.h"
|
|
#include "orte/mca/routed/routed.h"
|
|
#include "orte/mca/oob/oob.h"
|
|
#include "orte/mca/oob/base/base.h"
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/mca/ns/ns.h"
|
|
|
|
static orte_rml_module_t* rml_oob_init(int* priority);
|
|
static int rml_oob_open(void);
|
|
static int rml_oob_close(void);
|
|
static void rml_oob_recv_route_callback(int status,
|
|
struct orte_process_name_t* peer,
|
|
struct iovec* iov,
|
|
int count,
|
|
orte_rml_tag_t tag,
|
|
void *cbdata);
|
|
|
|
|
|
/**
|
|
* component definition
|
|
*/
|
|
orte_rml_component_t mca_rml_oob_component = {
|
|
/* First, the mca_base_component_t struct containing meta
|
|
information about the component itself */
|
|
|
|
{
|
|
/* Indicate that we are a rml v1.0.0 component (which also
|
|
implies a specific MCA version) */
|
|
|
|
ORTE_RML_BASE_VERSION_1_0_0,
|
|
|
|
"oob", /* MCA component name */
|
|
ORTE_MAJOR_VERSION, /* MCA component major version */
|
|
ORTE_MINOR_VERSION, /* MCA component minor version */
|
|
ORTE_RELEASE_VERSION, /* MCA component release version */
|
|
rml_oob_open, /* component open */
|
|
rml_oob_close, /* component close */
|
|
},
|
|
|
|
/* Next the MCA v1.0.0 component meta data */
|
|
{
|
|
/* The component is checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
},
|
|
rml_oob_init
|
|
};
|
|
|
|
orte_rml_oob_module_t orte_rml_oob_module = {
|
|
{
|
|
orte_rml_oob_init,
|
|
orte_rml_oob_fini,
|
|
|
|
orte_rml_oob_get_uri,
|
|
orte_rml_oob_set_uri,
|
|
|
|
orte_rml_oob_get_new_name,
|
|
orte_rml_oob_ping,
|
|
|
|
orte_rml_oob_send,
|
|
orte_rml_oob_send_nb,
|
|
orte_rml_oob_send_buffer,
|
|
orte_rml_oob_send_buffer_nb,
|
|
|
|
orte_rml_oob_recv,
|
|
orte_rml_oob_recv_nb,
|
|
orte_rml_oob_recv_buffer,
|
|
orte_rml_oob_recv_buffer_nb,
|
|
orte_rml_oob_recv_cancel,
|
|
|
|
orte_rml_oob_add_exception,
|
|
orte_rml_oob_del_exception,
|
|
|
|
orte_rml_oob_ft_event
|
|
}
|
|
};
|
|
|
|
|
|
static int
|
|
rml_oob_open(void)
|
|
{
|
|
int rc;
|
|
|
|
if (ORTE_SUCCESS != (rc = mca_oob_base_open())) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
 * Component close hook: closes the OOB base via mca_oob_base_close().
 *
 * @return whatever mca_oob_base_close() returns (ORTE_SUCCESS or an
 *         error code); nothing is logged here.
 */
static int
rml_oob_close(void)
{
    /* Success and failure both hand back the callee's result as-is. */
    return mca_oob_base_close();
}
|
|
|
|
static orte_rml_module_t*
|
|
rml_oob_init(int* priority)
|
|
{
|
|
if (mca_oob_base_init() != ORTE_SUCCESS)
|
|
return NULL;
|
|
*priority = 1;
|
|
|
|
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
|
|
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions_lock, opal_mutex_t);
|
|
|
|
orte_rml_oob_module.active_oob = &mca_oob;
|
|
orte_rml_oob_module.active_oob->oob_exception_callback =
|
|
orte_rml_oob_exception_callback;
|
|
|
|
return &orte_rml_oob_module.super;
|
|
}
|
|
|
|
|
|
int
|
|
orte_rml_oob_init(void)
|
|
{
|
|
int ret;
|
|
struct iovec iov[1];
|
|
|
|
ret = orte_rml_oob_module.active_oob->oob_init();
|
|
|
|
iov[0].iov_base = NULL;
|
|
iov[0].iov_len = 0;
|
|
|
|
ret = orte_rml_oob_module.active_oob->oob_recv_nb(ORTE_NAME_WILDCARD,
|
|
iov, 1,
|
|
ORTE_RML_TAG_RML_ROUTE,
|
|
ORTE_RML_ALLOC|ORTE_RML_PERSISTENT,
|
|
rml_oob_recv_route_callback,
|
|
NULL);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
int
|
|
orte_rml_oob_fini(void)
|
|
{
|
|
opal_list_item_t *item;
|
|
|
|
while (NULL !=
|
|
(item = opal_list_remove_first(&orte_rml_oob_module.exceptions))) {
|
|
OBJ_RELEASE(item);
|
|
}
|
|
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions);
|
|
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions_lock);
|
|
orte_rml_oob_module.active_oob->oob_exception_callback = NULL;
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
int
|
|
orte_rml_oob_ft_event(int state) {
|
|
int exit_status = ORTE_SUCCESS;
|
|
int ret;
|
|
|
|
if(OPAL_CRS_CHECKPOINT == state) {
|
|
;
|
|
}
|
|
else if(OPAL_CRS_CONTINUE == state) {
|
|
;
|
|
}
|
|
else if(OPAL_CRS_RESTART == state) {
|
|
;
|
|
}
|
|
else if(OPAL_CRS_TERM == state ) {
|
|
;
|
|
}
|
|
else {
|
|
;
|
|
}
|
|
|
|
if( ORTE_SUCCESS !=
|
|
(ret = orte_rml_oob_module.active_oob->oob_ft_event(state)) ) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
|
|
|
|
if(OPAL_CRS_CHECKPOINT == state) {
|
|
;
|
|
}
|
|
else if(OPAL_CRS_CONTINUE == state) {
|
|
;
|
|
}
|
|
else if(OPAL_CRS_RESTART == state) {
|
|
if( ORTE_SUCCESS != (ret = mca_oob_base_close())) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
|
|
if( ORTE_SUCCESS != (ret = mca_oob_base_open())) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
|
|
if( ORTE_SUCCESS != (ret = mca_oob_base_init())) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
|
|
if(NULL != orte_process_info.ns_replica_uri) {
|
|
orte_rml_oob_set_uri(orte_process_info.ns_replica_uri);
|
|
}
|
|
|
|
if(NULL != orte_process_info.gpr_replica_uri) {
|
|
orte_rml_oob_set_uri(orte_process_info.gpr_replica_uri);
|
|
}
|
|
}
|
|
else if(OPAL_CRS_TERM == state ) {
|
|
;
|
|
}
|
|
else {
|
|
;
|
|
}
|
|
|
|
cleanup:
|
|
return exit_status;
|
|
}
|
|
|
|
|
|
static void
|
|
msg_construct(orte_rml_oob_msg_t *msg)
|
|
{
|
|
OBJ_CONSTRUCT(&msg->msg_lock, opal_mutex_t);
|
|
OBJ_CONSTRUCT(&msg->msg_cond, opal_condition_t);
|
|
msg->msg_status = 0;
|
|
msg->msg_complete = false;
|
|
msg->msg_persistent = false;
|
|
OBJ_CONSTRUCT(&msg->msg_recv_buffer, orte_buffer_t);
|
|
msg->msg_data = NULL;
|
|
}
|
|
|
|
static void
|
|
msg_destruct(orte_rml_oob_msg_t *msg)
|
|
{
|
|
if (NULL != msg->msg_data) free(msg->msg_data);
|
|
OBJ_DESTRUCT(&msg->msg_recv_buffer);
|
|
OBJ_DESTRUCT(&msg->msg_lock);
|
|
OBJ_DESTRUCT(&msg->msg_cond);
|
|
}
|
|
|
|
/* Class instance for orte_rml_oob_msg_t: parent class opal_object_t,
   with msg_construct / msg_destruct as constructor and destructor. */
OBJ_CLASS_INSTANCE(orte_rml_oob_msg_t,
                   opal_object_t,
                   msg_construct,
                   msg_destruct);
|
|
|
|
static void
|
|
rml_oob_recv_route_send_callback(int status,
|
|
struct orte_process_name_t* peer,
|
|
struct iovec* iov,
|
|
int count,
|
|
orte_rml_tag_t tag,
|
|
void* cbdata)
|
|
{
|
|
/* BWB -- propogate errors here... */
|
|
if (NULL != iov[0].iov_base) free(iov[0].iov_base);
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
rml_oob_recv_route_callback(int status,
|
|
struct orte_process_name_t* peer,
|
|
struct iovec* iov,
|
|
int count,
|
|
orte_rml_tag_t tag,
|
|
void *cbdata)
|
|
{
|
|
orte_rml_oob_msg_header_t *hdr =
|
|
(orte_rml_oob_msg_header_t*) iov[0].iov_base;
|
|
int real_tag;
|
|
int ret;
|
|
orte_process_name_t next;
|
|
|
|
/* BWB -- propogate errors here... */
|
|
assert(status >= 0);
|
|
|
|
next = orte_routed.get_route(&hdr->destination);
|
|
if (next.vpid == ORTE_VPID_INVALID) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
|
|
abort();
|
|
}
|
|
|
|
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &next, peer)) {
|
|
real_tag = hdr->tag;
|
|
} else {
|
|
real_tag = ORTE_RML_TAG_RML_ROUTE;
|
|
}
|
|
|
|
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
|
|
iov,
|
|
count,
|
|
real_tag,
|
|
0,
|
|
rml_oob_recv_route_send_callback,
|
|
NULL);
|
|
assert(ret == ORTE_SUCCESS);
|
|
}
|