1
1
openmpi/orte/mca/iof/svc/iof_svc_proxy.c
Ralph Castain 9613b3176c Effectively revert the orte_output system and return to direct use of opal_output at all levels. Retain the orte_show_help subsystem to allow aggregation of show_help messages at the HNP.
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach.

I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive.

This commit was SVN r18619.
2008-06-09 14:53:58 +00:00

339 строки
11 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "orte/constants.h"
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/mca/iof/base/iof_base_header.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "orte/mca/iof/base/iof_base_fragment.h"
#include "iof_svc.h"
#include "iof_svc_proxy.h"
#include "iof_svc_pub.h"
#include "iof_svc_sub.h"
/*
 * Forward declarations of the per-message-type handlers.
 * orte_iof_svc_proxy_recv() selects one of them from the type field of
 * the received header.  ACK and CLOSE headers share
 * orte_iof_svc_proxy_ack(), which tells them apart through do_close.
 */
static void orte_iof_svc_proxy_msg(const orte_process_name_t*, orte_iof_base_msg_header_t*, unsigned char*);
static void orte_iof_svc_proxy_ack(const orte_process_name_t*, orte_iof_base_msg_header_t*, bool do_close);
static void orte_iof_svc_proxy_pub(const orte_process_name_t*, orte_iof_base_pub_header_t*);
static void orte_iof_svc_proxy_unpub(const orte_process_name_t*, orte_iof_base_pub_header_t*);
static void orte_iof_svc_proxy_sub(const orte_process_name_t*, orte_iof_base_sub_header_t*);
static void orte_iof_svc_proxy_unsub(const orte_process_name_t*, orte_iof_base_sub_header_t*);
/**
 * Callback function from RML on receipt of an IOF request.  Converts
 * the header that starts iov[0] to host byte order and dispatches on
 * its type to the matching handler.
 *
 * @param status  (IN)  Completion status; a negative value is logged and
 *                      nothing is dispatched.
 * @param peer    (IN)  Opaque name of peer process.
 * @param iov     (IN)  Array of iovecs describing user buffers and lengths.
 *                      iov[0] starts with the IOF header; for data
 *                      messages the payload follows it directly.
 * @param count   (IN)  Number of elements in iovec array.
 * @param tag     (IN)  User defined tag for matching send/recv.
 * @param cbdata  (IN)  User data.
 *
 * Whatever the outcome, the buffer behind iov[0] is freed here and the
 * component's svc_iov[0] slot is cleared.
 */
void orte_iof_svc_proxy_recv(
    int status,
    orte_process_name_t* peer,
    struct iovec* iov,
    int count,
    orte_rml_tag_t tag,
    void* cbdata)
{
    orte_iof_base_header_t* header = (orte_iof_base_header_t*)iov[0].iov_base;

    if (status < 0) {
        ORTE_ERROR_LOG(status);
    } else {
        switch (header->hdr_common.hdr_type) {
        case ORTE_IOF_BASE_HDR_MSG:
            ORTE_IOF_BASE_HDR_MSG_NTOH(header->hdr_msg);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_MSG\n");
            /* the payload sits immediately behind the full-size header */
            orte_iof_svc_proxy_msg(peer, &header->hdr_msg,
                                   (unsigned char*)(header + 1));
            break;

        case ORTE_IOF_BASE_HDR_ACK:
            ORTE_IOF_BASE_HDR_MSG_NTOH(header->hdr_msg);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_ACK\n");
            orte_iof_svc_proxy_ack(peer, &header->hdr_msg, false);
            break;

        case ORTE_IOF_BASE_HDR_CLOSE:
            /* same handler as an ACK, but flagged as a close */
            ORTE_IOF_BASE_HDR_MSG_NTOH(header->hdr_msg);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_CLOSE\n");
            orte_iof_svc_proxy_ack(peer, &header->hdr_msg, true);
            break;

        case ORTE_IOF_BASE_HDR_PUB:
            ORTE_IOF_BASE_HDR_PUB_NTOH(header->hdr_pub);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_PUB\n");
            orte_iof_svc_proxy_pub(peer, &header->hdr_pub);
            break;

        case ORTE_IOF_BASE_HDR_UNPUB:
            ORTE_IOF_BASE_HDR_PUB_NTOH(header->hdr_pub);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_UNPUB\n");
            orte_iof_svc_proxy_unpub(peer, &header->hdr_pub);
            break;

        case ORTE_IOF_BASE_HDR_SUB:
            ORTE_IOF_BASE_HDR_SUB_NTOH(header->hdr_sub);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_SUB\n");
            orte_iof_svc_proxy_sub(peer, &header->hdr_sub);
            break;

        case ORTE_IOF_BASE_HDR_UNSUB:
            ORTE_IOF_BASE_HDR_SUB_NTOH(header->hdr_sub);
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_proxy_recv: HDR_UNSUB\n");
            orte_iof_svc_proxy_unsub(peer, &header->hdr_sub);
            break;

        default:
            /* unknown type: report it at verbose level and ignore it */
            opal_output_verbose(1, orte_iof_base.iof_output,
                                "orte_iof_svc_recv: invalid message type: %d (ignored)\n",
                                header->hdr_common.hdr_type);
            break;
        }
    }

    /* release the receive buffer and clear the component's iovec slot */
    free(header);
    mca_iof_svc_component.svc_iov[0].iov_base = NULL;
    mca_iof_svc_component.svc_iov[0].iov_len = 0;
}
/**
 * Completion callback for the non-blocking ACK send: hands the
 * fragment passed through cbdata back to the fragment pool and logs a
 * negative completion status.
 *
 * @param status  (IN)  Completion status of the send.
 * @param peer    (IN)  Unused here.
 * @param msg     (IN)  Unused here.
 * @param count   (IN)  Unused here.
 * @param tag     (IN)  Unused here.
 * @param cbdata  (IN)  The orte_iof_base_frag_t that backed the send.
 */
static void ack_send_cb(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    orte_rml_tag_t tag,
    void* cbdata)
{
    orte_iof_base_frag_t* ack_frag = (orte_iof_base_frag_t*)cbdata;

    /* the fragment is released whether or not the send succeeded */
    ORTE_IOF_BASE_FRAG_RETURN(ack_frag);

    if (status >= 0) {
        return;
    }
    ORTE_ERROR_LOG(status);
}
/**
 * Receive a data message.  Check the subscription list for a match
 * on the source - and on matches forward to any published endpoints
 * that match the subscription's destination.
 *
 * When no subscription forwarded the message it is dropped, but an ACK
 * is still sent to hdr->msg_proxy so that the sender does not block.
 *
 * @param peer  (IN)  Process the message was received from.
 * @param hdr   (IN)  Message header (the receive callback has already
 *                    converted it to host byte order).
 * @param data  (IN)  Payload that followed the header.
 */
static void orte_iof_svc_proxy_msg(
    const orte_process_name_t* peer,
    orte_iof_base_msg_header_t* hdr,
    unsigned char* data)
{
    opal_list_item_t* item;
    bool forwarded_at_all = false, forward = false;

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "orte_iof_svc_proxy_msg: tag %d seq %d",
                        hdr->msg_tag,hdr->msg_seq);

    /* dispatch based on subscription list */
    OPAL_THREAD_LOCK(&mca_iof_svc_component.svc_lock);
    for(item  = opal_list_get_first(&mca_iof_svc_component.svc_subscribed);
        item != opal_list_get_end(&mca_iof_svc_component.svc_subscribed);
        item =  opal_list_get_next(item)) {
        orte_iof_svc_sub_t* sub = (orte_iof_svc_sub_t*)item;

        /* if the tags don't match, skip this subscription */
        if(sub->origin_tag != hdr->msg_tag && hdr->msg_tag != ORTE_IOF_ANY) {
            continue;
        }

        /* only forward when the subscription's origin (under its mask)
           matches the message's origin */
        if (OPAL_EQUAL == orte_util_compare_name_fields(sub->origin_mask,&sub->origin_name,&hdr->msg_origin)) {
            opal_output_verbose(1, orte_iof_base.iof_output, "sub MATCH: origin %s, msg origin %s, msg proxy %s orte_iof_svc_proxy_msg: tag %d sequence %d, len %d",
                                ORTE_NAME_PRINT(&sub->origin_name),
                                ORTE_NAME_PRINT(&hdr->msg_origin),
                                ORTE_NAME_PRINT(&hdr->msg_proxy),
                                hdr->msg_tag, hdr->msg_seq, hdr->msg_len);
            /* Everything matched -- forward the message.  The lock is
               dropped around the forward call and re-taken afterwards. */
            OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock);
            orte_iof_svc_sub_forward(sub,peer,hdr,data,&forward);
            OPAL_THREAD_LOCK(&mca_iof_svc_component.svc_lock);
            if (forward) {
                forwarded_at_all = true;
            }
        }
    }
    OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock);

    /* If there was no one to forward to, then we effectively drop it.
       But ACK it so that the sender doesn't block. */
    if (!forwarded_at_all) {
        orte_iof_base_frag_t* frag;
        int rc;

        opal_output_verbose(1, orte_iof_base.iof_output, "no sub match found -- dropped");
        ORTE_IOF_BASE_FRAG_ALLOC(frag,rc);
        if(NULL == frag) {
            ORTE_ERROR_LOG(rc);
            return;
        }

        /* the ACK is the received header sent back with its type changed */
        frag->frag_hdr.hdr_msg = *hdr;
        frag->frag_hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_ACK;
        frag->frag_iov[0].iov_base = (IOVBASE_TYPE*)&frag->frag_hdr;
        frag->frag_iov[0].iov_len = sizeof(frag->frag_hdr);
        ORTE_IOF_BASE_HDR_MSG_HTON(frag->frag_hdr.hdr_msg);

        rc = orte_rml.send_nb(
            &hdr->msg_proxy,
            frag->frag_iov,
            1,
            ORTE_RML_TAG_IOF_SVC,
            0,
            ack_send_cb,
            frag);
        if(rc < 0) {
            ORTE_ERROR_LOG(rc);
            /* ack_send_cb is the only other place the fragment is
               released; give it back here so a failed send does not
               leak it.
               NOTE(review): assumes send_nb does not invoke the callback
               when it returns an error -- confirm against the RML. */
            ORTE_IOF_BASE_FRAG_RETURN(frag);
        }
    }
}
/**
 * Handle an ACK or CLOSE header by passing it straight on to
 * orte_iof_svc_sub_ack().
 *
 * @param peer      (IN)  Process the header was received from.
 * @param hdr       (IN)  The received message header.
 * @param do_close  (IN)  true when the header was a CLOSE, false for a
 *                        plain ACK.
 */
static void orte_iof_svc_proxy_ack(const orte_process_name_t* peer,
                                   orte_iof_base_msg_header_t* hdr,
                                   bool do_close)
{
    /* nothing to do at this level -- pure pass-through */
    orte_iof_svc_sub_ack(peer, hdr, do_close);
}
/**
 * A remote process has announced an endpoint that it is making
 * available.  Record the publication via orte_iof_svc_pub_create()
 * and log any failure it reports.
 *
 * @param peer  (IN)  Process the request was received from (unused here).
 * @param hdr   (IN)  Publish header carrying name, proxy, mask and tag.
 */
static void orte_iof_svc_proxy_pub(
    const orte_process_name_t* peer,
    orte_iof_base_pub_header_t* hdr)
{
    int status;

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "orte_iof_svc_proxy_pub: mask %d, tag %d, proc %s, proxy %s",
                        hdr->pub_mask, hdr->pub_tag,
                        ORTE_NAME_PRINT(&hdr->pub_name),
                        ORTE_NAME_PRINT(&hdr->pub_proxy));

    status = orte_iof_svc_pub_create(&hdr->pub_name, &hdr->pub_proxy,
                                     hdr->pub_mask, hdr->pub_tag);
    if (ORTE_SUCCESS == status) {
        return;
    }
    ORTE_ERROR_LOG(status);
}
/**
 * Opposite of publish -- ask orte_iof_svc_pub_delete() to remove the
 * publication described by the header.  A "not found" result is
 * tolerated silently; any other failure is logged.
 *
 * @param peer  (IN)  Process the request was received from (unused here).
 * @param hdr   (IN)  Publish header carrying name, proxy, mask and tag.
 */
static void orte_iof_svc_proxy_unpub(
    const orte_process_name_t* peer,
    orte_iof_base_pub_header_t* hdr)
{
    int status;

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "orte_iof_svc_proxy_unpub: mask %d, tag %d, proc %s, proxy %s",
                        hdr->pub_mask, hdr->pub_tag,
                        ORTE_NAME_PRINT(&hdr->pub_name),
                        ORTE_NAME_PRINT(&hdr->pub_proxy));

    status = orte_iof_svc_pub_delete(&hdr->pub_name, &hdr->pub_proxy,
                                     hdr->pub_mask, hdr->pub_tag);

    /* deleting a publication that does not exist is not reported */
    if (ORTE_SUCCESS == status || ORTE_ERR_NOT_FOUND == status) {
        return;
    }
    ORTE_ERROR_LOG(status);
}
/**
 * Create a subscription entry from the received header.  A
 * subscription entry determines the set of origin(s) that will forward
 * to any matching published endpoint targets.  Failures from
 * orte_iof_svc_sub_create() are logged.
 *
 * @param peer  (IN)  Process the request was received from (unused here).
 * @param hdr   (IN)  Subscribe header with origin/target name, mask, tag.
 */
static void orte_iof_svc_proxy_sub(
    const orte_process_name_t* peer,
    orte_iof_base_sub_header_t* hdr)
{
    int status;

    opal_output_verbose(1, orte_iof_base.iof_output, "orte_iof_svc_proxy_sub");

    status = orte_iof_svc_sub_create(&hdr->origin_name,
                                     hdr->origin_mask,
                                     hdr->origin_tag,
                                     &hdr->target_name,
                                     hdr->target_mask,
                                     hdr->target_tag);
    if (ORTE_SUCCESS == status) {
        return;
    }
    ORTE_ERROR_LOG(status);
}
/**
 * Remove the subscription described by the received header; failures
 * from orte_iof_svc_sub_delete() are logged.
 *
 * @param peer  (IN)  Process the request was received from (unused here).
 * @param hdr   (IN)  Subscribe header with origin/target name, mask, tag.
 */
static void orte_iof_svc_proxy_unsub(
    const orte_process_name_t* peer,
    orte_iof_base_sub_header_t* hdr)
{
    int status;

    opal_output_verbose(1, orte_iof_base.iof_output, "orte_iof_svc_proxy_unsub");

    status = orte_iof_svc_sub_delete(&hdr->origin_name,
                                     hdr->origin_mask,
                                     hdr->origin_tag,
                                     &hdr->target_name,
                                     hdr->target_mask,
                                     hdr->target_tag);
    if (ORTE_SUCCESS == status) {
        return;
    }
    ORTE_ERROR_LOG(status);
}