1
1

Merge branch 'rhc54-rml-multiplugin-support'

Этот коммит содержится в:
Anandhi S Jayakumar 2016-03-10 14:39:08 -08:00
родитель 7b73c868d5 a4c8e8c28a
Коммит 3ffb203aaf
42 изменённых файлов: 674 добавлений и 4207 удалений

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -50,7 +50,6 @@
#include "orte/mca/routed/base/base.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/dfs/base/base.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/grpcomm/base/base.h"
@ -394,17 +393,6 @@ int orte_ess_base_orted_setup(char **hosts)
/* add our contact info */
proc->rml_uri = orte_rml.get_contact_info();
/* Messaging QoS Layer */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_select";
goto error;
}
/* select the errmgr */
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
@ -43,7 +43,6 @@
#include "orte/mca/oob/base/base.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/mca/iof/base/base.h"
@ -120,17 +119,6 @@ int orte_ess_base_tool_setup(void)
error = "orte_rml_base_select";
goto error;
}
/* Messaging QoS Layer */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_qos_base_select";
goto error;
}
/* Routed system */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -55,7 +55,6 @@
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/routed/routed.h"
@ -342,16 +341,6 @@ static int rte_init(void)
goto error;
}
/* Messaging QoS Layer */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
error = "orte_qos_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
error = "orte_qos_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
error = "orte_errmgr_base_select";
goto error;

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -45,7 +45,7 @@
#include "orte/mca/mca.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/qos/base/base.h"
BEGIN_C_DECLS
typedef int (*mca_oob_base_component_avail_fn_t)(void);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -398,10 +398,10 @@ static void process_send(int fd, short args, void *cbdata)
orte_process_name_t hop;
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s:[%s:%d] processing send to peer %s:%d to channel =%d seq_num = %d",
"%s:[%s:%d] processing send to peer %s:%d seq_num = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
__FILE__, __LINE__,
ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->dst_channel, op->msg->seq_num);
ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->seq_num);
/* do we have a route to this peer (could be direct)? */
hop = orte_routed.get_route(&op->msg->dst);

Просмотреть файл

@ -14,7 +14,7 @@
* reserved.
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -704,9 +704,9 @@ static void component_shutdown(void)
static int component_send(orte_rml_send_t *msg)
{
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:tcp:send_nb to peer %s:%d to channel=%d seq = %d",
"%s oob:tcp:send_nb to peer %s:%d seq = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&msg->dst), msg->tag,msg->dst_channel, msg->seq_num );
ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num );
/* the module is potentially running on its own event
* base, so all it can do is push our send request
@ -1093,7 +1093,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
snd->dst = mop->snd->hdr.dst;
snd->origin = mop->snd->hdr.origin;
snd->tag = mop->snd->hdr.tag;
snd->dst_channel = mop->snd->hdr.channel;
snd->seq_num = mop->snd->hdr.seq_num;
snd->data = mop->snd->data;
snd->count = mop->snd->hdr.nbytes;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 -2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -56,8 +56,6 @@ typedef struct {
mca_oob_tcp_msg_type_t type;
/* the rml tag where this message is headed */
orte_rml_tag_t tag;
/* the rml channel where this message is headed */
orte_rml_channel_num_t channel;
/* the seq number of this message */
uint32_t seq_num;
/* number of bytes in message */

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -196,12 +196,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata)
ORTE_NAME_PRINT(&(peer->name)));
opal_event_del(&peer->send_event);
msg->msg->status = rc;
if( NULL == msg->msg->channel) {
ORTE_RML_SEND_COMPLETE(msg->msg);
}
else {
ORTE_QOS_SEND_COMPLETE(msg->msg);
}
OBJ_RELEASE(msg);
peer->send_msg = NULL;
goto next;
@ -228,12 +223,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata)
ORTE_NAME_PRINT(&(peer->name)),
(int)ntohl(msg->hdr.nbytes), peer->sd);
msg->msg->status = ORTE_SUCCESS;
if( NULL == msg->msg->channel) {
ORTE_RML_SEND_COMPLETE(msg->msg);
}
else {
ORTE_QOS_SEND_COMPLETE(msg->msg);
}
OBJ_RELEASE(msg);
peer->send_msg = NULL;
} else if (NULL != msg->msg->data) {
@ -268,12 +258,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata)
ORTE_NAME_PRINT(&(peer->name)),
(int)ntohl(msg->hdr.nbytes), peer->sd);
msg->msg->status = ORTE_SUCCESS;
if( NULL == msg->msg->channel) {
ORTE_RML_SEND_COMPLETE(msg->msg);
}
else {
ORTE_QOS_SEND_COMPLETE(msg->msg);
}
OBJ_RELEASE(msg);
peer->send_msg = NULL;
}
@ -290,12 +275,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata)
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
opal_event_del(&peer->send_event);
msg->msg->status = rc;
if( NULL == msg->msg->channel) {
ORTE_RML_SEND_COMPLETE(msg->msg);
}
else {
ORTE_QOS_SEND_COMPLETE(msg->msg);
}
OBJ_RELEASE(msg);
peer->send_msg = NULL;
ORTE_FORCED_TERMINATE(1);
@ -570,12 +550,13 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
/* yes - post it to the RML for delivery */
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s DELIVERING TO RML tag = %d channel = %d seq_num = %d",
"%s DELIVERING TO RML tag = %d seq_num = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel,
peer->recv_msg->hdr.tag,
peer->recv_msg->hdr.seq_num);
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num,
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin,
peer->recv_msg->hdr.tag,
peer->recv_msg->hdr.seq_num,
peer->recv_msg->data,
peer->recv_msg->hdr.nbytes);
OBJ_RELEASE(peer->recv_msg);
@ -591,7 +572,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
snd->origin = peer->recv_msg->hdr.origin;
snd->tag = peer->recv_msg->hdr.tag;
snd->data = peer->recv_msg->data;
snd->dst_channel = peer->recv_msg->hdr.channel;
snd->seq_num = peer->recv_msg->hdr.seq_num;
snd->count = peer->recv_msg->hdr.nbytes;
snd->cbfunc.iov = NULL;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -107,18 +107,16 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
mca_oob_tcp_send_t *msg; \
int i; \
opal_output_verbose(5, orte_oob_base_framework.framework_output, \
"%s:[%s:%d] queue send to %s channel =%d", \
"%s:[%s:%d] queue send to %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, \
ORTE_NAME_PRINT(&((m)->dst)), \
(m)->dst_channel); \
ORTE_NAME_PRINT(&((m)->dst))); \
msg = OBJ_NEW(mca_oob_tcp_send_t); \
/* setup the header */ \
msg->hdr.origin = (m)->origin; \
msg->hdr.dst = (m)->dst; \
msg->hdr.type = MCA_OOB_TCP_USER; \
msg->hdr.tag = (m)->tag; \
msg->hdr.channel = (m)->dst_channel; \
msg->hdr.seq_num = (m)->seq_num; \
/* point to the actual message */ \
msg->msg = (m); \
@ -163,7 +161,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
msg->hdr.dst = (m)->dst; \
msg->hdr.type = MCA_OOB_TCP_USER; \
msg->hdr.tag = (m)->tag; \
msg->hdr.channel = (m)->dst_channel; \
msg->hdr.seq_num = (m)->seq_num; \
/* point to the actual message */ \
msg->msg = (m); \

Просмотреть файл

@ -1,31 +0,0 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
# The qos framework is built as a libtool library that is not installed
# on its own (noinst_).  Its source list starts empty and is appended to
# below.
noinst_LTLIBRARIES = libmca_qos.la
libmca_qos_la_SOURCES =
# pkgdata setup
# Starts empty in this file.
dist_ortedata_DATA =
# local files
# qos.h is the only framework-level file listed directly in this Makefile.
headers = qos.h
libmca_qos_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ortedir = $(orteincludedir)/$(subdir)
nobase_orte_HEADERS = $(headers)
endif
# Pull in the build rules for the framework's base/ directory.
include base/Makefile.am
# Remove the generated static-components.h header on distclean.
distclean-local:
rm -f base/static-components.h

Просмотреть файл

@ -1,34 +0,0 @@
#
# Copyright (c) 2015 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the "ack" QoS component.
sources = \
qos_ack.h \
qos_ack_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on MCA_BUILD_orte_qos_ack_DSO.
if MCA_BUILD_orte_qos_ack_DSO
component_noinst =
component_install = mca_qos_ack.la
else
component_noinst = libmca_qos_ack.la
component_install =
endif
# DSO flavour: installed into $(ortelibdir).
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_qos_ack_la_SOURCES = $(sources)
mca_qos_ack_la_LDFLAGS = -module -avoid-version
# Static flavour: built but not installed (noinst_).
noinst_LTLIBRARIES = $(component_noinst)
libmca_qos_ack_la_SOURCES = $(sources)
libmca_qos_ack_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,101 +0,0 @@
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* QoS Ack Component interface
*
*
*
*/
#ifndef MCA_QOS_ACK_H
#define MCA_QOS_ACK_H
#include "orte_config.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/qos/base/base.h"
#include "opal/class/opal_hotel.h"
BEGIN_C_DECLS
#define QOS_ACK_SEQ_NUM_UNINITIALIZED 0
#define QOS_ACK_MAX_WINDOW 100
#define QOS_ACK_MAX_OUTSTANDING_MSGS (QOS_ACK_MAX_WINDOW *2)
/* window timeout in seconds (the value below is 1 second).
TO DO: make this a QOS attribute that can be specified by the user */
#define QOS_ACK_WINDOW_TIMEOUT_IN_SECS 1
#define ACK_WINDOW_COMPLETE 0
#define ACK_TIMEOUT 1
#define ACK_OUT_OF_ORDER 2
#define ACK_RECV_MISSED_MSG 3 /* received previously missed msgs*/
/* States of an ACK QoS channel.  The enumerators rely on implicit
 * numbering, so their values are 0 through 4 in declaration order. */
typedef enum {
    orte_qos_ack_channel_state_inactive,          /* 0 */
    orte_qos_ack_channel_state_filling_window,    /* 1 */
    orte_qos_ack_channel_state_window_completed,  /* 2 */
    orte_qos_ack_channel_state_awaiting_ack,      /* 3 */
    orte_qos_ack_channel_state_received_ack       /* 4 */
} orte_qos_ack_channel_state_t;
/*
 * ACK QoS channel data structure.
 *
 * One instance holds the per-channel state of the "ack" QoS module.
 * It embeds an opal_list_item_t as its first member.  In the visible
 * code the sender path uses window_first_seq_num / out_msg_seq_num and
 * the receiver path uses in_msg_seq_num / ack_msg_seq_num.
 */
typedef struct orte_qos_ack_channel {
opal_list_item_t super;
/* channel number handed to ack_create() */
uint32_t channel_num;
/* We retain the attributes so we can compare channels.  This could be
 * dropped in favour of comparing incoming attributes with the
 * attributes of interest to this channel type. */
opal_list_t attributes;
/* size of the message window */
uint32_t window;
/* window timeout in secs.*/
uint32_t timeout_secs;
/* retry msg window on ack fail */
bool retry;
/* seq number of the first msg in the active window */
uint32_t window_first_seq_num;
/* sequence number of last outgoing msg */
uint32_t out_msg_seq_num;
/* sequence number of last incoming msg */
uint32_t in_msg_seq_num;
/* sequence number of the last message acked */
uint32_t ack_msg_seq_num;
/* ACK outstanding msgs hotel */
opal_hotel_t outstanding_msgs;
/* Maps a msg seq num to its room number in the hotel.  The accessors
 * index it with (seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS); callers
 * treat -1 as "no room assigned". */
int seq_num_to_room_num[QOS_ACK_MAX_OUTSTANDING_MSGS];
/* channel state */
orte_qos_ack_channel_state_t state;
/* window timer event */
opal_event_t msg_ack_timer_event;
}orte_qos_ack_channel_t;
OBJ_CLASS_DECLARATION(orte_qos_ack_channel_t);
extern orte_qos_module_t orte_qos_ack_module;
/*
 * Look up the hotel room number recorded for a message sequence number.
 * The table is indexed by seq_num modulo QOS_ACK_MAX_OUTSTANDING_MSGS,
 * so sequence numbers that far apart share a slot.
 */
static inline int orte_qos_ack_channel_get_msg_room (orte_qos_ack_channel_t * ack_chan,
                                                     uint32_t seq_num)
{
    const uint32_t slot = seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS;

    return ack_chan->seq_num_to_room_num[slot];
}
/*
 * Record the hotel room number for a message sequence number.
 * The slot written is seq_num modulo QOS_ACK_MAX_OUTSTANDING_MSGS.
 */
static inline void orte_qos_ack_channel_set_msg_room (orte_qos_ack_channel_t * ack_chan,
                                                      uint32_t seq_num, int room_num)
{
    const uint32_t slot = seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS;

    ack_chan->seq_num_to_room_num[slot] = room_num;
}
/* Callback passed to opal_hotel_init() by ack_open() for the
 * outstanding-message hotel. */
ORTE_DECLSPEC void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel,
int room_num, void *occupant);
/* Callback registered on the channel's msg_ack_timer_event by
 * ack_init_recv(); cbdata is the channel. */
ORTE_DECLSPEC void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata);
/* Callback passed to opal_hotel_init() by ack_init_recv() for the
 * outstanding-message hotel. */
ORTE_DECLSPEC void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel,
int room_num, void *occupant);
END_C_DECLS
#endif /* MCA_QOS_ACK_H */

Просмотреть файл

@ -1,718 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/qos/qos.h"
#include "qos_ack.h"
/* ack module functions */
/* component start/shutdown hooks */
static int qos_ack_start (void);
static void qos_ack_shutdown (void);
/* entries of the orte_qos_module_t function table defined below */
static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num);
static int ack_open (void *qos_channel,
opal_buffer_t * buf);
static int ack_send ( void *qos_channel, orte_rml_send_t *msg);
static int ack_recv (void *channel, orte_rml_recv_t *msg);
static int ack_close (void * channel);
static int ack_init_recv (void *channel, opal_list_t *attributes);
static int ack_cmp (void *channel, opal_list_t *attributes);
static void ack_send_callback (orte_rml_send_t *msg);
/* utility functions */
/* builds and sends an ACK message for the given channel */
static inline int send_ack (orte_qos_ack_channel_t * channel,
orte_rml_channel_num_t channel_num,
uint32_t ack_type,
uint32_t last_msg_seq_num);
/* receive callback posted by qos_ack_start() for ORTE_RML_TAG_MSG_ACK */
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata);
/* send-completion callback handed to send_buffer_nb() by send_ack() */
void orte_qos_ack_msg_send_callback ( int status,
orte_process_name_t *peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata);
/* handles a received message that is not the next expected one */
static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *channel,
orte_rml_recv_t *msg);
/**
 * ack module definition
 *
 * Function table for the ACK QoS module.  The initializer is
 * positional, so the order of entries must match the field order of
 * orte_qos_module_t (declared outside this file).
 */
orte_qos_module_t orte_qos_ack_module = {
ack_create,
ack_open,
ack_send,
ack_recv,
ack_close,
ack_init_recv,
ack_cmp,
ack_send_callback
};
/**
 * component definition
 *
 * Positional initializer: the MCA base component descriptor, then the
 * start and shutdown hooks, the QoS type value, and finally an inline
 * copy of the same function table used for orte_qos_ack_module.
 */
mca_qos_base_component_t mca_qos_ack_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
MCA_QOS_BASE_VERSION_2_0_0,
"ack", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
NULL,
NULL,
},
qos_ack_start,
qos_ack_shutdown,
orte_qos_ack,
/* module function table; keep in sync with orte_qos_ack_module */
{
ack_create,
ack_open,
ack_send,
ack_recv,
ack_close,
ack_init_recv,
ack_cmp,
ack_send_callback
}
};
/*
 * Component start hook.
 *
 * Posts a persistent receive for the ACK tag from any sender, so that
 * incoming ACK messages are delivered to
 * orte_qos_ack_channel_process_ack.  Always returns ORTE_SUCCESS.
 */
static int qos_ack_start(void)
{
    orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD,
                             ORTE_RML_TAG_MSG_ACK,
                             ORTE_RML_PERSISTENT,
                             orte_qos_ack_channel_process_ack,
                             NULL);

    return ORTE_SUCCESS;
}
/* Component shutdown hook: intentionally does nothing. */
static void qos_ack_shutdown (void)
{
    return;
}
/**
 * Create an ACK QoS channel from a caller-supplied attribute list.
 *
 * The channel type, window size and ack/nack timeout are copied into
 * the channel's own attribute list.  The retry flag is copied as well
 * when ORTE_QOS_MSG_RETRY is present in qos_attributes.
 *
 * @param qos_attributes  attribute list; it must contain
 *                        ORTE_QOS_WINDOW_SIZE (at most QOS_ACK_MAX_WINDOW)
 *                        and ORTE_QOS_ACK_NACK_TIMEOUT
 * @param channel_num     channel number stored in the new channel
 * @return the new channel, or NULL if a required attribute is missing,
 *         the window is out of bounds, or an attribute cannot be stored
 *         (the partially built channel is released in those cases)
 */
static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num) {
    orte_qos_ack_channel_t *ack_chan;
    int32_t rc;
    uint32_t type_val = orte_qos_ack;
    uint32_t attribute_val;
    uint32_t *type = &type_val;
    uint32_t *attribute = &attribute_val;

    ack_chan = OBJ_NEW (orte_qos_ack_channel_t);
    ack_chan->channel_num = channel_num;

    /* validate and store ack specific channel attributes */

    /* set channel type */
    /* NOTE(review): the value lives in a uint32_t but is stored as
     * OPAL_UINT8 - confirm this is intended on big-endian hosts. */
    if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_TYPE,
                                                 ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) {
        ORTE_ERROR_LOG(rc);
        goto fail;
    }

    /* window size: required and bounded */
    if (!orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&attribute, OPAL_UINT32)) {
        /* this failure used to be silent; report it like the others */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        goto fail;
    }
    if (QOS_ACK_MAX_WINDOW < (*attribute)) {
        ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS);
        goto fail;
    }
    ack_chan->window = *attribute;
    if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_WINDOW_SIZE,
                                                 ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) {
        ORTE_ERROR_LOG(rc);
        goto fail;
    }

    /* ack/nack timeout: required */
    if (!orte_get_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&attribute, OPAL_UINT32)) {
        /* rc still holds ORTE_SUCCESS here, so log a real error code */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        goto fail;
    }
    ack_chan->timeout_secs = *attribute;
    if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_ACK_NACK_TIMEOUT,
                                                 ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) {
        ORTE_ERROR_LOG(rc);
        goto fail;
    }

    /* retry: optional flag, present means true */
    ack_chan->retry = orte_get_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, NULL, OPAL_BOOL);
    if (ack_chan->retry) {
        if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_MSG_RETRY,
                                                     ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL))) {
            ORTE_ERROR_LOG(rc);
            goto fail;
        }
    }

    /* log only after retry has been assigned so the value is accurate */
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_create created channel = %p window = %d timeout =%d retry = %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)ack_chan,
                         ack_chan->window,
                         ack_chan->timeout_secs,
                         ack_chan->retry));
    return ack_chan;

fail:
    OBJ_RELEASE(ack_chan);
    /* return NULL explicitly rather than relying on OBJ_RELEASE
     * having cleared the pointer */
    return NULL;
}
/*
 * Open an ACK channel on the sending side.
 *
 * Initializes the channel's outstanding-message hotel with
 * QOS_ACK_MAX_OUTSTANDING_MSGS rooms, using
 * orte_qos_ack_msg_ack_timeout_callback as the hotel callback, then
 * packs the channel's attribute list into buf.
 *
 * Returns the result of orte_qos_base_pack_attributes(); a failure is
 * logged before it is returned.
 */
static int ack_open (void *qos_channel, opal_buffer_t * buf) {
    orte_qos_ack_channel_t *chan = (orte_qos_ack_channel_t*) qos_channel;
    uint32_t hotel_timeout;
    int32_t status;

    /* TO DO - the eviction timeout should follow the window size;
     * for the first pass the maximum timeout is used.
     * NOTE(review): the 100000 multiplier is kept as found - confirm
     * the unit opal_hotel_init() expects. */
    hotel_timeout = (chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000;

    /* init outstanding msg hotel */
    opal_hotel_init (&chan->outstanding_msgs,
                     QOS_ACK_MAX_OUTSTANDING_MSGS,
                     orte_event_base,
                     hotel_timeout,
                     0,
                     orte_qos_ack_msg_ack_timeout_callback);
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_open channel = %p init hotel timeout =%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)chan, hotel_timeout));

    /* (A message-window timer event was sketched at this point but is
     * not set up on the send side.) */

    /* The attributes stored at create time are the ones that must
     * reach the peer: pack them into the buffer. */
    status = orte_qos_base_pack_attributes(buf, &chan->attributes);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    return status;
}
/*
 * Prepare an outgoing message on an ACK channel.
 *
 * Advances the channel's outgoing sequence number, updates the window
 * state, stamps the message with the sequence number, and checks the
 * message into the outstanding-message hotel.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_QOS_ACK_WINDOW_FULL when the hotel
 * check-in fails.
 */
static int ack_send ( void *qos_channel, orte_rml_send_t *msg) {
    orte_qos_ack_channel_t *chan = (orte_qos_ack_channel_t*) qos_channel;
    int32_t room;
    /* A window begins when the last sequence number sent is the one
     * just before the window's first sequence number. */
    const int starts_window =
        (chan->out_msg_seq_num == chan->window_first_seq_num - 1);

    /* Either way the outgoing sequence number advances by one. */
    chan->out_msg_seq_num++;
    if (starts_window) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_send msg = %p to peer = %s\n begining window at seq_num = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (void*)msg, ORTE_NAME_PRINT(&msg->dst), chan->out_msg_seq_num));
        chan->state = orte_qos_ack_channel_state_filling_window;
    }

    if (chan->out_msg_seq_num - chan->window_first_seq_num == chan->window - 1) {
        /* last message of the window: mark the window complete and
         * start the next window right after this message */
        chan->state = orte_qos_ack_channel_state_window_completed;
        chan->window_first_seq_num = chan->out_msg_seq_num + 1;
    }

    msg->seq_num = chan->out_msg_seq_num;

    /* check msg into hotel */
    if (OPAL_SUCCESS != (opal_hotel_checkin(&chan->outstanding_msgs, msg, &room))) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_send msg = %p to peer = %s returned with error %d\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (void*)msg, ORTE_NAME_PRINT(&msg->dst),
                             ORTE_ERR_QOS_ACK_WINDOW_FULL));
        return ORTE_ERR_QOS_ACK_WINDOW_FULL;
    }

    /* remember which room holds this sequence number */
    orte_qos_ack_channel_set_msg_room(chan, msg->seq_num, room);
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_send msg = %p to peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, ORTE_NAME_PRINT(&msg->dst)));
    return ORTE_SUCCESS;
}
/**
 * Build and send an ACK message to the peer of an RML channel.
 *
 * The message carries the peer's channel number, the ack type, the
 * number of acknowledged messages and their sequence numbers.  The
 * sequence numbers run from ack_msg_seq_num + 1 upward, and the last
 * entry is always last_msg_seq_num.
 *
 * @param ack_chan          ACK channel whose in/ack sequence numbers
 *                          determine how many messages are acknowledged
 * @param channel_num       RML channel number used to look up the peer
 * @param ack_type          one of the ACK_* type values
 * @param last_msg_seq_num  sequence number recorded as last acked on success
 * @return ORTE_SUCCESS, ORTE_ERR_TEMP_OUT_OF_RESOURCE if the scratch
 *         array cannot be allocated, or the send_buffer_nb() error
 */
static inline int send_ack (orte_qos_ack_channel_t * ack_chan,
                            orte_rml_channel_num_t channel_num,
                            uint32_t ack_type, uint32_t last_msg_seq_num)
{
    int rc;
    orte_rml_channel_t *rml_channel;
    opal_buffer_t *buffer;
    uint32_t num_msgs_to_ack;
    uint32_t *ack_seq_num_array;
    uint32_t i;

    /* NOTE(review): rml_channel is dereferenced without a NULL check -
     * confirm orte_rml_base_get_channel() cannot fail here. */
    rml_channel = orte_rml_base_get_channel (channel_num);
    num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num + 1;
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s sending ack type = %d \n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ack_type));

    ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack);
    if (NULL == ack_seq_num_array) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_recv cannot allocate ack array to send ack to peer = %s\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rml_channel->peer)));
        return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
    }

    for (i = 1; i <= num_msgs_to_ack ; i++) {
        ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i;
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_recv acking msg %d to peer = %s\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ack_seq_num_array[i-1],
                             ORTE_NAME_PRINT(&rml_channel->peer)));
    }
    /* the final entry is always the caller-supplied sequence number */
    ack_seq_num_array[num_msgs_to_ack - 1] = last_msg_seq_num;
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_recv acking last msg %d to peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ack_seq_num_array[num_msgs_to_ack - 1],
                         ORTE_NAME_PRINT(&rml_channel->peer)));

    buffer = OBJ_NEW (opal_buffer_t);
    /* pack channel number */
    opal_dss.pack (buffer, &rml_channel->peer_channel, 1, OPAL_UINT32);
    /* pack ack type */
    opal_dss.pack (buffer, &ack_type, 1, OPAL_UINT32);
    /* pack num messages */
    opal_dss.pack (buffer, &num_msgs_to_ack, 1, OPAL_UINT32);
    /* pack seq number array */
    for (i = 0; i < num_msgs_to_ack; i++) {
        opal_dss.pack (buffer, &ack_seq_num_array[i], 1 , OPAL_UINT32);
    }
    /* the values now live in the buffer; the scratch array was
     * previously never freed */
    free (ack_seq_num_array);

    rc = orte_rml.send_buffer_nb (&rml_channel->peer, buffer, ORTE_RML_TAG_MSG_ACK,
                                  orte_qos_ack_msg_send_callback, rml_channel);
    if (ORTE_SUCCESS == rc) {
        /* update last acked msg */
        ack_chan->ack_msg_seq_num = last_msg_seq_num;
    } else {
        /* the send was not accepted, so the completion callback will
         * not run: report the error and drop our buffer reference */
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
    }
    return rc;
}
/*
 * Handle a received message that ack_recv() did not accept as the next
 * in-sequence message.
 *
 * - If the message's sequence-number slot already maps to a hotel room,
 *   the message is treated as a duplicate: ORTE_ERR_DUPLICATE_MSG.
 * - Otherwise the message is checked into the outstanding-message hotel
 *   and its room is recorded.  A check-in failure is returned as is;
 *   in every other case the result is ORTE_ERR_OUT_OF_ORDER_MSG.
 *   - If ack_msg_seq_num <= in_msg_seq_num, an ACK_OUT_OF_ORDER ack is
 *     sent and the window ack timer is deleted.
 *   - Else, if the message's sequence number is below ack_msg_seq_num,
 *     the slots for in_msg_seq_num+1 .. ack_msg_seq_num are inspected.
 *     When all of them hold a room, the stored messages are checked
 *     out and completed in sequence order until a slot without a room
 *     is reached, and an ACK_RECV_MISSED_MSG ack is sent.
 */
static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *ack_chan,
orte_rml_recv_t *msg)
{
int32_t rc, room_num, first_lost_msg_seq_num, num_lost_msgs, i;
orte_rml_recv_t *out_msg;
void *occupant = NULL;
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg msg %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
msg->seq_num));
/* if this msg is a duplicate - then do nothing */
if ((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1) {
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg msg %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
msg->seq_num));
rc = ORTE_ERR_DUPLICATE_MSG;
}
else {
/* park the message in the hotel until it can be delivered in order */
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)msg, &room_num))) {
return rc;
}
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"process_out_of_order_msg checked in msg %d in room %d\n",
msg->seq_num, room_num));
orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num, room_num);
rc = ORTE_ERR_OUT_OF_ORDER_MSG;
/* check if we need to send an ACK */
if (ack_chan->ack_msg_seq_num <= ack_chan->in_msg_seq_num) {
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg sending ack last seq_num = %d\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
msg->seq_num));
/* send ACK. */
send_ack (ack_chan, msg->channel_num, ACK_OUT_OF_ORDER, msg->seq_num);
/* stop window ack timer */
opal_event_evtimer_del (&ack_chan->msg_ack_timer_event);
}
else {
/* if we got a lost msg - any seq num between in_msg_seq_num and ack_seq_num*/
if (ack_chan->ack_msg_seq_num > msg->seq_num) {
/* check if we have got all lost msgs */
first_lost_msg_seq_num = ack_chan->in_msg_seq_num + 1;
num_lost_msgs = ack_chan->ack_msg_seq_num - ack_chan->in_msg_seq_num;
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg msg %d first_lost_msg =%d num_lost_msgs =%d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
msg->seq_num, first_lost_msg_seq_num, num_lost_msgs));
/* stop at the first lost sequence number that has no room yet */
for (i =0 ; i < num_lost_msgs; i++) {
if ((orte_qos_ack_channel_get_msg_room(ack_chan, first_lost_msg_seq_num +i)) == -1)
break;
}
if (i == num_lost_msgs) {
/* we got all the lost msgs so we can complete all the msgs in the hotel now */
/* reset ack_seq_num */
ack_chan->ack_msg_seq_num = first_lost_msg_seq_num -1;
room_num = 0;
/* drain in sequence order; the loop ends after the first slot
 * whose room number is -1 */
for ( i = 0; room_num != -1; i++) {
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg got all lost msgs completing outstanding msgs %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(first_lost_msg_seq_num + i)));
/* evict msg and complete it */
room_num = orte_qos_ack_channel_get_msg_room (ack_chan, first_lost_msg_seq_num +i);
/* NOTE(review): the checkout is attempted before room_num is
 * tested against -1 - confirm the hotel tolerates that value */
opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
orte_qos_ack_channel_set_msg_room(ack_chan, first_lost_msg_seq_num +i, -1);
out_msg = (orte_rml_recv_t *) occupant;
if ((NULL != out_msg) && (room_num != -1)) {
/* set in seq num */
ack_chan->in_msg_seq_num = out_msg->seq_num;
orte_rml_base_complete_recv_msg(&out_msg);
/* completing recv msg to rml */
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"process_out_of_order_msg completed recv msg %d",
(first_lost_msg_seq_num + i)));
} else {
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
"%s process_out_of_order_msg lost msg %d not in hotel",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(first_lost_msg_seq_num + i)));
}
} //end for
/* send ACK */
send_ack (ack_chan, ack_chan->channel_num, ACK_RECV_MISSED_MSG,
ack_chan->in_msg_seq_num);
} //end if (i== num_lost_msgs)
} // if (ack_chan->ack_msg_seq_num > msg->seq_num)
} //end else
} // end duplicate else
return rc;
}
/*
 * Receive-side entry point for a message arriving on an ACK channel.
 *
 * A message is "in order" when its sequence number is exactly one past
 * the last received one and is above the last acked one.  Everything
 * else is handed to process_out_of_order_msg().
 *
 * For an in-order message:
 *  - if it completes the window, the ack timer is deleted and an
 *    ACK_WINDOW_COMPLETE ack is sent (the result is send_ack()'s);
 *  - otherwise the result is ORTE_SUCCESS, and the ack timer is armed
 *    when this is the first message after the last ack.
 * In both cases the message's sequence number is then recorded as the
 * last received one.
 */
static int ack_recv (void *qos_channel, orte_rml_recv_t *msg) {
    orte_qos_ack_channel_t *chan = (orte_qos_ack_channel_t*) qos_channel;
    struct timeval timer_delay;
    int32_t status;

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_recv msg = %p seq_num = %d from peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, msg->seq_num,
                         ORTE_NAME_PRINT(&msg->sender)));

    /* not the next expected msg: take the out-of-order path */
    if ((chan->in_msg_seq_num + 1 != msg->seq_num) ||
        (chan->ack_msg_seq_num >= msg->seq_num)) {
        return process_out_of_order_msg(chan, msg);
    }

    if (chan->window == (msg->seq_num - chan->ack_msg_seq_num)) {
        /* end of the window: stop the window ack timer and ack */
        opal_event_evtimer_del (&chan->msg_ack_timer_event);
        status = send_ack (chan, msg->channel_num, ACK_WINDOW_COMPLETE, msg->seq_num);
    } else {
        if (chan->in_msg_seq_num == chan->ack_msg_seq_num) {
            /* first msg of a window: start the window ack timer */
            timer_delay.tv_sec = chan->timeout_secs;
            timer_delay.tv_usec = 0;
            opal_event_evtimer_add (&chan->msg_ack_timer_event, &timer_delay);
        }
        status = ORTE_SUCCESS;
    }

    chan->in_msg_seq_num = msg->seq_num;
    return status;
}
/*
 * Close an ACK channel.
 *
 * The channel is released only when its outstanding-message hotel is
 * empty.  Returns ORTE_SUCCESS in that case and ORTE_ERR_CHANNEL_BUSY
 * (leaving the channel untouched) otherwise.
 */
static int ack_close (void * channel) {
    orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) channel;

    if (!opal_hotel_is_empty (&ack_chan->outstanding_msgs)) {
        /* messages are still outstanding */
        return ORTE_ERR_CHANNEL_BUSY;
    }

    /* no outstanding msgs, release channel */
    OBJ_RELEASE(ack_chan);
    return ORTE_SUCCESS;
}
/*
 * Initialize the receiving side of an ACK channel.
 *
 * Sets up the outstanding-message hotel with
 * orte_qos_ack_recv_msg_timeout_callback as its callback, and
 * registers orte_qos_ack_msg_window_timeout_callback on the channel's
 * ack timer event (the timer is set here, not added).  The attributes
 * argument is not used.  Always returns ORTE_SUCCESS.
 */
static int ack_init_recv (void *channel, opal_list_t *attributes) {
    orte_qos_ack_channel_t *chan = (orte_qos_ack_channel_t*) channel;
    uint32_t hotel_timeout;

    /* TO DO - the eviction timeout should follow the window size;
     * for the first pass the maximum timeout is used */
    hotel_timeout = (chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000;

    /* init outstanding msg hotel */
    opal_hotel_init (&chan->outstanding_msgs,
                     QOS_ACK_MAX_OUTSTANDING_MSGS,
                     orte_event_base,
                     hotel_timeout,
                     0,
                     orte_qos_ack_recv_msg_timeout_callback);
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_open channel = %p init hotel timeout =%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)chan, hotel_timeout));

    opal_event_evtimer_set (orte_event_base, &chan->msg_ack_timer_event,
                            orte_qos_ack_msg_window_timeout_callback, (void *) chan);

    return ORTE_SUCCESS;
}
/*
 * Attribute comparison hook of the ACK QoS module.
 *
 * Neither argument is examined; the function unconditionally returns false.
 */
static int ack_cmp (void *channel, opal_list_t *attributes) {
    (void) channel;
    (void) attributes;

    return false;
}
/*
 * Send-completion hook of the ACK QoS module.
 *
 * The request is completed back to the user only once the ack arrives, so
 * a successful send needs no action here; debug builds merely verify that
 * the message still has a room recorded for its sequence number.  A failed
 * send is only reported through verbose output (error handling is TO DO).
 */
static void ack_send_callback (orte_rml_send_t *msg)
{
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_send_callback for msg = %p seq num =%d\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, msg->seq_num));

    /* if msg->status != SUCCESS - then evict all messages in the window and
       complete them?? */
    if (ORTE_SUCCESS != msg->status) {
        // TO DO : error handling
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_send_callback for msg = %p seq num =%d SEND FAILED status = %d\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (void*)msg, msg->seq_num, msg->status));
        /* evict message from hotel and send end of window to receiver?? */
        return;
    }

#if OPAL_ENABLE_DEBUG
    {
        /* the sanity check lives inside the same guard as the variable it
           uses, so the function compiles whether or not NDEBUG is defined */
        orte_qos_ack_channel_t *ack_chan =
            (orte_qos_ack_channel_t *) msg->channel->qos_channel_ptr;
        assert((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1);
    }
#endif
}
/*
 * Eviction callback for a sent message whose ack did not arrive in time.
 *
 * occupant is the orte_rml_send_t that was evicted.  Only that one message
 * is completed (TO DO: handle completion of every message in the window):
 * its status becomes ORTE_ERR_ACK_TIMEOUT_SENDER, the room recorded for its
 * sequence number is reset to -1 and the send is completed.
 */
void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel,
                                            int room_num, void *occupant)
{
    orte_rml_send_t *msg = (orte_rml_send_t *) occupant;
    orte_qos_ack_channel_t *owner =
        (orte_qos_ack_channel_t*) msg->channel->qos_channel_ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s orte_qos_ack_msg_ack_timeout_callback for msg = %p seq num =%d\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, msg->seq_num));

    msg->status = ORTE_ERR_ACK_TIMEOUT_SENDER;
    /* the message no longer occupies a room */
    orte_qos_ack_channel_set_msg_room (owner, msg->seq_num , -1);
    /* hand the failed send back to its owner */
    ORTE_RML_SEND_COMPLETE(msg);
}
/*
 * Eviction callback for a received message that timed out on the ACK queue.
 *
 * The correct recovery is still undecided (the sender has not yet reacted
 * to a lost-message event), so the function reports the event through
 * verbose output and then asserts to make the situation debuggable.
 * An earlier draft intended to reset the room recorded for the message's
 * sequence number and reactivate the message; that is not done here.
 */
void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel,
                                             int room_num, void *occupant)
{
#if OPAL_ENABLE_DEBUG
    /* declared for debug builds only; its sole use is the output below */
    orte_rml_recv_t *msg = (orte_rml_recv_t *) occupant;
#endif

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s OOPS received msg = %p seq num =%d timed out on ACK Queue\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, msg->seq_num));

    /* This is highly unlikely - lets assert to enable debug */
    assert(0);
}
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata)
{
/* process ack received for the msg */
uint32_t num_msgs_acked, channel_num, i;
int32_t num_values, room_num;
orte_rml_send_t *msg, *missed_msg;
void *occupant = NULL;
orte_rml_channel_t *channel;
orte_qos_ack_channel_t *ack_chan;
uint32_t *seq_num_array;
uint32_t ack_type;
uint32_t missed_msg_seq_num = 0;
num_values = 1;
/* unpack channel number first */
opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32);
OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
"orte_qos_ack_channel_process_ack recieved ack on channel = %d",
channel_num));
channel = orte_rml_base_get_channel (channel_num);
if ((NULL != channel) || (NULL != channel->qos_channel_ptr)) {
ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr);
seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window);
num_values = 1;
/* unpack ack type */
opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32);
num_values = 1;
/* unpack num messages acked */
opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32);
OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
"orte_qos_ack_channel_process_ack recieved ack type %d for %d msgs on channel = %d",
ack_type, num_msgs_acked, channel_num));
if (ACK_OUT_OF_ORDER != ack_type) {
//handle normal ACK
for (i = 0; i < num_msgs_acked; i++)
{
opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
if((occupant != NULL) && (room_num != -1)) {
msg = (orte_rml_send_t*) occupant;
OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
"Releasing sent message with tag %d and seq_num %d after receiving Ack from dest ",
msg->tag, msg->seq_num ));
msg->status = ORTE_SUCCESS;
ORTE_RML_SEND_COMPLETE(msg);
} else {
OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
"OOPS received an ACK for already completed seq_num =%d ",
seq_num_array[i] ));
}
}
} else {
// handle out of order ACK - complete msgs received in order, retry the lost msg.
for (i = 0; i < num_msgs_acked; i++)
{
opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
if ((NULL != occupant) && ((i == 0 )|| (seq_num_array[i] == seq_num_array[i-1] +1 ))) {
msg = (orte_rml_send_t*) occupant;
msg->status = ORTE_SUCCESS;
ORTE_RML_SEND_COMPLETE(msg);
} else {
if (NULL != occupant) {
// num_missed_msgs = (seq_num_array[i] - seq_num_array [i-1] - 1);
assert( i == num_msgs_acked -1);
/* recheck the ith msg */
opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num);
orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num);
/* resend and recheck all the missed msgs*/
missed_msg_seq_num = seq_num_array[i-1] + 1;
for (; missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) {
room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num);
opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant);
assert ( NULL != occupant);
missed_msg = (orte_rml_send_t*) occupant;
missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW;
opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num);
orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num);
/* send this out on wire directly */
ORTE_OOB_SEND (missed_msg);
} //end for
} else {
OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
"OOPS received an ACK for already completed seq_num =%d ",
seq_num_array[i] ));
}//end if (NULL != occupant)
} //end else
} // end for
}//end out of order ack processing
free(seq_num_array);
}else {
OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
"orte_qos_ack_channel_msg_ack_recv_callback recieved ack on non existent channel = %d",
channel_num));
}
}
/*
 * Send-completion callback that only reports, through verbose output, the
 * channel number (cbdata is the orte_rml_channel_t) and the send status.
 * peer, buffer and tag are not used.
 */
void orte_qos_ack_msg_send_callback (int status,
                                     orte_process_name_t *peer,
                                     struct opal_buffer_t* buffer,
                                     orte_rml_tag_t tag,
                                     void* cbdata)
{
#if OPAL_ENABLE_DEBUG
    /* declared for debug builds only; its sole use is the output below */
    orte_rml_channel_t *channel = (orte_rml_channel_t*) cbdata;
#endif

    OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output,
                          " orte_qos_ack_msg_send_callback channel num =%d status =%d",
                          channel->channel_num, status));
}
/*
 * Window ack timer callback.
 *
 * cbdata is the orte_qos_ack_channel_t whose timer fired.  An ACK_TIMEOUT
 * ack carrying the last received sequence number is sent for that channel;
 * the result of send_ack() is not examined.  fd and flags are not used.
 */
void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata)
{
    orte_qos_ack_channel_t *chan = (orte_qos_ack_channel_t*) cbdata;

    OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output,
                          " orte_qos_ack_msg_window_timeout_callback for channel = %p last acked seq num = %d, last received seq num =%d",
                          (void*)chan, chan->ack_msg_seq_num, chan->in_msg_seq_num ));

    /* send ack message */
    (void) send_ack(chan, chan->channel_num, ACK_TIMEOUT, chan->in_msg_seq_num);
}
/*** ACK QOS CLASS INSTANCES ***/
/*
 * Constructor of orte_qos_ack_channel_t: zero the sequence counters (the
 * first window starts at sequence number 1), mark every seq-num -> room
 * slot as unused (-1), construct the attribute list and the
 * outstanding-message hotel, and start in the inactive state.
 */
static void channel_cons (orte_qos_ack_channel_t *ptr)
{
    int slot;

    ptr->state = orte_qos_ack_channel_state_inactive;

    /* sequence bookkeeping */
    ptr->out_msg_seq_num = 0;
    ptr->in_msg_seq_num = 0;
    ptr->ack_msg_seq_num = 0;
    ptr->window_first_seq_num = 1;

    /* no sequence number owns a room yet */
    for (slot = 0; slot < QOS_ACK_MAX_OUTSTANDING_MSGS; ++slot) {
        ptr->seq_num_to_room_num[slot] = -1;
    }

    OBJ_CONSTRUCT (&ptr->attributes, opal_list_t);
    OBJ_CONSTRUCT (&ptr->outstanding_msgs, opal_hotel_t);
}
/*
 * Destructor of orte_qos_ack_channel_t.  It currently does nothing: the
 * destruction of the attribute list and of the outstanding-message hotel
 * (both constructed in channel_cons) is disabled below.
 * NOTE(review): this looks like it leaks whatever those two members own -
 * confirm why the calls were disabled before re-enabling them.
 */
static void channel_des (orte_qos_ack_channel_t *ptr)
{
// OPAL_LIST_DESTRUCT(&ptr->attributes);
//OBJ_DESTRUCT (&ptr->outstanding_msgs);
}
/* Class instance for orte_qos_ack_channel_t: parent class opal_list_item_t,
 * constructor channel_cons, destructor channel_des. */
OBJ_CLASS_INSTANCE (orte_qos_ack_channel_t,
opal_list_item_t,
channel_cons, channel_des);

Просмотреть файл

@ -1,18 +0,0 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help-message catalog of the qos base
dist_ortedata_DATA += base/help-qos-base.txt
# Header of the qos base
headers += \
base/base.h
# Sources of the qos base that are added to libmca_qos
libmca_qos_la_SOURCES += \
base/qos_base_frame.c \
base/qos_base_select.c \
base/qos_base_channel_handlers.c

Просмотреть файл

@ -1,75 +0,0 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* QoS Framework maintenance interface
*
*
*
*/
#ifndef MCA_QOS_BASE_H
#define MCA_QOS_BASE_H
#include "orte_config.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/rml/base/base.h"
#include "opal/class/opal_list.h"
/*
* MCA Framework
*/
ORTE_DECLSPEC extern mca_base_framework_t orte_qos_base_framework;
/* select a component */
ORTE_DECLSPEC int orte_qos_base_select(void);
/* a global struct containing framework-level values */
typedef struct {
/* list of open channels */
opal_list_t open_channels;
/* active qos components, stored at the index of their component type */
opal_pointer_array_t actives;
#if OPAL_ENABLE_TIMING
/* value of the "timing" MCA variable ("Enable QOS timings") */
bool timing;
#endif
} orte_qos_base_t;
/* the single instance of the framework-level values */
ORTE_DECLSPEC extern orte_qos_base_t orte_qos_base;
/* upper bound for the window size attribute of a channel */
#define ORTE_QOS_MAX_WINDOW_SIZE 1000
/* Base qos channel object; a list item carrying a channel number and the
 * channel's attribute list. */
typedef struct orte_qos_base_channel {
/* parent class, lets the channel be kept on an opal_list_t */
opal_list_item_t super;
/* number of the channel */
uint32_t channel_num;
/* attributes of the channel (constructed and destructed with the object) */
opal_list_t attributes;
} orte_qos_base_channel_t;
OBJ_CLASS_DECLARATION(orte_qos_base_channel_t);
/* common implementations */
/* Look up the qos module of the active component whose type is given by the
 * ORTE_QOS_TYPE attribute in qos_attributes; NULL if the attribute is
 * missing, the type is out of range or no such component is active. */
ORTE_DECLSPEC void* orte_qos_get_module ( opal_list_t *qos_attributes);
/* Pack an attribute count followed by every ORTE_ATTR_GLOBAL attribute of
 * qos_attributes into buffer; returns ORTE_SUCCESS or the pack error.
 * NOTE(review): unlike its neighbours this prototype carries no
 * ORTE_DECLSPEC - confirm whether components need it exported. */
int orte_qos_base_pack_attributes (opal_buffer_t * buffer, opal_list_t * qos_attributes);
/*
 * Complete the send request m (an orte_rml_send_t pointer).
 *
 * If the message's channel has a qos module attached, that module's
 * send_callback is invoked with m; otherwise the message is completed
 * directly through ORTE_RML_SEND_COMPLETE.
 *
 * The macro expands to a single statement without a trailing semicolon, so
 * it must be written as "ORTE_QOS_SEND_COMPLETE(msg);" and is then safe in
 * an unbraced if/else.  m is evaluated more than once; pass a plain
 * pointer expression without side effects.
 */
#define ORTE_QOS_SEND_COMPLETE(m)                                           \
    do {                                                                    \
        orte_qos_module_t *orte_qos_sc_mod;                                 \
        opal_output_verbose(5, orte_qos_base_framework.framework_output,    \
                            "%s-%s Send message complete at %s:%d",         \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            ORTE_NAME_PRINT(&((m)->dst)),                   \
                            __FILE__, __LINE__);                            \
        orte_qos_sc_mod = (orte_qos_module_t*) (m)->channel->qos;           \
        if (NULL != orte_qos_sc_mod) {                                      \
            orte_qos_sc_mod->send_callback(m);                              \
        } else {                                                            \
            ORTE_RML_SEND_COMPLETE(m);                                      \
        }                                                                   \
    } while(0)
END_C_DECLS
#endif /* MCA_QOS_BASE_H */

Просмотреть файл

@ -1,12 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2014 Intel, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
#
[no-qos-avail]
No Qos protocols available.

Просмотреть файл

@ -1,163 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
* qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests.
*/
/*
* includes
*/
#include "orte_config.h"
#include <string.h>
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/dss/dss.h"
#include "opal/util/output.h"
#include "opal/util/timings.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/rml/base/base.h"
/*
 * Pack the attributes that have to travel to the peer.
 *
 * Writes the number of ORTE_ATTR_GLOBAL attributes in qos_attributes
 * (ORTE_STD_CNTR) into buffer, followed by each of those attributes.
 * Non-global attributes are neither counted nor packed, so the count
 * always matches the number of attributes that follow it.
 *
 * Returns ORTE_SUCCESS, or the error reported by opal_dss.pack (which is
 * also logged).
 */
int orte_qos_base_pack_attributes (opal_buffer_t * buffer,
                                   opal_list_t * qos_attributes)
{
    int32_t num_attributes = 0;
    int32_t rc = ORTE_SUCCESS;
    orte_attribute_t *kv;

    /* count only what will actually be packed below */
    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
        if (ORTE_ATTR_GLOBAL == kv->local) {
            ++num_attributes;
        }
    }
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s orte_qos_base_pack_attributes num_attributes = %d\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         num_attributes));
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG (rc);
        return rc;
    }

    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
        if (ORTE_ATTR_GLOBAL != kv->local) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s orte_qos_base_pack_attributes attribute key = %d value =%d\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             kv->key, kv->data.uint8));
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }
    return rc;
}
/*
 * Find the qos module that serves the channel type named in qos_attributes.
 *
 * The ORTE_QOS_TYPE attribute is fetched as OPAL_UINT8 into storage of
 * exactly that width, so the value read does not depend on byte order.
 *
 * Returns a pointer to the module of the active component registered for
 * that type, or NULL when the attribute is missing, the type is not below
 * ORTE_QOS_MAX_COMPONENTS, or no component is active for it.
 */
void* orte_qos_get_module (opal_list_t *qos_attributes)
{
    uint8_t type_val = 0;
    uint8_t *type = &type_val;
    mca_qos_base_component_t *qos_comp;

    if (!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
        return NULL;
    }
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s orte_qos_get_module channel type = %d\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         type_val));

    /* check if type is valid (an unsigned type can not be negative) */
    if (ORTE_QOS_MAX_COMPONENTS <= type_val) {
        return NULL;
    }

    /* associate the qos module */
    qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val);
    if (NULL == qos_comp) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s qos_base_get_module failed to get qos component of type =%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             type_val));
        return NULL;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s qos_base_get_module returning qos module %p type =%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)&qos_comp->mod, type_val));
    return (void*)(&qos_comp->mod);
}
/*
 * Forward a channel-create request to the qos module qos_mod.
 * Returns what the module's create() returns; a NULL module is logged as
 * ORTE_ERR_BAD_PARAM and yields NULL.
 */
void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return NULL;
    }
    return module->create(qos_attributes, channel_num);
}
/*
 * Forward a channel-open request to the qos module qos_mod.
 * Returns what the module's open() returns; a NULL module is logged and
 * yields ORTE_ERR_BAD_PARAM.
 */
int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    return module->open (qos_channel, buffer);
}
/*
 * Forward a channel-close request to the qos module qos_mod.
 * Returns what the module's close() returns; a NULL module or a NULL
 * channel is logged and yields ORTE_ERR_BAD_PARAM.
 */
int orte_qos_close_channel (void *qos_mod, void *qos_channel) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if ((NULL == module) || (NULL == qos_channel)) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return (ORTE_ERR_BAD_PARAM);
    }
    return module->close (qos_channel);
}
/*
 * Forward a receive-side initialization to the qos module qos_mod.  The
 * module's result is not examined; a NULL module is logged as
 * ORTE_ERR_BAD_PARAM.
 */
void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return;
    }
    module->init_recv (qos_channel, qos_attributes);
}
/*
 * Forward a channel/attribute comparison to the qos module qos_mod.
 * Returns what the module's cmp() returns; a NULL module is logged as
 * ORTE_ERR_BAD_PARAM and yields -1.
 */
int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return -1;
    }
    return module->cmp (qos_channel, qos_attributes);
}
/*
 * Forward a send request to the qos module qos_mod.
 * Returns what the module's send() returns; a NULL module is logged as
 * ORTE_ERR_BAD_PARAM and yields ORTE_ERROR.
 */
int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return ORTE_ERROR;
    }
    return module->send (qos_channel, msg);
}
/*
 * Forward a received message to the qos module qos_mod.
 * Returns what the module's recv() returns; a NULL module is logged as
 * ORTE_ERR_BAD_PARAM and yields ORTE_ERROR.
 */
int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg) {
    orte_qos_module_t *module = (orte_qos_module_t *) (qos_mod);

    if (NULL == module) {
        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
        return ORTE_ERROR;
    }
    return module->recv(qos_channel, msg);
}

Просмотреть файл

@ -1,120 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/class/opal_bitmap.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/qos/qos.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/state/state.h"
#endif
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/qos/base/static-components.h"
/*
* Global variables
*/
/* framework-level state; its members are constructed in orte_qos_base_open */
orte_qos_base_t orte_qos_base = {{{0}}};
/* timing handle used by the OPAL_TIMING_* calls in this file */
OPAL_TIMING_DECLARE(tm_qos)
/*
 * Register the MCA variables of the qos framework.
 *
 * When OPAL_ENABLE_TIMING is set, the boolean "timing" variable
 * ("Enable QOS timings") is registered with a default of false; the result
 * of the registration is deliberately ignored.  flags is not used.
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_qos_base_register(mca_base_register_flag_t flags)
{
#if OPAL_ENABLE_TIMING
    /* Detailed timing setup */
    orte_qos_base.timing = false;
    (void) mca_base_var_register ("orte", "qos", "base", "timing",
                                  "Enable QOS timings",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_qos_base.timing);
#endif

    return ORTE_SUCCESS;
}
/*
 * Close the qos framework: destruct the table of active components, emit
 * the final timing event and report, then close the framework components.
 *
 * TO DO: shut down every active component before the table is destructed.
 *
 * Returns the result of mca_base_framework_components_close().
 */
static int orte_qos_base_close(void)
{
    int rc;

    /* destruct our internal lists */
    OBJ_DESTRUCT(&orte_qos_base.actives);

    OPAL_TIMING_EVENT((&tm_qos, "Finish"));
    OPAL_TIMING_REPORT(orte_qos_base.timing, &tm_qos);

    rc = mca_base_framework_components_close(&orte_qos_base_framework, NULL);
    return rc;
}
/**
 * Function for finding and opening either all MCA components,
 * or the one that was specifically requested via a MCA parameter.
 *
 * Sets up the table of active components (ORTE_QOS_MAX_COMPONENTS initial
 * slots) and the timing handle before opening the components.
 *
 * @param flags  passed through to mca_base_framework_components_open()
 * @return the error of opal_pointer_array_init() if the table can not be
 *         set up, otherwise the result of
 *         mca_base_framework_components_open()
 */
static int orte_qos_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* setup globals */
    OBJ_CONSTRUCT(&orte_qos_base.actives, opal_pointer_array_t);
    rc = opal_pointer_array_init(&orte_qos_base.actives, ORTE_QOS_MAX_COMPONENTS, INT_MAX, 1);
    if (ORTE_SUCCESS != rc) {
        /* without the table no component can be recorded: undo and fail */
        OBJ_DESTRUCT(&orte_qos_base.actives);
        return rc;
    }

    /* NOTE: registration of fault-tolerance (FT_CR) job states is not done
       here; an earlier draft of it was never enabled */

    OPAL_TIMING_INIT(&tm_qos);

    /* Open up all available components */
    return mca_base_framework_components_open(&orte_qos_base_framework, flags);
}
/* Declares the orte "qos" framework with the register, open and close
 * functions defined in this file and the static component list. */
MCA_BASE_FRAMEWORK_DECLARE(orte, qos, "Messaging Quality of Service Subsystem",
orte_qos_base_register, orte_qos_base_open, orte_qos_base_close,
mca_qos_base_static_components, 0);
/*** QOS CLASS INSTANCES ***/
/* Constructor of orte_qos_base_channel_t: sets up the empty attribute list. */
static void channel_cons (orte_qos_base_channel_t *ptr)
{
    opal_list_t *attrs = &ptr->attributes;

    OBJ_CONSTRUCT(attrs, opal_list_t);
}
/* Destructor of orte_qos_base_channel_t: tears down the attribute list. */
static void channel_des (orte_qos_base_channel_t *ptr)
{
    opal_list_t *attrs = &ptr->attributes;

    OPAL_LIST_DESTRUCT(attrs);
}
/* Class instance for orte_qos_base_channel_t: parent class opal_list_item_t,
 * constructor channel_cons, destructor channel_des. */
OBJ_CLASS_INSTANCE (orte_qos_base_channel_t,
opal_list_item_t,
channel_cons, channel_des);

Просмотреть файл

@ -1,73 +0,0 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/qos/base/base.h"
/**
 * Function for selecting all runnable modules from those that are
 * available.
 *
 * Every component of the framework is started (a component without a
 * start function is accepted as is) and stored in the table of active
 * components at the index of its type.  Components that fail to start or
 * that can not be stored are skipped.
 *
 * @return ORTE_SUCCESS if at least one component is active; otherwise the
 *         "no-qos-avail" help message is shown and ORTE_ERR_SILENT is
 *         returned.
 */
int orte_qos_base_select(void)
{
    mca_base_component_list_item_t *cli;
    mca_qos_base_component_t *component;
    int count = 0;

    /* Query all available components and ask if their transport is available */
    OPAL_LIST_FOREACH(cli, &orte_qos_base_framework.framework_components, mca_base_component_list_item_t) {
        component = (mca_qos_base_component_t *) cli->cli_component;
        opal_output_verbose(5, orte_qos_base_framework.framework_output,
                            "mca:qos:select: checking available component %s",
                            component->qos_base.mca_component_name);

        if (NULL == component->start) {
            opal_output_verbose(5, orte_qos_base_framework.framework_output,
                                "mca:qos:select: component %s start function is null, type =%d",
                                component->qos_base.mca_component_name, component->type);
        } else if (ORTE_SUCCESS != component->start()) {
            /* if it fails to startup, then skip it */
            opal_output_verbose(5, orte_qos_base_framework.framework_output,
                                "mca:qos:select: Skipping component [%s] - failed to initialize",
                                component->qos_base.mca_component_name );
            continue;
        }

        /* store each qos component in the actives pointer array at the
           index of that component type; a component that can not be stored
           can not be looked up later, so it must not be counted */
        if (ORTE_SUCCESS != opal_pointer_array_set_item(&orte_qos_base.actives,
                                                        component->type, component)) {
            opal_output_verbose(5, orte_qos_base_framework.framework_output,
                                "mca:qos:select: Skipping component [%s] - could not be stored, type =%d",
                                component->qos_base.mca_component_name, component->type);
            continue;
        }
        count++;
    }

    if (0 == count) {
        /* no support available means we really cannot run */
        opal_output_verbose(5, orte_qos_base_framework.framework_output,
                            "mca:qos:select: Init failed to return any available QoS components");
        /* the topic name must match the one defined in help-qos-base.txt */
        orte_show_help("help-qos-base.txt", "no-qos-avail", true);
        return ORTE_ERR_SILENT;
    }

    opal_output_verbose(5, orte_qos_base_framework.framework_output,
                        "mca:qos:select: Found %d active QoS components",
                        count);
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,34 +0,0 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the noop qos component
sources = \
qos_noop.h \
qos_noop_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_qos_noop_DSO
component_noinst =
component_install = mca_qos_noop.la
else
component_noinst = libmca_qos_noop.la
component_install =
endif
# DSO build: the component library is installed into $(ortelibdir)
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_qos_noop_la_SOURCES = $(sources)
mca_qos_noop_la_LDFLAGS = -module -avoid-version
# Static build: the component library is built but not installed
noinst_LTLIBRARIES = $(component_noinst)
libmca_qos_noop_la_SOURCES = $(sources)
libmca_qos_noop_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,35 +0,0 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* QoS No-op Component interface
*
*
*
*/
#ifndef MCA_QOS_NOOP_H
#define MCA_QOS_NOOP_H
#include "orte_config.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/qos/base/base.h"
BEGIN_C_DECLS
/* the noop component descriptor */
/* NOTE(review): other qos code visible here names the component type
 * mca_qos_base_component_t - confirm that orte_qos_component_t exists */
ORTE_MODULE_DECLSPEC extern orte_qos_component_t mca_qos_noop_component;
/* the noop module (table of qos entry points) */
extern orte_qos_module_t orte_qos_noop_module;
END_C_DECLS
#endif /* MCA_QOS_NOOP_H */

Просмотреть файл

@ -1,337 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
* qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests.
*/
/*
* includes
*/
#include "orte_config.h"
#include <string.h>
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/dss/dss.h"
#include "opal/util/output.h"
#include "opal/util/timings.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/mca/qos/qos.h"
#include "orte/mca/qos/base/base.h"
/*
 * Pack the attributes that have to travel to the peer.
 *
 * Writes the number of ORTE_ATTR_GLOBAL attributes in qos_attributes
 * (ORTE_STD_CNTR) into buffer, followed by each of those attributes, so
 * that the count matches what orte_qos_base_unpack_attributes will find.
 *
 * Returns ORTE_SUCCESS, or the error reported by opal_dss.pack (which is
 * also logged).
 */
static int orte_qos_base_pack_attributes (opal_buffer_t * buffer,
                                          opal_list_t * qos_attributes)
{
    int32_t num_attributes = 0;
    int32_t rc = ORTE_SUCCESS;
    orte_attribute_t *kv;

    /* count only what will actually be packed below */
    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
        if (ORTE_ATTR_GLOBAL == kv->local) {
            ++num_attributes;
        }
    }
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) {
        /* the logging macro used everywhere else in this file */
        ORTE_ERROR_LOG (rc);
        return rc;
    }

    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
        if (ORTE_ATTR_GLOBAL != kv->local) {
            continue;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }
    return rc;
}
/*
 * Unpack a list of attributes from buffer.
 *
 * Reads an attribute count (ORTE_STD_CNTR) and then that many attributes,
 * appending each one to qos_attributes.  Attributes appended before a
 * failure stay on the list.
 *
 * Returns ORTE_SUCCESS, or the error reported by opal_dss.unpack (which is
 * also logged).
 */
static int orte_qos_base_unpack_attributes (opal_buffer_t *buffer,
                                            opal_list_t *qos_attributes)
{
    orte_attribute_t *attr;
    int32_t total, idx, num_vals;
    int32_t rc;

    /* how many attributes follow */
    num_vals = 1;
    rc = opal_dss.unpack(buffer, &total, &num_vals, ORTE_STD_CNTR);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* the attributes themselves */
    for (idx = 0; idx < total; ++idx) {
        num_vals = 1;
        rc = opal_dss.unpack(buffer, &attr, &num_vals, ORTE_ATTRIBUTE);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        opal_list_append(qos_attributes, &attr->super);
    }
    return rc;
}
/*
 * Create the qos channel for an rml channel.
 *
 * The ORTE_QOS_TYPE attribute (OPAL_UINT8) selects the active component;
 * its module is attached to channel->qos and its create() function builds
 * the qos channel from qos_attributes.
 *
 * Returns what the module's create() returns, or NULL when the attribute
 * is missing, the type is not below ORTE_QOS_MAX_COMPONENTS, or no
 * component is active for that type (channel->qos is then left untouched).
 */
void* orte_qos_base_create_channel ( orte_rml_channel_t *channel,
                                     opal_list_t *qos_attributes)
{
    /* real storage for the attribute value: the lookup writes through the
       pointer it is given, so that pointer must not be left uninitialized */
    uint8_t type_val = 0;
    uint8_t *type = &type_val;
    mca_qos_base_component_t *qos_comp;

    if (!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
        return NULL;
    }

    /* check if type is valid (an unsigned type can not be negative) */
    if (ORTE_QOS_MAX_COMPONENTS <= type_val) {
        return NULL;
    }

    /* associate the qos module */
    qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val);
    if (NULL == qos_comp) {
        /* no active component of that type */
        return NULL;
    }
    channel->qos = (void*) &qos_comp->mod;

    /* call create channel function of the module. */
    return (qos_comp->mod.create( qos_attributes));
}
/*
 * Create a noop qos channel object from the requested attributes.
 *
 * The new channel records the type orte_qos_noop and the requested
 * ORTE_QOS_WINDOW_SIZE as global attributes.  Attributes that do not apply
 * are ignored (TBD: should that be an error?).
 *
 * Returns the new orte_qos_base_channel_t, or NULL when the object can not
 * be allocated, the window size attribute is missing or larger than
 * ORTE_QOS_MAX_WINDOW_SIZE, or an attribute can not be stored.
 */
void * orte_qos_base_create (opal_list_t *qos_attributes)
{
    orte_qos_base_channel_t * base_chan;
    int32_t rc;
    /* real storage for the values exchanged with the attribute functions;
       they read and write through the pointers they are given */
    uint8_t type = (uint8_t) orte_qos_noop;
    uint32_t window_val = 0;
    uint32_t *window = &window_val;

    base_chan = OBJ_NEW (orte_qos_base_channel_t);
    if (NULL == base_chan) {
        return NULL;
    }

    /* get attributes of interest to the base and store them locally. */
    rc = orte_set_attribute( &base_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)&type, OPAL_UINT8);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(base_chan);
        return NULL;
    }

    /* a window size must have been requested */
    if (!orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) {
        OBJ_RELEASE(base_chan);
        return NULL;
    }

    /* reject windows above the maximum (not those below it) */
    if (window_val > ORTE_QOS_MAX_WINDOW_SIZE) {
        ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS);
        OBJ_RELEASE(base_chan);
        return NULL;
    }

    rc = orte_set_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE,
                            ORTE_ATTR_GLOBAL, (void*)&window_val, OPAL_UINT32);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(base_chan);
        return NULL;
    }
    return base_chan;
}
/*
 * Open a channel: pack the attributes stored on the channel at create time
 * (the ones that must reach the peer) into buffer.
 *
 * Returns the result of orte_qos_base_pack_attributes(); a failure is
 * logged.
 */
int orte_qos_base_open_channel ( void * qos_channel,
                                 opal_buffer_t *buffer)
{
    orte_qos_base_channel_t *chan = (orte_qos_base_channel_t*) (qos_channel);
    int32_t rc = orte_qos_base_pack_attributes(buffer, &chan->attributes);

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
/*
 * Receive-side initialization of a noop channel: there is nothing to set
 * up, so both arguments are ignored.
 */
void orte_qos_base_chan_recv_init ( void * qos_channel,
                                    opal_list_t *qos_attributes)
{
    (void) qos_channel;
    (void) qos_attributes;
}
void orte_qos_base_close_channel ( void * qos_channel)
{
qos_channel = (orte_qos_base_channel_t*) (qos_channel);
OBJ_RELEASE(qos_channel);
}
/*
 * Compare a channel with a set of requested attributes.
 *
 * The type (OPAL_UINT8) and window size (OPAL_UINT32) are each read into
 * storage of exactly that width, so no uninitialized bytes take part in
 * the comparison.
 *
 * Returns 0 when type and window size are equal, 1 when the types are
 * equal but the window sizes differ, and ORTE_ERROR when the types differ
 * or one of the attributes is missing on either side.
 */
int orte_qos_base_comp_channel (void *qos_channel,
                                opal_list_t *qos_attributes)
{
    uint8_t chan_typea = 0, chan_typeb = 0, *type_ptr;
    uint32_t window_sizea = 0, window_sizeb = 0, *window_ptr;
    orte_qos_base_channel_t *base_chan = (orte_qos_base_channel_t*) qos_channel;

    type_ptr = &chan_typea;
    if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_TYPE, (void**)&type_ptr, OPAL_UINT8)) {
        return ORTE_ERROR;
    }
    type_ptr = &chan_typeb;
    if (!orte_get_attribute(qos_attributes, ORTE_QOS_TYPE, (void**)&type_ptr, OPAL_UINT8)) {
        return ORTE_ERROR;
    }
    if (chan_typea != chan_typeb) {
        return ORTE_ERROR;
    }

    window_ptr = &window_sizea;
    if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window_ptr, OPAL_UINT32)) {
        return ORTE_ERROR;
    }
    window_ptr = &window_sizeb;
    if (!orte_get_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window_ptr, OPAL_UINT32)) {
        return ORTE_ERROR;
    }
    return (window_sizea != window_sizeb);
}
/*static void orte_qos_open_channel_reply_send_callback ( int status,
orte_process_name_t* sender,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
// this is the send call back for open channel reply
orte_qos_channel_t *channel = (orte_qos_channel_t*) cbdata;
// if the message was not sent we should retry or complete the request appropriately
if (status!= ORTE_SUCCESS)
{
//retry request.
}
// if success then release the buffer and do open channel request completion after receiving response from peer
OBJ_RELEASE(buffer);
}
static void orte_qos_open_channel_send_callback ( int status,
orte_process_name_t* sender,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
// this is the send call back for open channel request
orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata;
// if the message was not sent we should retry or complete the request appropriately
if (status!= ORTE_SUCCESS)
{
// retry if retriable failure.
// else call completion handler.
//remove channel from list
opal_list_remove_item(&orte_qos_base.open_channels, &req->channel->super);
OBJ_RELEASE(req->channel);
// update msg status and channel num so end point can have appropriate info
req->msg->status = status;
req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM;
ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg);
OBJ_RELEASE(req);
}
// if success then release the buffer and do open channel request completion after receiving response from peer
OBJ_RELEASE(buffer);
}
void orte_qos_base_open_channel(int sd, short args, void *cbdata)
{
opal_buffer_t *buffer; int rc;
orte_qos_open_channel_t *open_channel;
orte_qos_open_channel_request_t *req = (orte_qos_open_channel_request_t*)cbdata;
// create channel on sender side by calling the respective qos module.
req->post.channel = orte_qos_base_create_channel(req->post.msg->dst, req->post.msg->qos_attributes);
buffer = OBJ_NEW(opal_buffer_t);
//pack qos attributes list in buffer
if (ORTE_SUCCESS != orte_qos_base_pack_attributes(buffer, req->post.msg->qos_attributes)) {
//invalid attributes complete request with error
}
open_channel = OBJ_NEW(orte_qos_open_channel_t);
open_channel->msg = req->post.msg;
open_channel->channel = req->post.channel;
open_channel->msg->channel_num = open_channel->channel->channel_num;
OBJ_RELEASE(req);
// send request to peer to open channel
orte_rml.send_buffer_nb( &open_channel->msg->dst, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ,
orte_qos_open_channel_send_callback,
open_channel);
// now post a recieve for open_channel_response tag
orte_rml.recv_buffer_nb(&open_channel->msg->dst, ORTE_RML_TAG_OPEN_CHANNEL_REPLY,
ORTE_RML_NON_PERSISTENT, orte_qos_open_channel_reply_callback, open_channel);
} */
/*
void orte_qos_open_channel_recv_callback (int status,
orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
int32_t rc;
opal_list_t *qos_attributes = OBJ_NEW(opal_list_t);
orte_qos_channel_t *channel;
// un pack attributes first
if ( ORTE_SUCCESS == orte_qos_base_unpack_attributes( buffer, qos_attributes)) {
// create channel
if (NULL != (channel = orte_qos_base_create_channel ( *peer, qos_attributes)) ) {
buffer = OBJ_NEW (opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
return;
}
// send channel accept to sender with local channel num
orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REPLY,
orte_qos_open_channel_reply_send_callback,
channel);
}
else {
// reply with error message
}
}
else {
//reply with error message
}
}
void orte_qos_open_channel_reply_callback (int status,
orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata;
orte_qos_channel_t * channel = req->channel;
int32_t count = 1;
int32_t rc;
// process open_channel response from a peer for a open channel request
if (ORTE_SUCCESS == status) {
// unpack buffer and get peer channel number.
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel_num, &count, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
// do error completion
channel->state = orte_qos_channel_closed;
//remove channel from list
opal_list_remove_item(&orte_qos_base.open_channels, &channel->super);
OBJ_RELEASE(channel);
// update msg status and channel num so end point can have appropriate info
req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_RESPONSE_INV;
req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM;
}
else {
channel->state = orte_qos_channel_open;
req->msg->status = ORTE_SUCCESS;
req->msg->channel_num = channel->channel_num;
}
}
else {
channel->state = orte_qos_channel_closed;
//remove channel from list
opal_list_remove_item(&orte_qos_base.open_channels, &channel->super);
OBJ_RELEASE(channel);
// update msg status and channel num so end point can have appropriate info
req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_FAIL;
req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM;
}
ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg);
OBJ_RELEASE(req);
OBJ_RELEASE(buffer);
// 1: If success record peer channel number, update channel state.
//2: If not destroy channel.
//3: complete openchannel request.
} */

Просмотреть файл

@ -1,198 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/qos/qos.h"
/* Forward declarations of the component-level hooks (start/shutdown) that
 * are referenced by the component definition in this file. */
static int qos_noop_start (void);
static void qos_noop_shutdown (void);
/* Forward declarations of the per-channel operations that populate the
 * orte_qos_module_t function tables defined in this file. */
static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num);
static int noop_open (void *qos_channel,
opal_buffer_t * buf);
static int noop_send ( void *qos_channel, orte_rml_send_t *msg);
static int noop_recv (void *channel, orte_rml_recv_t *msg);
static int noop_close (void * channel);
static int noop_init_recv (void *channel, opal_list_t *attributes);
static int noop_cmp (void *channel, opal_list_t *attributes);
static void noop_send_callback (orte_rml_send_t *msg);
/**
* noop module definition
*/
orte_qos_module_t orte_qos_noop_module = {
noop_create,
noop_open,
noop_send,
noop_recv,
noop_close,
noop_init_recv,
noop_cmp,
noop_send_callback
};
/**
 * component definition
 *
 * Positional initializer for mca_qos_base_component_t: the MCA base
 * component header, then the start and shutdown hooks, the QoS type
 * handled by this component, and finally an embedded copy of the module
 * function table.
 */
mca_qos_base_component_t mca_qos_noop_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
MCA_QOS_BASE_VERSION_2_0_0,
"noop", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* NOTE(review): the two NULL entries are presumably the MCA open/close
 * (or similar) hooks of mca_base_component_t - confirm against that
 * struct's definition. */
NULL,
NULL,
},
/* component start / shutdown hooks */
qos_noop_start,
qos_noop_shutdown,
/* QoS type served by this component */
orte_qos_noop,
/* module function table: same entries, in the same order, as
 * orte_qos_noop_module */
{
noop_create,
noop_open,
noop_send,
noop_recv,
noop_close,
noop_init_recv,
noop_cmp,
noop_send_callback
}
};
/**
 * Component start hook for the noop QoS component.
 *
 * There is nothing to set up, so this always reports success.
 *
 * @return ORTE_SUCCESS
 */
static int qos_noop_start(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component shutdown hook for the noop QoS component.
 *
 * The component holds no state of its own, so there is nothing to release.
 */
static void qos_noop_shutdown(void)
{
    /* intentionally empty */
}
/**
 * Create a noop QoS channel object.
 *
 * Allocates an orte_qos_base_channel_t, records the channel number, tags
 * the channel with the noop QoS type and copies the requested window size
 * from the caller's attribute list into the channel's own attribute list.
 *
 * @param qos_attributes  attribute list supplied by the requester; must
 *                        contain ORTE_QOS_WINDOW_SIZE
 * @param channel_num     channel number to store in the new channel
 *
 * @return the new channel object, or NULL if allocation fails, the window
 *         size attribute is absent or exceeds ORTE_QOS_MAX_WINDOW_SIZE, or
 *         an attribute cannot be stored.
 */
static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num)
{
    orte_qos_base_channel_t *noop_chan;
    int32_t rc;
    /* Storage matches the data types named in the attribute calls below
     * (OPAL_UINT8 / OPAL_UINT32).  Holding the window in a signed 32-bit
     * variable would let values >= 2^31 appear negative and slip past the
     * upper-bound check. */
    uint8_t type_val = (uint8_t)orte_qos_noop;
    uint32_t window_val = 0;
    uint32_t *window = &window_val;

    noop_chan = OBJ_NEW(orte_qos_base_channel_t);
    if (NULL == noop_chan) {
        return NULL;
    }
    noop_chan->channel_num = channel_num;

    // TBD _ we ignore inapplicable attributes for now - need to return error?
    // get attributes of interest to the base and store them locally.
    if (ORTE_SUCCESS != (rc = orte_set_attribute(&noop_chan->attributes, ORTE_QOS_TYPE,
                                                 ORTE_ATTR_GLOBAL, (void*)&type_val, OPAL_UINT8))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(noop_chan);
        return NULL;
    }

    // window size is mandatory
    if (!orte_get_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) {
        OBJ_RELEASE(noop_chan);
        return NULL;
    }

    if (ORTE_QOS_MAX_WINDOW_SIZE < (*window)) {
        ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS);
        OBJ_RELEASE(noop_chan);
        return NULL;
    }

    if (ORTE_SUCCESS != (rc = orte_set_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE,
                                                 ORTE_ATTR_GLOBAL, (void*)window, OPAL_UINT32))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(noop_chan);
        return NULL;
    }

    return noop_chan;
}
/**
 * Open a noop channel: pack the channel's attribute list into the buffer
 * that will be sent to the peer.
 *
 * @param qos_channel  channel object (an orte_qos_base_channel_t)
 * @param buf          buffer that receives the packed attributes
 *
 * @return the status of orte_qos_base_pack_attributes(); a failure is
 *         also logged.
 */
static int noop_open (void *qos_channel, opal_buffer_t * buf)
{
    orte_qos_base_channel_t *chan = (orte_qos_base_channel_t*) qos_channel;
    int32_t status;

    // the Qos module puts the non local attributes to be sent to the peer in a list at the time of create.
    // pack those attributes into the buffer.
    status = orte_qos_base_pack_attributes(buf, &chan->attributes);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    return status;
}
/**
 * Send hook of the noop module.
 *
 * Emits a verbose trace of the message and its destination and otherwise
 * leaves the message untouched.
 *
 * @param qos_channel  channel object (unused)
 * @param msg          message being sent
 *
 * @return ORTE_SUCCESS
 */
static int noop_send ( void *qos_channel, orte_rml_send_t *msg)
{
    (void)qos_channel;   /* no per-channel state is consulted */

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s noop_send msg = %p to peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, ORTE_NAME_PRINT(&msg->dst)));

    return ORTE_SUCCESS;
}
/**
 * Receive hook of the noop module.
 *
 * Emits a verbose trace of the message and its sender and otherwise
 * leaves the message untouched.
 *
 * @param qos_channel  channel object (unused)
 * @param msg          message that was received
 *
 * @return ORTE_SUCCESS
 */
static int noop_recv (void *qos_channel, orte_rml_recv_t *msg)
{
    (void)qos_channel;   /* no per-channel state is consulted */

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s noop_recv msg = %p from peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg, ORTE_NAME_PRINT(&msg->sender)));

    return ORTE_SUCCESS;
}
/**
 * Close a noop channel by dropping a reference on the channel object.
 *
 * @param channel  channel object (an orte_qos_base_channel_t)
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM when channel is NULL.
 */
static int noop_close (void * channel)
{
    orte_qos_base_channel_t *chan = (orte_qos_base_channel_t*) channel;

    if (NULL == chan) {
        return ORTE_ERR_BAD_PARAM;
    }

    OBJ_RELEASE(chan);
    return ORTE_SUCCESS;
}
/**
 * Receive-side initialization hook of the noop module.
 *
 * Neither argument is examined; the noop QoS needs no receive-side setup.
 *
 * @return ORTE_SUCCESS
 */
static int noop_init_recv (void *channel, opal_list_t *attributes)
{
    (void)channel;
    (void)attributes;
    return ORTE_SUCCESS;
}
/**
 * Compare an existing noop channel against a requested attribute list.
 *
 * The QoS type and the window size are read from both the channel's own
 * attributes and the supplied list.
 *
 * @param channel     channel object (an orte_qos_base_channel_t)
 * @param attributes  requested attribute list
 *
 * @return 0 when type and window size both match, 1 when the types match
 *         but the window sizes differ, ORTE_ERROR when the types differ or
 *         any of the four attributes is missing.
 */
static int noop_cmp (void *channel, opal_list_t *attributes)
{
    orte_qos_base_channel_t *noop_chan = (orte_qos_base_channel_t*) channel;
    /* Storage matches the requested data types (OPAL_UINT8 / OPAL_UINT32)
     * and is zero-initialized.  Reading a one-byte attribute into an
     * uninitialized 32-bit variable leaves the remaining bytes
     * indeterminate, which makes the equality test below unreliable.
     * NOTE(review): relies on orte_get_attribute() copying exactly the
     * width of the requested type into the caller's storage - confirm. */
    uint8_t type_a = 0, type_b = 0, *type_ptr;
    uint32_t window_a = 0, window_b = 0, *window_ptr;

    type_ptr = &type_a;
    if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_TYPE, (void**)&type_ptr, OPAL_UINT8)) {
        return ORTE_ERROR;
    }
    type_ptr = &type_b;
    if (!orte_get_attribute(attributes, ORTE_QOS_TYPE, (void**)&type_ptr, OPAL_UINT8)) {
        return ORTE_ERROR;
    }
    if (type_a != type_b) {
        return ORTE_ERROR;
    }

    window_ptr = &window_a;
    if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window_ptr, OPAL_UINT32)) {
        return ORTE_ERROR;
    }
    window_ptr = &window_b;
    if (!orte_get_attribute(attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window_ptr, OPAL_UINT32)) {
        return ORTE_ERROR;
    }

    /* 0 == same window size, 1 == different */
    return (window_a != window_b);
}
/**
 * Send-completion hook of the noop module.
 *
 * The noop QoS adds no bookkeeping of its own; the message is handed
 * straight to ORTE_RML_SEND_COMPLETE.
 *
 * @param msg  the send message that finished
 */
static void noop_send_callback (orte_rml_send_t *msg)
{
    /* no QoS-specific processing for noop */
    ORTE_RML_SEND_COMPLETE(msg);
}

Просмотреть файл

@ -1,159 +0,0 @@
/**
* copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
* This header defines Quality of Service Interface for Runtime messaging
*/
/**
* @file
*
* Quality of Service (QoS) Communication Interface
*
* The QoS layer is responsible for providing quality of service for
* messages exchanged between two ORTE processes through the use of
* channels.
*/
#ifndef MCA_QOS_H_
#define MCA_QOS_H_
#include "orte_config.h"
#include "orte/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/qos/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
BEGIN_C_DECLS
/* ******************************************************************** */
struct opal_buffer_t;
struct orte_process_name_t;
/* ******************************************************************** */
/* Channel number value used to mark "no valid channel".
 * NOTE(review): no live use is visible here - confirm intended semantics. */
#define ORTE_QOS_INVALID_CHANNEL_NUM 0xFFFF
/* NOTE(review): presumably the number of QoS component types (the
 * orte_qos_type_t enum in this header has five values) - confirm. */
#define ORTE_QOS_MAX_COMPONENTS 5
/* Callback signature carrying a status, a channel number, the peer's
 * process name and the caller's opaque cbdata. */
typedef void (*orte_qos_callback_fn_t)(int status,
int channel_num,
struct orte_process_name_t* peer,
void* cbdata);
/* Component-level start hook; returns an int status. */
typedef int (*mca_qos_base_component_start_fn_t)(void);
/* Component-level shutdown hook; returns nothing. */
typedef void (*mca_qos_base_component_shutdown_fn_t)(void);
/* Fault-tolerance event hook; only declared in checkpoint/restart builds. */
#if OPAL_ENABLE_FT_CR == 1
typedef int (*mca_qos_base_component_ft_event_fn_t)(int state);
#endif
/*
 * Framework-level channel entry points.
 *
 * Every function takes an opaque QoS module handle (qos_mod); all but
 * create also take an opaque QoS channel handle (qos_channel), which is the
 * value returned by orte_qos_create_channel().
 * NOTE(review): these presumably forward to the corresponding
 * orte_qos_module_t entry of qos_mod - the implementations are not in this
 * header, confirm.  Note also that orte_qos_init_recv_channel() returns
 * void although the module-level init_recv hook returns int.
 */
ORTE_DECLSPEC void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num);
ORTE_DECLSPEC int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer);
ORTE_DECLSPEC int orte_qos_close_channel (void *qos_mod, void *qos_channel);
ORTE_DECLSPEC void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes);
ORTE_DECLSPEC int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes);
ORTE_DECLSPEC int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg);
ORTE_DECLSPEC int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg);
/**
 * qos module (channel) create function
 * initialize type specific attributes of the channel.
 * Returns an opaque channel object that is handed back to the other
 * module functions.
 */
typedef void* (*orte_qos_base_module_create_fn_t) (opal_list_t *qos_attributes, uint32_t channel_num);
/**
 * qos module (channel) open function
 * this function is called when rml_open_channel is requested
 * The buffer is the message that will be sent to the peer.
 */
typedef int (*orte_qos_base_module_open_fn_t) (void *qos_channel,
opal_buffer_t * buf);
/**
 * qos module (channel) send function
 * this function is called when rml_send_channel is requested
 */
typedef int (*orte_qos_base_module_send_fn_t) ( void * qos_channel,
orte_rml_send_t *send);
/**
 * qos module (channel) recv function
 * this function is called when a message is received on a channel
 */
typedef int (*orte_qos_base_module_recv_fn_t) ( void * channel,
orte_rml_recv_t *msg);
/**
 * qos module (channel) close function
 * this function is called when a channel is being closed
 */
typedef int (*orte_qos_base_module_close_fn_t) ( void * channel);
/**
 * qos module (channel) init recv
 * this function is used to initialize a channel for receiving msgs (called in response to open_channel req from peer)
 */
typedef int (*orte_qos_base_module_init_recv_fn_t) (void * channel, opal_list_t * attributes);
/**
 * qos module (channel) compare functions
 * compares attributes of existing channel with the requested list of attributes
 */
typedef int (*orte_qos_base_module_cmp_fn_t) (void * channel, opal_list_t * attributes);
/**
 * qos module (channel) send callback function
 * receives the send message object.
 * NOTE(review): presumably invoked when a send on the channel has
 * completed - the call site is not visible here, confirm.
 */
typedef void (*orte_qos_base_module_send_callback_fn_t) (orte_rml_send_t *msg);
/**
 *
 * the qos module data structure: a table of the per-channel operations
 * (one function pointer per orte_qos_base_module_*_fn_t type) that a QoS
 * component provides.
 */
typedef struct {
orte_qos_base_module_create_fn_t create;
orte_qos_base_module_open_fn_t open;
orte_qos_base_module_send_fn_t send;
orte_qos_base_module_recv_fn_t recv;
orte_qos_base_module_close_fn_t close;
orte_qos_base_module_init_recv_fn_t init_recv;
orte_qos_base_module_cmp_fn_t cmp;
orte_qos_base_module_send_callback_fn_t send_callback;
} orte_qos_module_t;
/* Identifiers for the kinds of QoS a channel can request.  The value is
 * stored in each component's `type` field. */
typedef enum {
orte_qos_noop = 0,
orte_qos_ack = 1,
orte_qos_nack = 2,
orte_qos_ack_nack_hybrid = 3,
orte_qos_multipath = 4,
}orte_qos_type_t ;
/* QoS component descriptor: the generic MCA component header followed by
 * the QoS-specific start/shutdown hooks, the QoS type the component
 * implements and its module function table. */
typedef struct {
/* generic MCA component header */
mca_base_component_t qos_base;
/* component start / shutdown hooks */
mca_qos_base_component_start_fn_t start;
mca_qos_base_component_shutdown_fn_t shutdown;
/* QoS type implemented by this component */
orte_qos_type_t type;
/* per-channel operations provided by this component */
orte_qos_module_t mod;
/* mca_qos_base_componenet_open_channel_fn_t open_channel;
mca_qos_base_component_send_channel_nb_fn_t send_channel;
mca_qos_base_component_recv_channel_nb_fn_t recv_channel;
mca_qos_base_component_close_channel_fn_t close_channel;*/
/* fault-tolerance event hook; present only in checkpoint/restart builds */
#if OPAL_ENABLE_FT_CR == 1
mca_qos_base_component_ft_event_fn_t ft_event;
#endif
} mca_qos_base_component_t;
/**
 * Macro for use in components that are of type qos
 */
#define MCA_QOS_BASE_VERSION_2_0_0 \
ORTE_MCA_BASE_VERSION_2_1_0 ("qos", 2, 0, 0)
END_C_DECLS
#endif

Просмотреть файл

@ -11,6 +11,7 @@
# All rights reserved.
# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -27,4 +28,4 @@ libmca_rml_la_SOURCES += \
base/rml_base_receive.c \
base/rml_base_contact.c \
base/rml_base_msg_handlers.c \
base/rml_base_channel_handlers.c
base/rml_base_stubs.c

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -82,11 +82,23 @@ ORTE_DECLSPEC void orte_rml_base_comm_start(void);
ORTE_DECLSPEC void orte_rml_base_comm_stop(void);
/*
* globals that might be needed
*/
/* adding element to hold the active modules and components */
typedef struct {
opal_list_item_t super;
int pri;
orte_rml_base_module_t *module;
mca_base_component_t *component;
} orte_rml_base_active_t;
OBJ_CLASS_DECLARATION(orte_rml_base_active_t);
/* a global struct containing framework-level values */
typedef struct {
opal_list_t actives; /* list to hold the active plugins */
opal_list_t posted_recvs;
opal_list_t unmatched_msgs;
opal_pointer_array_t open_channels;
#if OPAL_ENABLE_TIMING
bool timing;
#endif
@ -105,45 +117,6 @@ ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base;
*/
ORTE_DECLSPEC extern opal_list_t orte_rml_base_components;
/**
* Component structure for the selected RML component
*
* Component structure pointer for the currently selected RML
* component. Useable between calls to orte_rml_base_select() and
* orte_rml_base_close().
* @note This pointer should not be used outside the RML base. It is
* available outside the RML base only for the F/T component.
*/
ORTE_DECLSPEC extern orte_rml_component_t *orte_rml_component;
typedef enum {
orte_rml_channel_opening = 0,
orte_rml_channel_open = 1,
orte_rml_channel_closing = 2,
orte_rml_channel_closed = 3,
}orte_rml_channel_state_t;
/**
* RML channel structure.
* The RML only needs basic channel information as the rest of the book keeping information
* is stored in the QoS module specific channel object.
* It contains a pointer to the QoS module that handles requests on the channel.
* It contains a pointer to a struct that contains the QoS specific channel data.
*/
typedef struct {
opal_list_item_t super;
orte_rml_channel_num_t channel_num; // the channel number reference (exposed to the user).
orte_process_name_t peer; // the other end point (peer) of the channel
orte_rml_channel_num_t peer_channel; // peer channel number
void * qos; // pointer to QoS component specific module
void * qos_channel_ptr; // pointer to QoS component specific channel struct
orte_rml_channel_state_t state; // channel state
bool recv; // set to true if this is a receive (peer opened) channel. (Default is send channel)
} orte_rml_channel_t;
OBJ_CLASS_DECLARATION(orte_rml_channel_t);
/* structure to send RML messages - used internally */
typedef struct {
opal_list_item_t super;
@ -156,8 +129,6 @@ typedef struct {
union {
orte_rml_callback_fn_t iov;
orte_rml_buffer_callback_fn_t buffer;
orte_rml_send_channel_callback_fn_t iov_chan;
orte_rml_send_buffer_channel_callback_fn_t buf_chan;
} cbfunc;
void *cbdata;
@ -166,11 +137,6 @@ typedef struct {
int count;
/* pointer to the user's buffer */
opal_buffer_t *buffer;
/*** TODO : need to move channel specific data to a channel struct */
/* pointer to the channel object */
orte_rml_channel_t *channel;
/* destination channel number */
orte_rml_channel_num_t dst_channel;
/* msg seq number */
uint32_t seq_num;
/* pointer to raw data for cross-transport
@ -180,47 +146,11 @@ typedef struct {
} orte_rml_send_t;
OBJ_CLASS_DECLARATION(orte_rml_send_t);
/* structure to send RML channel open messages - used internally */
typedef struct {
opal_list_item_t super;
/* peer process */
orte_process_name_t dst;
/* msg send status */
int status;
/* channel object */
orte_rml_channel_t *channel;
/* attributes of the channel */
opal_list_t *qos_attributes;
/* user's callback function */
orte_rml_channel_callback_fn_t cbfunc;
/* user's cbdata */
void *cbdata;
} orte_rml_open_channel_t;
OBJ_CLASS_DECLARATION(orte_rml_open_channel_t);
/* structure to send RML channel close messages - used internally */
typedef struct {
opal_list_item_t super;
/* msg send status */
int status;
/* channel object */
orte_rml_channel_t *channel;
/* user's callback function */
orte_rml_channel_callback_fn_t cbfunc;
/* user's cbdata */
void *cbdata;
} orte_rml_close_channel_t;
OBJ_CLASS_DECLARATION(orte_rml_close_channel_t);
/* define an object for transferring send requests to the event lib */
typedef struct {
opal_object_t super;
opal_event_t ev;
union {
orte_rml_send_t send;
orte_rml_open_channel_t open_channel;
orte_rml_close_channel_t close_channel;
}post;
} orte_rml_send_request_t;
OBJ_CLASS_DECLARATION(orte_rml_send_request_t);
@ -230,7 +160,6 @@ typedef struct {
opal_event_t ev;
orte_process_name_t sender; // sender
orte_rml_tag_t tag; // targeted tag
orte_rml_channel_num_t channel_num; // channel number
uint32_t seq_num; //sequence number
struct iovec iov; // the recvd data
} orte_rml_recv_t;
@ -259,7 +188,7 @@ typedef struct {
} orte_rml_recv_request_t;
OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
#define ORTE_RML_POST_MESSAGE(p, t, c, s, b, l) \
#define ORTE_RML_POST_MESSAGE(p, t, s, b, l) \
do { \
orte_rml_recv_t *msg; \
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
@ -270,7 +199,6 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
msg->sender.jobid = (p)->jobid; \
msg->sender.vpid = (p)->vpid; \
msg->tag = (t); \
msg->channel_num = (c); \
msg->seq_num = (s); \
msg->iov.iov_base = (IOVBASE_TYPE*)(b); \
msg->iov.iov_len = (l); \
@ -314,7 +242,6 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
ORTE_NAME_PRINT(&((m)->dst)), \
__FILE__, __LINE__); \
if( NULL == (m)->channel) { \
if (NULL != (m)->iov) { \
if (NULL != (m)->cbfunc.iov) { \
(m)->cbfunc.iov((m)->status, \
@ -328,85 +255,53 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t);
(m)->buffer, \
(m)->tag, (m)->cbdata); \
} \
} else { \
if (NULL != (m)->iov) { \
if (NULL != (m)->cbfunc.iov_chan) { \
(m)->cbfunc.iov_chan((m)->status, \
(m)->channel->channel_num, \
(m)->iov, (m)->count, \
(m)->tag, (m)->cbdata); \
} \
} else { \
/* non-blocking buffer send */ \
(m)->cbfunc.buf_chan((m)->status, \
(m)->channel->channel_num, \
(m)->buffer, \
(m)->tag, (m)->cbdata); \
} \
} \
OBJ_RELEASE(m); \
}while(0);
#define ORTE_RML_OPEN_CHANNEL_COMPLETE(m) \
do { \
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
"%s-%s open channel message complete at %s:%d", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
ORTE_NAME_PRINT(&((m)->dst)), \
__FILE__, __LINE__); \
/* call the callback function */ \
(m)->cbfunc((m)->status, (m)->channel->channel_num, \
&((m)->dst), \
NULL, (m)->cbdata) ; \
}while(0);
#define ORTE_RML_CLOSE_CHANNEL_COMPLETE(m) \
do { \
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
"%s-%d close channel message complete at %s:%d", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
(m)->channel->channel_num, \
__FILE__, __LINE__); \
/* call the callback function */ \
(m)->cbfunc((m)->status, (m)->channel->channel_num, \
NULL, NULL, (m)->cbdata) ; \
}while(0);
/*
* This is the base priority for a RML wrapper component
* If there exists more than one wrapper, then the one with
* the lowest priority wins.
*/
#define RML_SELECT_WRAPPER_PRIORITY -128
#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX
ORTE_DECLSPEC orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num);
/* common implementations */
ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_open_channel(int fd, short flags, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_close_channel(int fd, short flags, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_open_channel_send_callback ( int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
ORTE_DECLSPEC void orte_rml_base_open_channel_resp_callback (int status, orte_process_name_t* peer,
struct opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
ORTE_DECLSPEC void orte_rml_base_open_channel_reply_send_callback ( int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
ORTE_DECLSPEC void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel,
orte_rml_send_t *send);
ORTE_DECLSPEC int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel,
orte_rml_recv_t *recv);
ORTE_DECLSPEC void orte_rml_base_close_channel_send_callback ( int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata);
ORTE_DECLSPEC void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan);
ORTE_DECLSPEC void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg);
/* Stub API interfaces to cycle through active plugins and call highest priority */
ORTE_DECLSPEC int orte_rml_API_enable_comm(void);
ORTE_DECLSPEC void orte_rml_API_finalize(void);
ORTE_DECLSPEC char* orte_rml_API_get_contact_info(void);
ORTE_DECLSPEC void orte_rml_API_set_contact_info(const char *contact_info);
ORTE_DECLSPEC int orte_rml_API_ping(const char* contact_info, const struct timeval* tv);
ORTE_DECLSPEC int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg,
int count, orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc, void* cbdata);
ORTE_DECLSPEC int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
ORTE_DECLSPEC void orte_rml_API_recv_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
ORTE_DECLSPEC void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
ORTE_DECLSPEC void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
ORTE_DECLSPEC int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc);
ORTE_DECLSPEC int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc);
ORTE_DECLSPEC int orte_rml_API_ft_event(int state);
ORTE_DECLSPEC void orte_rml_API_purge(orte_process_name_t *peer);
END_C_DECLS
#endif /* MCA_RML_BASE_H */

Просмотреть файл

@ -1,544 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
*/
/*
* includes
*/
#include "orte_config.h"
#include <string.h>
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/dss/dss.h"
#include "opal/util/output.h"
#include "opal/util/timings.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/qos/base/base.h"
/* Unpack a counted list of attributes from buffer and append them to
 * qos_attributes. */
static int unpack_channel_attributes (opal_buffer_t *buffer, opal_list_t *qos_attributes);
/* Look up a channel for the given peer and attribute list; the open path
 * in this file treats a non-NULL result as "channel already exists". */
static orte_rml_channel_t * get_channel ( orte_process_name_t * peer,
opal_list_t *qos_attributes,
bool recv);
/* NOTE(review): presumably sends the accept/reject reply for an
 * open-channel request back to the peer - definition not visible here. */
static int send_open_channel_reply (orte_process_name_t *peer,
orte_rml_channel_t *channel,
bool accept);
/**
 * Handle a queued close-channel request.
 *
 * Copies the channel and the user's callback information out of the send
 * request into a fresh orte_rml_close_channel_t, releases the request and
 * asks the QoS layer to close the channel.  If the QoS layer agrees, the
 * close message is sent to the peer; otherwise the request is completed
 * with ORTE_ERR_CHANNEL_BUSY.
 *
 * @param fd      unused
 * @param flags   unused
 * @param cbdata  the orte_rml_send_request_t carrying the close request
 */
void orte_rml_base_close_channel(int fd, short flags, void *cbdata)
{
    orte_rml_send_request_t *request = (orte_rml_send_request_t*)cbdata;
    orte_rml_close_channel_t *closer;
    int qos_rc;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_close_channel to peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&request->post.close_channel.channel->peer)));
    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&request->post.close_channel.channel->peer)));

    /* move what we need out of the request before dropping it */
    closer = OBJ_NEW(orte_rml_close_channel_t);
    closer->channel = request->post.close_channel.channel;
    closer->cbfunc = request->post.close_channel.cbfunc;
    closer->cbdata = request->post.close_channel.cbdata;
    OBJ_RELEASE(request);

    /* ask the QoS layer whether the channel can be closed */
    qos_rc = orte_qos_close_channel(closer->channel->qos,
                                    closer->channel->qos_channel_ptr);
    if (ORTE_SUCCESS != qos_rc) {
        /* not closable right now: report channel busy to the user */
        closer->status = ORTE_ERR_CHANNEL_BUSY;
        ORTE_RML_CLOSE_CHANNEL_COMPLETE(closer);
        OBJ_RELEASE(closer);
        return;
    }

    orte_rml_base_send_close_channel(closer);
}
/**
 * Send the close-channel request for close_chan to the channel's peer.
 *
 * The message carries the peer's channel number.  On success the request
 * is completed later by orte_rml_base_close_channel_send_callback().  If
 * the message cannot be packed or handed to the RML, the failure is
 * logged and the request is completed right away with the error status,
 * so the user's callback always fires and nothing is leaked.
 *
 * @param close_chan  the close request; ownership passes to this function
 */
void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan)
{
    opal_buffer_t *buffer;
    int rc;

    // send msg to peer to close channel.
    buffer = OBJ_NEW (opal_buffer_t);

    /* pack the channel number*/
    rc = opal_dss.pack(buffer, &close_chan->channel->peer_channel, 1, OPAL_UINT32);
    if (ORTE_SUCCESS == rc) {
        rc = orte_rml.send_buffer_nb(&close_chan->channel->peer, buffer, ORTE_RML_TAG_CLOSE_CHANNEL_REQ,
                                     orte_rml_base_close_channel_send_callback,
                                     close_chan);
        if (ORTE_SUCCESS == rc) {
            /* the send callback now owns buffer and close_chan */
            return;
        }
    }

    /* NOTE(review): assumes a non-success return from send_buffer_nb means
     * the message was not queued and the send callback will not run -
     * confirm against the RML contract. */
    ORTE_ERROR_LOG(rc);
    close_chan->status = rc;
    ORTE_RML_CLOSE_CHANNEL_COMPLETE(close_chan);
    OBJ_RELEASE(buffer);
    OBJ_RELEASE(close_chan);
}
/**
 * Send-completion callback for a close-channel request.
 *
 * Records the send status in the request, logs it if it is an error,
 * completes the request towards the user, removes the channel from the
 * open-channel table and releases the channel, the request and the
 * message buffer.
 *
 * @param status  result of the send
 * @param sender  process name reported by the RML for this send
 * @param buffer  the buffer that was sent; released here
 * @param tag     RML tag of the message (unused)
 * @param cbdata  the orte_rml_close_channel_t being completed
 */
void orte_rml_base_close_channel_send_callback ( int status,
                                                 orte_process_name_t* sender,
                                                 opal_buffer_t* buffer,
                                                 orte_rml_tag_t tag,
                                                 void* cbdata)
{
    // this is the send call back for close channel request
    orte_rml_close_channel_t *close_req = (orte_rml_close_channel_t*) cbdata;
    orte_process_name_t dest = close_req->channel->peer;

    opal_output_verbose(5, orte_rml_base_framework.framework_output,
                        "%s rml_close_channel_send_callback to peer %s status = %d",
                        ORTE_NAME_PRINT(sender),
                        ORTE_NAME_PRINT(&dest), status);

    close_req->status = status;
    // if the message could not be sent log error
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG (close_req->status);
    }

    // complete the request towards the user
    ORTE_RML_CLOSE_CHANNEL_COMPLETE(close_req);

    // forget the channel, then release the channel object, the request and the buffer
    opal_pointer_array_set_item ( &orte_rml_base.open_channels, close_req->channel->channel_num, NULL);
    OBJ_RELEASE(close_req->channel);
    OBJ_RELEASE(close_req);
    OBJ_RELEASE(buffer);
}
/**
 * Handle a queued open-channel request.
 *
 * Steps:
 *  1. If a matching channel to the peer already exists, complete the
 *     request with ORTE_ERR_OPEN_CHANNEL_DUPLICATE.
 *  2. Otherwise create an RML channel object, register it in the
 *     open-channel table and move the request data into a new
 *     orte_rml_open_channel_t.
 *  3. Select the QoS module from the requested attributes, create the QoS
 *     channel, let the QoS module pack its attributes, append the local
 *     channel number, post the receive for the peer's response and send
 *     the request.
 *
 * Every failure after step 2 completes the request with an error status,
 * removes the channel from the table and releases the channel and the
 * request.
 *
 * @param fd      unused
 * @param flags   unused
 * @param cbdata  the orte_rml_send_request_t carrying the open request
 */
void orte_rml_base_open_channel(int fd, short flags, void *cbdata)
{
    /* the QoS type attribute is requested as OPAL_UINT8, so it is stored in
     * a zero-initialized uint8_t; a wider, uninitialized variable would be
     * only partly written and then printed below */
    uint8_t type_val = 0;
    uint8_t *type = &type_val;
    orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
    orte_process_name_t peer;
    orte_rml_open_channel_t *open_chan;
    orte_rml_channel_t *channel;
    opal_buffer_t *buffer;

    peer = req->post.open_channel.dst;
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel to peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&peer)));
    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&peer)));

    /* return error if a channel already exists */
    if ( NULL != (channel = get_channel (&peer, req->post.open_channel.qos_attributes, false)))
    {
        req->post.open_channel.status = ORTE_ERR_OPEN_CHANNEL_DUPLICATE;
        req->post.open_channel.channel = channel;
        ORTE_RML_OPEN_CHANNEL_COMPLETE(&req->post.open_channel);
        OBJ_RELEASE(req);
        return;
    }

    /* create and register the new channel */
    channel = OBJ_NEW(orte_rml_channel_t);
    channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel);
    channel->peer = peer;

    /* move the request data into the open-channel tracker */
    open_chan = OBJ_NEW(orte_rml_open_channel_t);
    open_chan->dst = peer;
    open_chan->qos_attributes = req->post.open_channel.qos_attributes;
    open_chan->cbfunc = req->post.open_channel.cbfunc;
    open_chan->cbdata = req->post.open_channel.cbdata;
    OBJ_RELEASE(req);
    // associate open channel request and the newly created channel object
    open_chan->channel = channel;

    if (!orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
        /* no QoS type was requested, so no QoS module can be chosen: fail
         * the request instead of silently dropping it (which would leak
         * the request and the channel and leave a stale table entry) */
        open_chan->status = ORTE_ERR_QOS_TYPE_UNSUPPORTED;
        ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan);
        opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL);
        OBJ_RELEASE(open_chan->channel);
        OBJ_RELEASE(open_chan);
        return;
    }

    open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes);
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel type = %d to peer %s ",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         *type,
                         ORTE_NAME_PRINT(&peer)));

    // now associate qos with the channel based on user requested attributes.
    if ( NULL != open_chan->channel->qos)
    {
        /* NOTE(review): the result of orte_qos_create_channel is not checked
         * for NULL before being handed to orte_qos_open_channel - confirm
         * the QoS wrappers tolerate a NULL channel. */
        open_chan->channel->qos_channel_ptr = orte_qos_create_channel (open_chan->channel->qos,
                                                                       open_chan->qos_attributes,
                                                                       open_chan->channel->channel_num);
        // create rml send for open channel request. Call the corresponding QoS module to pack the attributes.
        buffer = OBJ_NEW (opal_buffer_t);
        // call QoS module to pack attributes
        if ( ORTE_SUCCESS == (orte_qos_open_channel(open_chan->channel->qos, open_chan->channel->qos_channel_ptr, buffer)))
        {
            /* pack channel number at the end */
            opal_dss.pack(buffer, (void*) &open_chan->channel->channel_num, 1, OPAL_UINT32);
            OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                                 "%s rml_open_channel to peer %s SUCCESS sending to peer",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&peer)));
            // post a receive for open_channel_response tag
            orte_rml.recv_buffer_nb(&peer, ORTE_RML_TAG_OPEN_CHANNEL_RESP,
                                    ORTE_RML_NON_PERSISTENT, orte_rml_base_open_channel_resp_callback, open_chan);
            // send request to peer to open channel
            orte_rml.send_buffer_nb( &peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ,
                                     orte_rml_base_open_channel_send_callback,
                                     open_chan);
        } else {
            open_chan->status = ORTE_ERR_PACK_FAILURE;
            ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan);
            opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL);
            // call QoS module to release the QoS channel object.
            orte_qos_close_channel (open_chan->channel->qos, open_chan->channel->qos_channel_ptr);
            OBJ_RELEASE (buffer);
            OBJ_RELEASE(open_chan->channel);
            OBJ_RELEASE(open_chan);
        }
    }
    else
    {
        // do error completion because a component for the requested QoS does not exist
        open_chan->status = ORTE_ERR_QOS_TYPE_UNSUPPORTED;
        ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan);
        opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL);
        OBJ_RELEASE(open_chan->channel);
        OBJ_RELEASE(open_chan);
    }
}
/**
 * Send-completion callback for an open-channel request.
 *
 * When the send failed, the request is completed with the send status, the
 * channel is removed from the open-channel table, the QoS channel is
 * closed and both the channel and the request are released.  When the send
 * succeeded nothing further happens here; completion is driven by the
 * peer's response.  The message buffer is released in both cases.
 *
 * @param status  result of the send
 * @param sender  process name reported by the RML for this send
 * @param buffer  the buffer that was sent; released here
 * @param tag     RML tag of the message (unused)
 * @param cbdata  the orte_rml_open_channel_t being tracked
 */
void orte_rml_base_open_channel_send_callback ( int status,
                                                orte_process_name_t* sender,
                                                opal_buffer_t* buffer,
                                                orte_rml_tag_t tag,
                                                void* cbdata)
{
    // this is the send call back for open channel request
    orte_rml_open_channel_t *open_req = (orte_rml_open_channel_t*) cbdata;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel_send_callback to peer %s status = %d",
                         ORTE_NAME_PRINT(sender),
                         ORTE_NAME_PRINT(&open_req->dst), status));

    if (ORTE_SUCCESS != status) {
        // the message was not sent: complete the request with the send status
        // NOTE(review): the receive posted for the peer's response still
        // holds this request as cbdata after it is released here - confirm
        // whether that receive should be cancelled.
        open_req->status = status;
        ORTE_RML_OPEN_CHANNEL_COMPLETE(open_req);
        opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_req->channel->channel_num, NULL);
        // call QoS module to release the QoS channel object.
        orte_qos_close_channel (open_req->channel->qos, open_req->channel->qos_channel_ptr);
        OBJ_RELEASE(open_req->channel);
        OBJ_RELEASE(open_req);
    }
    // on success: (TODO from original) start a timer for response from peer

    OBJ_RELEASE(buffer);
}
/**
 * Receive callback for the peer's response to an open-channel request.
 *
 * The response holds a boolean accept flag and, when accepted, the peer's
 * channel number.  The resulting request status is:
 *  - ORTE_SUCCESS when the peer accepted and its channel number was read
 *    (the channel state becomes orte_rml_channel_open);
 *  - ORTE_ERR_UNPACK_FAILURE when either value cannot be unpacked;
 *  - ORTE_ERR_OPEN_CHANNEL_PEER_REJECT when the peer declined.
 *
 * The request is always completed towards the user.  On failure the
 * channel is then removed from the open-channel table, the QoS channel is
 * closed and the channel object is released.  Completion happens BEFORE
 * the channel is released because both the trace output and
 * ORTE_RML_OPEN_CHANNEL_COMPLETE read the channel number through the
 * channel pointer; this matches the order used by the other error paths
 * in this file.
 *
 * @param status  receive status (only traced)
 * @param peer    the responding process
 * @param buffer  the response message
 * @param tag     RML tag of the message (unused)
 * @param cbdata  the orte_rml_open_channel_t being tracked
 */
void orte_rml_base_open_channel_resp_callback (int status,
                                               orte_process_name_t* peer,
                                               struct opal_buffer_t* buffer,
                                               orte_rml_tag_t tag,
                                               void* cbdata)
{
    orte_rml_open_channel_t *req = (orte_rml_open_channel_t*) cbdata;
    orte_rml_channel_t * channel = req->channel;
    int32_t rc;
    bool peer_resp = false;
    int32_t count = 1;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel_resp_callback to peer %s status = %d channel = %p",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), status,
                         (void*)channel));

    // unpack peer response from buffer to determine if peer has accepted the open request
    rc = opal_dss.unpack(buffer, &peer_resp, &count, OPAL_BOOL);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        req->status = ORTE_ERR_UNPACK_FAILURE;
    } else if (!peer_resp) {
        req->status = ORTE_ERR_OPEN_CHANNEL_PEER_REJECT;
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_open_channel_resp_callback to peer response = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             peer_resp));
        /* response will contain the peer channel number - the peer does not have the
           option to change the channel attributes
           unpack and get peer channel number.*/
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel, &count, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            req->status = ORTE_ERR_UNPACK_FAILURE;
            // TBD : should we send a close channel to the peer??
        }
        else {
            // call qos module to update the channel state.??
            req->status = ORTE_SUCCESS;
            req->channel->state = orte_rml_channel_open;
        }
    }

    /* the channel object is still alive here on every path */
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel_resp_callback to peer %s status = %d channel =%p num = %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), req->status,
                         (void*)channel, channel->channel_num));
    ORTE_RML_OPEN_CHANNEL_COMPLETE(req);

    if (ORTE_SUCCESS != req->status) {
        /* tear the channel down only after the user has been notified */
        opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL);
        // call QoS module to release the QoS channel object.
        orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr);
        OBJ_RELEASE(req->channel);
    }
    OBJ_RELEASE(req);
}
/*
 * Unpack a counted list of attributes from a buffer and append each one to
 * qos_attributes, marking it ORTE_ATTR_GLOBAL.
 *
 * The buffer is read as one ORTE_STD_CNTR count followed by that many
 * ORTE_ATTRIBUTE entries.  On an unpack error the error is logged and
 * returned; attributes appended before the failure stay on the list.
 *
 * @return ORTE_SUCCESS, or the failing unpack's error code
 */
static int unpack_channel_attributes (opal_buffer_t *buffer,
                                      opal_list_t *qos_attributes)
{
    orte_attribute_t *attr;
    int32_t num_attrs, num_vals, idx;
    int32_t ret;

    /* the attribute count comes first */
    num_vals = 1;
    ret = opal_dss.unpack(buffer, &num_attrs, &num_vals, ORTE_STD_CNTR);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_unpack_attributes num attributes = %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         num_attrs));

    /* then one attribute at a time */
    idx = 0;
    while (idx < num_attrs) {
        num_vals = 1;
        ret = opal_dss.unpack(buffer, &attr, &num_vals, ORTE_ATTRIBUTE);
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "rml_unpack_attributes unpacked attribute key = %d, value = %d ",
                             attr->key,
                             attr->data.uint8));
        attr->local = ORTE_ATTR_GLOBAL;
        opal_list_append(qos_attributes, &attr->super);
        idx++;
    }
    return ret;
}
/*
 * Receive callback for an incoming open-channel request.
 *
 * Unpacks the requested QoS attributes, and if no matching receive channel
 * to this peer already exists, creates one, binds a QoS channel to it and
 * replies with an accept.  Otherwise (unpack failure, QoS channel creation
 * failure, or a matching channel already present) a reject is sent.
 *
 * Reject replies never carry a channel as send-callback data: the send
 * callback releases whatever channel it is given when the send fails, so
 * handing it an existing live channel could destroy it.
 *
 * @param status  status handed in by the messaging layer (unused)
 * @param peer    requesting process
 * @param buffer  request payload: attributes, then the sender's channel number
 * @param tag     tag the request arrived on (unused)
 * @param cbdata  unused
 */
void orte_rml_open_channel_recv_callback (int status,
                                          orte_process_name_t* peer,
                                          struct opal_buffer_t* buffer,
                                          orte_rml_tag_t tag,
                                          void* cbdata)
{
    opal_list_t qos_attributes;
    orte_rml_channel_t *channel;
    uint8_t *type, type_val = 10;
    int32_t count =1;
    int32_t rc;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel_recv_callback from peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));
    OBJ_CONSTRUCT(&qos_attributes, opal_list_t);

    /* unpack attributes first */
    if (ORTE_SUCCESS != unpack_channel_attributes( buffer, &qos_attributes)) {
        //reply with error message
        send_open_channel_reply (peer, NULL, false);
        OPAL_LIST_DESTRUCT(&qos_attributes);
        return;
    }

    /* a request without a QoS type is dropped without a reply */
    type = &type_val;
    if (!orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
        OPAL_LIST_DESTRUCT(&qos_attributes);
        return;
    }
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "rml_open_channel_recv_callback type =%d",
                         type_val));

    /* scan the list of channels to see if we already have a channel with qos_attributes */
    channel = get_channel ( peer, &qos_attributes, true);
    if (NULL != channel) {
        /* there exists a channel with the same attributes reject the request */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "rml_open_channel_recv_callback OOPS CHANNEL EXISTS ALREADY channel num =%d",
                             channel->channel_num));
        /* pass NULL, not the existing channel: a failed reject send must not
         * tear down the channel that is already in use */
        send_open_channel_reply (peer, NULL, false);
        OPAL_LIST_DESTRUCT(&qos_attributes);
        return;
    }

    /* create a new channel for the req */
    channel = OBJ_NEW(orte_rml_channel_t);
    channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel);
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "rml_open_channel_recv_callback channel num =%d",
                         channel->channel_num));
    channel->peer = *peer;
    channel->recv = true;
    channel->qos = (void*) orte_qos_get_module (&qos_attributes);
    /* now associate qos with the channel based on requested attributes */
    channel->qos_channel_ptr = (void*) orte_qos_create_channel(channel->qos, &qos_attributes,
                                                               channel->channel_num);
    if (channel->qos_channel_ptr) {
        /* call qos to init recv state */
        orte_qos_init_recv_channel ( channel->qos, channel->qos_channel_ptr, &qos_attributes);
        /* send channel accept reply to sender */
        if(ORTE_SUCCESS == send_open_channel_reply (peer, channel, true)) {
            /* update channel state */
            channel->state = orte_rml_channel_open;
            /*store src channel number */
            rc = opal_dss.unpack(buffer, (void*) &channel->peer_channel, &count, OPAL_UINT32);
            if (ORTE_SUCCESS != rc) {
                /* the accept is already on its way; at least record that the
                 * peer's channel number could not be read */
                ORTE_ERROR_LOG(rc);
            }
        }
        else {
            /* the receiver shall not attempt to resend or send a reject message
               instead we let the sender's request timeout at his end.
               release the channel etc */
            opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL);
            orte_qos_close_channel (channel->qos, channel->qos_channel_ptr);
            OBJ_RELEASE(channel);
        }
    } else {
        /* no QoS channel could be created: reject and drop the new channel */
        send_open_channel_reply (peer, NULL, false);
        opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL);
        OBJ_RELEASE(channel);
    }
    OPAL_LIST_DESTRUCT(&qos_attributes);
}
/*
 * Build and send the reply to an open-channel request.
 *
 * The reply carries the accept flag and, when accepting, this side's channel
 * number.  The buffer is released here on every failure path; on a
 * successful post it is released by the send callback.
 *
 * @param peer     process to reply to
 * @param channel  channel being accepted; only dereferenced when accept is true
 * @param accept   true to accept the request, false to reject it
 * @return ORTE_SUCCESS if the reply was handed to the messaging layer,
 *         otherwise the pack/send error code
 */
static int send_open_channel_reply (orte_process_name_t *peer,
                                    orte_rml_channel_t *channel,
                                    bool accept)
{
    opal_buffer_t *buffer;
    int32_t rc;

    buffer = OBJ_NEW (opal_buffer_t);
    if (NULL == buffer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &accept , 1, OPAL_BOOL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
        return rc;
    }
    if (accept) {
        if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buffer);
            return rc;
        }
    }
    /* TBD: should specify reason for reject
       send open channel response to sender.
       The send callback releases any channel it is given when the send fails,
       so only hand it the channel when we are actually accepting. */
    rc = orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_RESP,
                                   orte_rml_base_open_channel_reply_send_callback,
                                   accept ? channel : NULL);
    if (ORTE_SUCCESS != rc) {
        /* NOTE(review): assumes the callback is not invoked when the post
         * itself fails, so the buffer is still ours to release - confirm */
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buffer);
    }
    return rc;
}
/*
 * Look through the open_channels array for a channel that
 *   - talks to the given peer,
 *   - is in the open or opening state,
 *   - has the requested direction (recv flag), and
 *   - compares equal to qos_attributes according to its QoS module.
 *
 * @return the first matching channel, or NULL when there is none
 */
static orte_rml_channel_t * get_channel ( orte_process_name_t * peer,
                                          opal_list_t *qos_attributes,
                                          bool recv)
{
    orte_rml_channel_t *cand;
    int32_t slot;

    for (slot = 0; slot < orte_rml_base.open_channels.size; slot++) {
        cand = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, slot);
        if (NULL == cand) {
            continue;   /* empty slot */
        }
        /* basic properties: peer, state, direction */
        if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &cand->peer, peer)) {
            continue;
        }
        if ((orte_rml_channel_open != cand->state) &&
            (orte_rml_channel_opening != cand->state)) {
            continue;
        }
        if (cand->recv != recv) {
            continue;
        }
        /* finally let the QoS module compare the attributes */
        if (ORTE_SUCCESS == orte_qos_cmp_channel ( cand->qos, cand->qos_channel_ptr, qos_attributes)) {
            return cand;
        }
    }
    return NULL;
}
/*
 * Send-completion callback for an open-channel reply.
 *
 * When the send failed and a channel was attached as callback data, that
 * channel is removed from the open_channels array, its QoS channel is closed
 * and the channel is released.  The reply buffer is released in every case.
 *
 * @param status  result of the send
 * @param sender  peer name handed in by the messaging layer (unused)
 * @param buffer  the reply buffer that was sent
 * @param tag     tag the reply was sent on (unused)
 * @param cbdata  the accepted channel, or NULL
 */
void orte_rml_base_open_channel_reply_send_callback ( int status,
                                                      orte_process_name_t* sender,
                                                      opal_buffer_t* buffer,
                                                      orte_rml_tag_t tag,
                                                      void* cbdata)
{
    orte_rml_channel_t *chan = (orte_rml_channel_t*) cbdata;

    if (ORTE_SUCCESS != status) {
        /* the reply never went out: log it and undo the channel, if any */
        ORTE_ERROR_LOG (status);
        if (NULL != chan) {
            opal_pointer_array_set_item ( &orte_rml_base.open_channels, chan->channel_num, NULL);
            /* have the QoS module drop its channel object */
            orte_qos_close_channel (chan->qos, chan->qos_channel_ptr);
            OBJ_RELEASE(chan);
        }
    }
    /* success or not, the buffer is no longer needed */
    OBJ_RELEASE(buffer);
}
/*
 * Look up a channel by number in the open_channels array.
 *
 * @param chan_num  index of the channel in open_channels
 * @return the channel if the slot is populated and the channel is in the
 *         open state, otherwise NULL
 */
orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num) {
    orte_rml_channel_t * channel;

    channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, chan_num);
    if ((NULL != channel) && (orte_rml_channel_open == channel->state)) {
        return channel;
    }
    /* empty slot, or a channel that is not (yet / any longer) open */
    return NULL;
}
/*
 * Prepare a send for transmission on a channel: stamp it with the peer's
 * channel number, then hand it to the channel's QoS module.
 *
 * @param channel  channel the message is sent on
 * @param send     send descriptor to update
 */
void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel,
orte_rml_send_t *send)
{
// add channel number and notify Qos
// (the destination channel is set first so the QoS call sees it)
send->dst_channel = channel->peer_channel;
orte_qos_send_channel (channel->qos, channel->qos_channel_ptr, send);
}
/*
 * Run the channel's QoS receive post-processing on a received message.
 *
 * @param channel  channel the message arrived on
 * @param recv     the received message
 * @return whatever the QoS receive hook returns
 */
int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel,
                                        orte_rml_recv_t *recv)
{
    int qos_rc;

    /* delegate entirely to the QoS layer */
    qos_rc = orte_qos_recv_channel (channel->qos, channel->qos_channel_ptr, recv);
    return qos_rc;
}
/*
 * Receive callback for a close-channel request.
 *
 * Unpacks the channel number from the request, looks the channel up and, if
 * it is found, closes its QoS channel, clears its slot in open_channels and
 * releases it.  An unknown channel number is logged as OPAL_ERR_BAD_PARAM.
 *
 * @param status  status handed in by the messaging layer (unused)
 * @param peer    requesting process
 * @param buffer  request payload holding the channel number
 * @param tag     tag the request arrived on (unused)
 * @param cbdata  unused
 */
void orte_rml_close_channel_recv_callback (int status,
                                           orte_process_name_t* peer,
                                           struct opal_buffer_t* buffer,
                                           orte_rml_tag_t tag,
                                           void* cbdata)
{
    orte_rml_channel_t *chan;
    orte_rml_channel_num_t channel_num =5;
    int32_t num_vals = 1;
    int32_t ret;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_close_channel_recv_callback from peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    /* which channel does the peer want closed? */
    ret = opal_dss.unpack(buffer, &channel_num, &num_vals, OPAL_UINT32);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    chan = orte_rml_base_get_channel(channel_num);
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_close_channel_recv_callback for channel num =%d channel=%p",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         channel_num, (void*)chan));
    if (NULL == chan) {
        /* no open channel with that number */
        ORTE_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return;
    }

    /* tear the channel down */
    orte_qos_close_channel ( chan->qos, chan->qos_channel_ptr);
    opal_pointer_array_set_item ( &orte_rml_base.open_channels, chan->channel_num, NULL);
    OBJ_RELEASE(chan);
}

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel Corporation. All rights reserved.
* Copyright (c) 2014-2016 Intel Corporation. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -36,33 +36,34 @@
* component's public mca_base_component_t struct. */
#include "orte/mca/rml/base/static-components.h"
orte_rml_module_t orte_rml = {0};
/* Global RML entry-point table used by other modules.
 * It is initialised positionally with the orte_rml_API_* stub functions, so
 * the order of the entries below must match the field order of
 * orte_rml_base_module_t. */
orte_rml_base_module_t orte_rml = {
orte_rml_API_enable_comm,
orte_rml_API_finalize,
orte_rml_API_get_contact_info,
orte_rml_API_set_contact_info,
orte_rml_API_ping,
orte_rml_API_send_nb,
orte_rml_API_send_buffer_nb,
orte_rml_API_recv_nb,
orte_rml_API_recv_buffer_nb,
orte_rml_API_recv_cancel,
orte_rml_API_add_exception_handler,
orte_rml_API_del_exception_handler,
orte_rml_API_ft_event,
orte_rml_API_purge
};
orte_rml_base_t orte_rml_base = {{{0}}};
OPAL_TIMING_DECLARE(tm_rml)
orte_rml_component_t *orte_rml_component = NULL;
static bool selected = false;
static char *orte_rml_base_wrapper = NULL;
static int orte_rml_base_register(mca_base_register_flag_t flags)
{
int var_id;
/*
* Which RML Wrapper component to use, if any
* - NULL or "" = No wrapper
* - ow. select that specific wrapper component
*/
orte_rml_base_wrapper = NULL;
var_id = mca_base_var_register("orte", "rml", "base", "wrapper",
"Use a Wrapper component around the selected RML component",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&orte_rml_base_wrapper);
(void) mca_base_var_register_synonym(var_id, "orte", "rml",NULL,"wrapper", 0);
#if OPAL_ENABLE_TIMING
orte_rml_base.timing = false;
(void) mca_base_var_register ("orte", "rml", "base", "timing",
@ -89,6 +90,17 @@ static int orte_rml_base_close(void)
{
bool active;
orte_rml_base_active_t *active_module;
/*close the active modules */
OPAL_LIST_FOREACH(active_module, &orte_rml_base.actives, orte_rml_base_active_t)
{
if (NULL != active_module->module->finalize) {
active_module->module->finalize();
}
}
OPAL_LIST_DESTRUCT(&orte_rml_base.actives)
/* because the RML posted recvs list is in a separate
* async thread for apps, we can't just destruct it here.
* Instead, we push it into that event thread and destruct
@ -107,7 +119,6 @@ static int orte_rml_base_close(void)
}
OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml);
OBJ_DESTRUCT(&orte_rml_base.open_channels);
return mca_base_framework_components_close(&orte_rml_base_framework, NULL);
}
@ -115,13 +126,11 @@ static int orte_rml_base_close(void)
static int orte_rml_base_open(mca_base_open_flag_t flags)
{
/* Initialize globals */
/* construct object for holding the active plugin modules */
OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.open_channels, opal_pointer_array_t);
if (OPAL_SUCCESS != opal_pointer_array_init(&orte_rml_base.open_channels, 0,
INT_MAX, 1)) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
OPAL_TIMING_INIT(&tm_rml);
/* Open up all available components */
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
@ -131,18 +140,23 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer",
orte_rml_base_register, orte_rml_base_open, orte_rml_base_close,
mca_rml_base_static_components, 0);
/* Class instance for orte_rml_base_active_t, a list item type;
 * no constructor or destructor is registered (both NULL). */
OBJ_CLASS_INSTANCE(orte_rml_base_active_t,
opal_list_item_t,
NULL, NULL);
/**
* Function for selecting one component(plugin) from all those that are
* available.
*/
int orte_rml_base_select(void)
{
opal_list_item_t *item, *next;
mca_base_component_list_item_t *cli;
int selected_priority = -1;
orte_rml_component_t *selected_component = NULL;
orte_rml_module_t *selected_module = NULL;
orte_rml_component_t *wrapper_component = NULL;
bool return_silent=false;
mca_base_component_list_item_t *cli=NULL;
mca_base_component_t *component=NULL;
mca_base_module_t *module=NULL;
orte_rml_base_module_t *nmodule;
orte_rml_base_active_t *newmodule, *mod;
int priority;
bool inserted;
if (selected) {
return ORTE_SUCCESS;
@ -150,97 +164,56 @@ int orte_rml_base_select(void)
selected = true;
OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) {
orte_rml_component_t* component;
component = (orte_rml_component_t *) cli->cli_component;
component = (mca_base_component_t *) cli->cli_component;
opal_output_verbose(10, orte_rml_base_framework.framework_output,
"orte_rml_base_select: initializing %s component %s",
component->rml_version.mca_type_name,
component->rml_version.mca_component_name);
"orte_rml_base_select: Initializing %s component %s",
component->mca_type_name,
component->mca_component_name);
if (NULL == component->rml_init) {
if (NULL == ((orte_rml_component_t *)component)->rml_init) {
opal_output_verbose(10, orte_rml_base_framework.framework_output,
"orte_rml_base_select: no init function; ignoring component");
"orte_rml_base_select: no init function; ignoring component [%s]",component->mca_component_name);
} else {
int priority = 0;
orte_rml_module_t* module = component->rml_init(&priority);
module = (mca_base_module_t *) ((orte_rml_component_t *)component)->rml_init(&priority);
if (NULL == module) {
opal_output_verbose(10, orte_rml_base_framework.framework_output,
"orte_rml_base_select: init returned failure");
if (priority < 0) {
return_silent = true;
}
"orte_rml_base_select: init returned failure [%s]",component->mca_component_name);
continue;
}
if(NULL != orte_rml_base_wrapper &&
/* If this is a wrapper component then save it for later */
RML_SELECT_WRAPPER_PRIORITY >= priority) {
if( 0 == strncmp(component->rml_version.mca_component_name,
orte_rml_base_wrapper,
strlen(orte_rml_base_wrapper) ) ) {
wrapper_component = component;
/* based on priority add it to the actives list */
nmodule = (orte_rml_base_module_t*) module;
/* add to the list of selected modules */
newmodule = OBJ_NEW(orte_rml_base_active_t);
newmodule->pri = priority;
newmodule->module = nmodule;
newmodule->component = component;
/* maintain priority order */
inserted = false;
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
if (priority > mod->pri) {
opal_list_insert_pos(&orte_rml_base.actives,
(opal_list_item_t*)mod, &newmodule->super);
inserted = true;
break;
}
} else if (priority > selected_priority) {
/* Otherwise this is a normal module and subject to normal selection */
if (NULL != selected_module && NULL != selected_module->finalize) {
selected_module->finalize();
}
selected_priority = priority;
selected_component = component;
selected_module = module;
if (!inserted) {
/* must be lowest priority - add to end */
opal_list_append(&orte_rml_base.actives, &newmodule->super);
}
}
}
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* show the prioritized list */
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
opal_output(0, "\tComponent: %s Priority: %d", mod->component->mca_component_name, mod->pri);
}
}
/*
* Unload all components that were not selected
*/
OPAL_LIST_FOREACH_SAFE(item, next, &orte_rml_base_framework.framework_components, opal_list_item_t) {
mca_base_component_list_item_t *cli = (mca_base_component_list_item_t *) item;
orte_rml_component_t* component = (orte_rml_component_t *) cli->cli_component;
/* Keep it if it is the wrapper component */
if ((component == wrapper_component) || (component == selected_component)) {
continue;
}
/* Not the selected component */
opal_output_verbose(10, orte_rml_base_framework.framework_output,
"orte_rml_base_select: module %s unloaded",
component->rml_version.mca_component_name);
opal_list_remove_item(&orte_rml_base_framework.framework_components, item);
mca_base_component_repository_release((mca_base_component_t *) component);
OBJ_RELEASE(item);
}
/* setup reference to selected module */
if (NULL != selected_module) {
orte_rml = *selected_module;
orte_rml_component = selected_component;
}
/* If a wrapper component was requested then
* Make sure it can switch out the selected module
*/
if( NULL != wrapper_component) {
wrapper_component->rml_init(NULL);
}
if (NULL == selected_component) {
if (return_silent) {
return ORTE_ERR_SILENT;
}
return ORTE_ERROR;
}
/* Post a persistent recieve for open channel request */
orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPEN_CHANNEL_REQ,
ORTE_RML_PERSISTENT, orte_rml_open_channel_recv_callback,
NULL);
/* post a persistent recieve for close channel request */
orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_CLOSE_CHANNEL_REQ,
ORTE_RML_PERSISTENT, orte_rml_close_channel_recv_callback,
NULL);
return ORTE_SUCCESS;
}
@ -278,48 +251,16 @@ static void send_cons(orte_rml_send_t *ptr)
ptr->iov = NULL;
ptr->buffer = NULL;
ptr->data = NULL;
ptr->channel = NULL;
ptr->dst_channel = ORTE_RML_INVALID_CHANNEL_NUM;
ptr->seq_num = 0xFFFFFFFF;
}
OBJ_CLASS_INSTANCE(orte_rml_send_t,
opal_list_item_t,
send_cons, NULL);
/* Constructor for orte_rml_channel_t: no QoS binding, not a receive
 * channel, and an invalid channel number until one is assigned. */
static void channel_cons(orte_rml_channel_t *ptr)
{
    ptr->recv = false;
    ptr->qos_channel_ptr = NULL;
    ptr->qos = NULL;
    ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM;
}
OBJ_CLASS_INSTANCE(orte_rml_channel_t,
                   opal_list_item_t,
                   channel_cons, NULL);
/* Constructor for orte_rml_open_channel_t: start with no attribute list
 * and no callback data. */
static void open_channel_cons(orte_rml_open_channel_t *ptr)
{
    ptr->qos_attributes = NULL;
    ptr->cbdata = NULL;
}
OBJ_CLASS_INSTANCE(orte_rml_open_channel_t,
                   opal_list_item_t,
                   open_channel_cons, NULL);
/* Constructor for orte_rml_close_channel_t: start with no channel and no
 * callback data. */
static void close_channel_cons(orte_rml_close_channel_t *ptr)
{
    ptr->channel = NULL;
    ptr->cbdata = NULL;
}
OBJ_CLASS_INSTANCE(orte_rml_close_channel_t,
                   opal_list_item_t,
                   close_channel_cons, NULL);
static void send_req_cons(orte_rml_send_request_t *ptr)
{
OBJ_CONSTRUCT(&ptr->post.send, orte_rml_send_t);
OBJ_CONSTRUCT(&ptr->post.open_channel, orte_rml_open_channel_t);
OBJ_CONSTRUCT(&ptr->send, orte_rml_send_t);
}
OBJ_CLASS_INSTANCE(orte_rml_send_request_t,
opal_object_t,
@ -329,7 +270,6 @@ static void recv_cons(orte_rml_recv_t *ptr)
{
ptr->iov.iov_base = NULL;
ptr->iov.iov_len = 0;
ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM;
}
static void recv_des(orte_rml_recv_t *ptr)
{

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -46,7 +46,6 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/qos/base/base.h"
static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all);
@ -143,11 +142,10 @@ void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg)
* to retain ownership of it, so release whatever remains
*/
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
"%s message received bytes from %s for tag %d on channel=%d called callback",
"%s message received bytes from %s for tag %d called callback",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&msg->sender),
msg->tag,
msg->channel_num));
msg->tag));
OBJ_DESTRUCT(&buf);
} else {
/* deliver as an iovec */
@ -180,11 +178,10 @@ void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg)
* the message until such a recv is issued
*/
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
"%s message received bytes from %s for tag %d on channel=%d Not Matched adding to unmatched msgs",
"%s message received bytes from %s for tag %d Not Matched adding to unmatched msgs",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&msg->sender),
msg->tag,
msg->channel_num));
msg->tag));
opal_list_append(&orte_rml_base.unmatched_msgs, &msg->super);
}
@ -227,29 +224,13 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
{
orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata;
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
"%s message received from %s for tag %d on channel=%d",
"%s message received from %s for tag %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&msg->sender),
msg->tag,
msg->channel_num));
msg->tag));
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
if ((ORTE_RML_INVALID_CHANNEL_NUM != msg->channel_num) &&
(NULL != orte_rml_base_get_channel(msg->channel_num) )) {
// call channel for recv post processing
if (ORTE_SUCCESS != (orte_rml_base_process_recv_channel (orte_rml_base_get_channel(msg->channel_num), msg)))
{
/* the qos channel has determined an error so we cannot complete this msg to the caller */
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
"%s QoS channel receive error - cannot complete msg on channel=%d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
msg->channel_num));
return;
}
}
orte_rml_base_complete_recv_msg(&msg);
}
@ -257,11 +238,10 @@ void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata)
{
orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata;
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output,
"%s reprocessing msg received from %s for tag %d on channel=%d",
"%s reprocessing msg received from %s for tag %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&msg->sender),
msg->tag,
msg->channel_num));
msg->tag));
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));

406
orte/mca/rml/base/rml_base_stubs.c Обычный файл
Просмотреть файл

@ -0,0 +1,406 @@
/*
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel Corporation. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <string.h>
#include "opal/dss/dss.h"
#include "orte/mca/mca.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "opal/util/output.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/state/state.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/mca/rml/base/base.h"
/*
* The stub API interface functions
*/
/**
 * Enable communication once a process name has been assigned.
 *
 * Every active module that provides enable_comm is asked to enable its
 * communication.  A module that fails is removed from the actives list,
 * finalized (if it provides finalize) and released.
 *
 * @return ORTE_SUCCESS if at least one module remains on the actives list,
 *         ORTE_ERR_UNREACH otherwise
 */
int orte_rml_API_enable_comm(void)
{
    orte_rml_base_active_t *mod, *nxt;
    int ret;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:enable_comm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* safe iteration: failing modules are unlinked while we walk the list */
    OPAL_LIST_FOREACH_SAFE(mod, nxt, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->enable_comm) {
            continue;
        }
        ret = mod->module->enable_comm();
        if (ORTE_SUCCESS == ret) {
            continue;
        }
        opal_output_verbose(2, orte_rml_base_framework.framework_output,
                            "%s rml:base:enable_comm Component %s was unable to enable comm",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            mod->component->mca_component_name);
        /* drop the module from the actives ... */
        opal_list_remove_item(&orte_rml_base.actives, &mod->super);
        /* ... let it clean up after itself ... */
        if (NULL != mod->module->finalize) {
            mod->module->finalize();
        }
        /* ... and let go of our tracking object */
        OBJ_RELEASE(mod);
    }

    /* we are only usable if somebody is still standing */
    return (0 < opal_list_get_size(&orte_rml_base.actives)) ? ORTE_SUCCESS
                                                           : ORTE_ERR_UNREACH;
}
/**
 * Shut down the communication system and clean up resources.
 *
 * Calls finalize on every active module that provides it.  The actives
 * list itself is left untouched here.
 */
void orte_rml_API_finalize(void)
{
    orte_rml_base_active_t *mod;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:finalize()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* give every active module the chance to finalize */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->finalize) {
            continue;
        }
        mod->module->finalize();
    }
}
/**
 * Get contact information for the local process.
 *
 * Collects the contact string of every active module that provides
 * get_contact_info and joins the non-NULL results with ';'.
 *
 * @return the joined string produced by opal_argv_join (presumably
 *         heap-allocated and owned by the caller - confirm against
 *         opal_argv_join), or NULL when no module supplied contact info
 */
char* orte_rml_API_get_contact_info(void)
{
    char **rc = NULL, *tmp;
    orte_rml_base_active_t *active;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:get_contact_info()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* cycle thru the actives and collect each module's contact info */
    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL != active->module->get_contact_info) {
            tmp = active->module->get_contact_info();
            if (NULL != tmp) {
                /* the string is freed right after the append, so the argv
                 * must be holding its own copy */
                opal_argv_append_nosize(&rc, tmp);
                free(tmp);
            }
        }
    }
    if (NULL != rc) {
        tmp = opal_argv_join(rc, ';');
        /* the joined string stands on its own; release the temporary argv
         * so it does not leak on every call */
        opal_argv_free(rc);
    } else {
        tmp = NULL;
    }
    return tmp;
}
/**
 * Set contact information for a remote process.
 *
 * The same contact_info string is passed to every active module that
 * provides set_contact_info; each module is expected to pick out the part
 * that concerns it.
 *
 * @param contact_info  contact string to distribute
 */
void orte_rml_API_set_contact_info(const char *contact_info)
{
    orte_rml_base_active_t *mod;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:set_contact_info()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* every module gets to parse the info for its own portion */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->set_contact_info) {
            continue;
        }
        mod->module->set_contact_info(contact_info);
    }
}
/**
 * Ping a process for a connectivity check.
 *
 * Active modules providing ping are tried in list order until one succeeds.
 *
 * @param contact_info  contact string of the target
 * @param tv            timeout passed through to the module
 * @return ORTE_SUCCESS as soon as one module reaches the target; otherwise
 *         the result of the last module tried, or ORTE_ERR_UNREACH when no
 *         module provides ping
 */
int orte_rml_API_ping(const char* contact_info,
                      const struct timeval* tv)
{
    orte_rml_base_active_t *mod;
    int last = ORTE_ERR_UNREACH;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:ping()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* the first module that confirms the connection wins */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->ping) {
            continue;
        }
        last = mod->module->ping(contact_info, tv);
        if (ORTE_SUCCESS == last) {
            return ORTE_SUCCESS;
        }
    }
    return last;
}
/**
 * Send a non-blocking iovec message.
 *
 * Active modules providing send_nb are tried in list order until one
 * accepts the message.
 *
 * @param peer    destination process
 * @param msg     iovec array to send
 * @param count   number of iovec entries
 * @param tag     message tag
 * @param cbfunc  completion callback
 * @param cbdata  opaque data for the callback
 * @return ORTE_SUCCESS once a module accepts the send; otherwise the result
 *         of the last module tried, or ORTE_ERR_UNREACH when no module
 *         provides send_nb
 */
int orte_rml_API_send_nb(orte_process_name_t* peer,
                         struct iovec* msg,
                         int count,
                         orte_rml_tag_t tag,
                         orte_rml_callback_fn_t cbfunc,
                         void* cbdata)
{
    orte_rml_base_active_t *mod;
    int last = ORTE_ERR_UNREACH;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:send_nb() to peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    /* stop at the first module able to take the message */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->send_nb) {
            continue;
        }
        last = mod->module->send_nb(peer, msg, count, tag, cbfunc, cbdata);
        if (ORTE_SUCCESS == last) {
            return ORTE_SUCCESS;
        }
    }
    return last;
}
/**
 * Send a non-blocking buffer message.
 *
 * Active modules providing send_buffer_nb are tried in list order until one
 * accepts the buffer.
 *
 * @param peer    destination process
 * @param buffer  buffer to send
 * @param tag     message tag
 * @param cbfunc  completion callback
 * @param cbdata  opaque data for the callback
 * @return ORTE_SUCCESS once a module accepts the send; otherwise the result
 *         of the last module tried, or ORTE_ERR_UNREACH when no module
 *         provides send_buffer_nb
 */
int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
                                struct opal_buffer_t* buffer,
                                orte_rml_tag_t tag,
                                orte_rml_buffer_callback_fn_t cbfunc,
                                void* cbdata)
{
    orte_rml_base_active_t *mod;
    int last = ORTE_ERR_UNREACH;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:send_buffer_nb()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* stop at the first module able to take the buffer */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->send_buffer_nb) {
            continue;
        }
        last = mod->module->send_buffer_nb(peer, buffer, tag, cbfunc, cbdata);
        if (ORTE_SUCCESS == last) {
            return ORTE_SUCCESS;
        }
    }
    return last;
}
/**
 * Post a non-blocking receive for an IOV message.
 *
 * Every active module providing recv_nb is notified first; the receive is
 * then described in a request object and pushed into the event base, where
 * orte_rml_base_post_recv handles it.
 *
 * @param peer        process to receive from
 * @param tag         tag to match
 * @param persistent  persistence flag stored in the posted receive
 * @param cbfunc      callback invoked with the received iovec
 * @param cbdata      opaque data for the callback
 */
void orte_rml_API_recv_nb(orte_process_name_t* peer,
                          orte_rml_tag_t tag,
                          bool persistent,
                          orte_rml_callback_fn_t cbfunc,
                          void* cbdata)
{
    orte_rml_base_active_t *mod;
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_nb for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    /* module-specific handling comes first */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->recv_nb) {
            continue;
        }
        mod->module->recv_nb(peer, tag, persistent, cbfunc, cbdata);
    }

    /* describe the receive ... */
    request = OBJ_NEW(orte_rml_recv_request_t);
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;
    request->post->buffer_data = false;     /* deliver as iovec */
    request->post->cbfunc.iov = cbfunc;
    request->post->cbdata = cbdata;

    /* ... and hand it to the event base, which adds it to the posted recvs */
    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}
/**
 * Post a non-blocking receive for a buffer message.
 *
 * Every active module providing recv_buffer_nb is notified first; the
 * receive is then described in a request object and pushed into the event
 * base, where orte_rml_base_post_recv handles it.
 *
 * @param peer        process to receive from
 * @param tag         tag to match
 * @param persistent  persistence flag stored in the posted receive
 * @param cbfunc      callback invoked with the received buffer
 * @param cbdata      opaque data for the callback
 */
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
                                 orte_rml_tag_t tag,
                                 bool persistent,
                                 orte_rml_buffer_callback_fn_t cbfunc,
                                 void* cbdata)
{
    orte_rml_base_active_t *mod;
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_buffer_nb for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    /* module-specific handling comes first */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->recv_buffer_nb) {
            continue;
        }
        mod->module->recv_buffer_nb(peer, tag, persistent, cbfunc, cbdata);
    }

    /* describe the receive ... */
    request = OBJ_NEW(orte_rml_recv_request_t);
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;
    request->post->buffer_data = true;      /* deliver as buffer */
    request->post->cbfunc.buffer = cbfunc;
    request->post->cbdata = cbdata;

    /* ... and hand it to the event base, which adds it to the posted recvs */
    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}
/**
 * Cancel a posted non-blocking receive.
 *
 * Every active module providing recv_cancel is notified first; a cancel
 * request for the peer/tag pair is then pushed into the event base, where
 * orte_rml_base_post_recv handles it.
 *
 * @param peer  process the receive was posted for
 * @param tag   tag the receive was posted for
 */
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
{
    orte_rml_base_active_t *mod;
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_cancel for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    /* module-specific handling comes first */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->recv_cancel) {
            continue;
        }
        mod->module->recv_cancel(peer,tag);
    }

    /* build a cancel request identifying the receive ... */
    request = OBJ_NEW(orte_rml_recv_request_t);
    request->post->tag = tag;
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->cancel = true;

    /* ... and hand it to the event base, which removes the posted recv */
    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}
/**
 * Add a callback for communication exceptions.
 *
 * Active modules providing add_exception_handler are tried in list order
 * until one registers the callback.
 *
 * @param cbfunc  exception callback to register
 * @return ORTE_SUCCESS once a module registers the callback; otherwise the
 *         result of the last module tried, or ORTE_ERROR when no module
 *         provides add_exception_handler
 */
int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc)
{
    orte_rml_base_active_t *mod;
    int last = ORTE_ERROR;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:add_exception_handler()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* stop at the first module that takes the handler */
    OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == mod->module->add_exception_handler) {
            continue;
        }
        last = mod->module->add_exception_handler(cbfunc);
        if (ORTE_SUCCESS == last) {
            return ORTE_SUCCESS;
        }
    }
    return last;
}
/**
 * Remove a previously registered communication-exception callback.
 *
 * The active modules are tried in list order; the search stops at the
 * first one whose del_exception_handler returns ORTE_SUCCESS.
 *
 * @param cbfunc  exception callback to remove
 * @return ORTE_SUCCESS if some module removed the handler; otherwise
 *         the status of the last module tried, or ORTE_ERROR when no
 *         active module provides del_exception_handler.
 */
int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc)
{
    orte_rml_base_active_t *entry;
    int status = ORTE_ERROR;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:del_exception_handler()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* walk the actives until one of them removes the handler */
    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == entry->module->del_exception_handler) {
            continue;
        }
        status = entry->module->del_exception_handler(cbfunc);
        if (ORTE_SUCCESS == status) {
            break;
        }
    }

    return status;
}
/**
 * Deliver a fault-tolerance event to the active RML modules.
 *
 * Each active module that provides ft_event is called in list order;
 * delivery stops at the first module that reports a failure.
 *
 * @param state  fault-tolerance state, passed through unchanged
 * @return the status of the last module called. ORTE_ERROR is returned
 *         when no active module provides ft_event.
 *
 * NOTE(review): returning ORTE_ERROR when there was nobody to notify
 * may not be intended - confirm against the callers.
 */
int orte_rml_API_ft_event(int state)
{
    orte_rml_base_active_t *entry;
    int status = ORTE_ERROR;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:ft_event()",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* every module gets the event, unless one of them fails */
    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == entry->module->ft_event) {
            continue;
        }
        status = entry->module->ft_event(state);
        if (ORTE_SUCCESS != status) {
            break;
        }
    }

    return status;
}
/**
 * Purge information about a peer.
 *
 * Calls the purge entry point of every active RML module that
 * provides one.
 *
 * @param peer  process whose information is to be purged; passed
 *              through to the modules unchanged
 */
void orte_rml_API_purge(orte_process_name_t *peer)
{
    orte_rml_base_active_t *entry;

    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
                         "%s rml:base:purge() - calling the respective plugin that implements this",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* let every module drop whatever it holds for this peer */
    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == entry->module->purge) {
            continue;
        }
        entry->module->purge(peer);
    }
}

Просмотреть файл

@ -1,45 +0,0 @@
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
rml_ftrm.h \
rml_ftrm_component.c \
rml_ftrm_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_rml_ftrm_DSO
component_noinst =
component_install = mca_rml_ftrm.la
else
component_noinst = libmca_rml_ftrm.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_rml_ftrm_la_SOURCES = $(sources)
mca_rml_ftrm_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_rml_ftrm_la_SOURCES = $(sources)
libmca_rml_ftrm_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,28 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2010 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_rml_ftrm_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_orte_rml_ftrm_CONFIG],[
AC_CONFIG_FILES([orte/mca/rml/ftrm/Makefile])
# If we don't want FT, don't compile this component
AS_IF([test "$opal_want_ft_cr" = "1"],
[$1],
[$2])
])dnl

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: ?
status: unmaintained

Просмотреть файл

@ -1,130 +0,0 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* ORTE RML Fault Tolerance Wrapper - Ready Message Protocol (FTRM)
*
* @file
*/
#ifndef MCA_RML_FTRM_H
#define MCA_RML_FTRM_H
#include "orte_config.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
BEGIN_C_DECLS
/* Output stream id used for this component's verbose messages */
extern int rml_ftrm_output_handle;
/*
 * Component Information
 *
 * mca_rml_ftrm_component / orte_rml_ftrm_module describe the wrapper
 * itself. The "wrapped" pair holds copies of the RML component and
 * module that the wrapper forwards its calls to.
 */
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ftrm_component;
ORTE_MODULE_DECLSPEC extern orte_rml_module_t orte_rml_ftrm_module;
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ftrm_wrapped_component;
ORTE_MODULE_DECLSPEC extern orte_rml_module_t orte_rml_ftrm_wrapped_module;
/*
 * Init (Component)
 *
 * With a non-NULL priority: reports the component priority and returns
 * the wrapper module. With a NULL priority: installs the wrapper in
 * place of the currently selected RML module and returns NULL.
 */
orte_rml_module_t* orte_rml_ftrm_component_init(int *priority);
/*
 * Init (Module): forwards to the wrapped module's enable_comm
 */
int orte_rml_ftrm_module_enable_comm(void);
/*
 * Finalize (Module): forwards to the wrapped module's finalize
 */
int orte_rml_ftrm_module_finalize(void);
/*
 * Get contact info (URI) from the wrapped module;
 * NULL when the wrapped module has no get_contact_info
 */
char * orte_rml_ftrm_get_contact_info(void);
/*
 * Set contact info (URI) on the wrapped module
 */
void orte_rml_ftrm_set_contact_info(const char* uri);
/*
 * Ping the given URI through the wrapped module
 */
int orte_rml_ftrm_ping(const char* uri, const struct timeval* tv);
/*
 * Send Non-blocking (iovec message), forwarded to the wrapped module
 */
int orte_rml_ftrm_send_nb(orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/*
 * Send Buffer Non-blocking, forwarded to the wrapped module
 */
int orte_rml_ftrm_send_buffer_nb(orte_process_name_t* peer,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/*
 * Recv Non-blocking (iovec message), forwarded to the wrapped module
 */
void orte_rml_ftrm_recv_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/*
 * Recv Buffer Non-blocking, forwarded to the wrapped module
 */
void orte_rml_ftrm_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/*
 * Recv Cancel, forwarded to the wrapped module
 */
void orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
/*
 * Register / remove a callback on loss of connection,
 * forwarded to the wrapped module
 */
int orte_rml_ftrm_add_exception_handler(orte_rml_exception_callback_t cbfunc);
int orte_rml_ftrm_del_exception_handler(orte_rml_exception_callback_t cbfunc);
/*
 * FT Event: passes the fault-tolerance state to the wrapped module
 */
int orte_rml_ftrm_ft_event(int state);
/*
 * Purge: forwards the purge request for a peer to the wrapped module
 */
void orte_rml_ftrm_purge(orte_process_name_t *peer);
END_C_DECLS
#endif /* MCA_RML_FTRM_H */

Просмотреть файл

@ -1,183 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "orte/mca/rml/base/base.h"
#include "rml_ftrm.h"
/* Component life-cycle hooks, referenced by the component definition below */
static int orte_rml_ftrm_register(void);
static int orte_rml_ftrm_open(void);
static int orte_rml_ftrm_close(void);
/**
 * Component definition for the "ftrm" RML wrapper.
 *
 * Ties the register/open/close hooks and the component init function
 * to the component name "ftrm".
 */
orte_rml_component_t mca_rml_ftrm_component = {
/* First, the mca_base_component_t struct containing meta
   information about the component itself: name, version and
   the register/open/close hooks */
.rml_version = {
ORTE_RML_BASE_VERSION_2_0_0,
.mca_component_name = "ftrm",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
.mca_open_component = orte_rml_ftrm_open,
.mca_close_component = orte_rml_ftrm_close,
.mca_register_component_params = orte_rml_ftrm_register,
},
.rml_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Component init: reports the priority or installs the wrapper */
.rml_init = orte_rml_ftrm_component_init,
};
orte_rml_module_t orte_rml_ftrm_module = {
orte_rml_ftrm_module_enable_comm,
orte_rml_ftrm_module_finalize,
orte_rml_ftrm_get_contact_info,
orte_rml_ftrm_set_contact_info,
orte_rml_ftrm_ping,
orte_rml_ftrm_send_nb,
orte_rml_ftrm_send_buffer_nb,
orte_rml_ftrm_recv_nb,
orte_rml_ftrm_recv_buffer_nb,
orte_rml_ftrm_recv_cancel,
orte_rml_ftrm_add_exception_handler,
orte_rml_ftrm_del_exception_handler,
orte_rml_ftrm_ft_event,
orte_rml_ftrm_purge,
};
int rml_ftrm_output_handle;
static int ftrm_priority = -1;
static int ftrm_verbosity;
/*
 * Initialize the wrapper component.
 *
 * This entry point is used in two ways:
 *  - priority != NULL: report the component priority through *priority
 *    and return the wrapper module.
 *  - priority == NULL: save copies of the currently selected RML module
 *    and component, install the wrapper in their place, and return NULL.
 */
orte_rml_module_t* orte_rml_ftrm_component_init(int* priority)
{
    if (NULL == priority) {
        /* Called a second time to swap module pointers:
         * remember what we are wrapping ... */
        orte_rml_ftrm_wrapped_module   = orte_rml;
        mca_rml_ftrm_wrapped_component = *orte_rml_component;

        /* ... then put ourselves in its place */
        orte_rml           = orte_rml_ftrm_module;
        orte_rml_component = &mca_rml_ftrm_component;

        opal_output_verbose(20, rml_ftrm_output_handle,
                            "orte_rml_ftrm: component_init(): Wrapped Component (%s)",
                            mca_rml_ftrm_wrapped_component.rml_version.mca_component_name);
        return NULL;
    }

    /* Asked to return a priority */
    *priority = ftrm_priority;
    return &orte_rml_ftrm_module;
}
/*
 * Register the component's MCA variables ("priority" and "verbose").
 *
 * Without checkpoint/restart support (OPAL_ENABLE_FT_CR != 1) nothing
 * is registered and ORTE_ERR_NOT_AVAILABLE is returned.
 */
static int orte_rml_ftrm_register(void)
{
#if OPAL_ENABLE_FT_CR == 1
    /* Enable this wrapper = RML_SELECT_WRAPPER_PRIORITY
     * ow = -1 or never selected
     */
    ftrm_priority = RML_SELECT_WRAPPER_PRIORITY;
    (void) mca_base_component_var_register(&mca_rml_ftrm_component.rml_version,
                                           "priority",
                                           "Priority of the RML ftrm component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &ftrm_priority);

    /* verbosity defaults to 0 */
    ftrm_verbosity = 0;
    (void) mca_base_component_var_register(&mca_rml_ftrm_component.rml_version,
                                           "verbose",
                                           "Verbose level for the RML ftrm component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &ftrm_verbosity);

    return ORTE_SUCCESS;
#else
    return ORTE_ERR_NOT_AVAILABLE;
#endif
}
/*
 * Initialize the structures upon opening.
 *
 * Sets up the output stream used for verbose messages: a dedicated
 * stream is opened only when a non-zero verbosity was requested,
 * otherwise the handle is set to -1.
 *
 * Returns ORTE_ERR_NOT_AVAILABLE without checkpoint/restart support
 * (OPAL_ENABLE_FT_CR != 1), ORTE_SUCCESS otherwise.
 */
static int orte_rml_ftrm_open(void)
{
#if OPAL_ENABLE_FT_CR != 1
    return ORTE_ERR_NOT_AVAILABLE;
#endif

    /* no stream unless a custom verbose level was given */
    rml_ftrm_output_handle = -1;
    if (0 != ftrm_verbosity) {
        rml_ftrm_output_handle = opal_output_open(NULL);
        opal_output_set_verbosity(rml_ftrm_output_handle, ftrm_verbosity);
    }

    opal_output_verbose(10, rml_ftrm_output_handle,
                        "orte_rml_ftrm: open(): Priority = %d", ftrm_priority);
    opal_output_verbose(10, rml_ftrm_output_handle,
                        "orte_rml_ftrm: open(): Verbosity = %d", ftrm_verbosity);

    return ORTE_SUCCESS;
}
/*
 * Component close hook. There is nothing to release here, so it
 * always reports success.
 *
 * NOTE(review): an output stream opened by orte_rml_ftrm_open() is not
 * closed here - confirm whether that is intentional.
 */
static int
orte_rml_ftrm_close(void)
{
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,326 +0,0 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/name_fns.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/rml.h"
#include "rml_ftrm.h"
/* The RML component and module this wrapper forwards to. Every
 * function in this file checks the matching entry of
 * orte_rml_ftrm_wrapped_module for NULL before calling it. */
orte_rml_component_t mca_rml_ftrm_wrapped_component;
orte_rml_module_t orte_rml_ftrm_wrapped_module;
/*
 * Init (Module)
 *
 * Forwards to the wrapped module's enable_comm and returns its status;
 * ORTE_SUCCESS when the wrapped module has no enable_comm.
 */
int orte_rml_ftrm_module_enable_comm(void)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: module_init(): Normal...");

    if (NULL == orte_rml_ftrm_wrapped_module.enable_comm) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.enable_comm();
}
/*
 * Finalize (Module)
 *
 * Forwards to the wrapped module's finalize and returns its status;
 * ORTE_SUCCESS when the wrapped module has no finalize.
 */
int orte_rml_ftrm_module_finalize(void)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: module_finalize()");

    if (NULL == orte_rml_ftrm_wrapped_module.finalize) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.finalize();
}
/*
 * Get contact info (URI)
 *
 * Returns whatever the wrapped module's get_contact_info returns, or
 * NULL when the wrapped module has no get_contact_info.
 */
char * orte_rml_ftrm_get_contact_info(void)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: get_uri()");

    return (NULL != orte_rml_ftrm_wrapped_module.get_contact_info)
               ? orte_rml_ftrm_wrapped_module.get_contact_info()
               : NULL;
}
/*
 * Set contact info
 *
 * Hands the contact info string to the wrapped module; does nothing
 * when the wrapped module has no set_contact_info.
 */
void orte_rml_ftrm_set_contact_info(const char* contact_info)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: set_contact_info()");

    if (NULL == orte_rml_ftrm_wrapped_module.set_contact_info) {
        return;
    }
    orte_rml_ftrm_wrapped_module.set_contact_info(contact_info);
}
/*
 * Ping
 *
 * Forwards the URI and timeout to the wrapped module's ping and
 * returns its status; ORTE_SUCCESS when the wrapped module has no ping.
 */
int orte_rml_ftrm_ping(const char* uri, const struct timeval* tv)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: ping()");

    if (NULL == orte_rml_ftrm_wrapped_module.ping) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.ping(uri, tv);
}
/*
 * Send Non-blocking
 *
 * Forwards an iovec send to the wrapped module and returns its status.
 * When the wrapped module has no send_nb, nothing is sent and
 * ORTE_SUCCESS is returned.
 */
int orte_rml_ftrm_send_nb(orte_process_name_t* peer,
                          struct iovec* msg,
                          int count,
                          orte_rml_tag_t tag,
                          orte_rml_callback_fn_t cbfunc,
                          void* cbdata)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: send_nb(%s, %d, %d )",
                        ORTE_NAME_PRINT(peer), count, tag);

    if (NULL == orte_rml_ftrm_wrapped_module.send_nb) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.send_nb(peer, msg, count, tag,
                                                cbfunc, cbdata);
}
/*
 * Send Buffer Non-blocking
 *
 * Forwards a buffer send to the wrapped module and returns its status.
 * When the wrapped module has no send_buffer_nb, nothing is sent and
 * ORTE_SUCCESS is returned.
 */
int orte_rml_ftrm_send_buffer_nb(orte_process_name_t* peer,
                                 opal_buffer_t* buffer,
                                 orte_rml_tag_t tag,
                                 orte_rml_buffer_callback_fn_t cbfunc,
                                 void* cbdata)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: send_buffer_nb(%s, %d )",
                        ORTE_NAME_PRINT(peer), tag);

    if (NULL == orte_rml_ftrm_wrapped_module.send_buffer_nb) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.send_buffer_nb(peer, buffer, tag,
                                                       cbfunc, cbdata);
}
/*
 * Recv Non-blocking
 *
 * Forwards the posting of an iovec receive to the wrapped module;
 * does nothing when the wrapped module has no recv_nb.
 */
void orte_rml_ftrm_recv_nb(orte_process_name_t* peer,
                           orte_rml_tag_t tag,
                           bool persistent,
                           orte_rml_callback_fn_t cbfunc,
                           void* cbdata)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: recv_nb(%s, %d, %d )",
                        ORTE_NAME_PRINT(peer), tag, persistent);

    if (NULL == orte_rml_ftrm_wrapped_module.recv_nb) {
        return;
    }
    orte_rml_ftrm_wrapped_module.recv_nb(peer, tag, persistent,
                                         cbfunc, cbdata);
}
/*
 * Recv Buffer Non-blocking
 *
 * Forwards the posting of a buffer receive to the wrapped module;
 * does nothing when the wrapped module has no recv_buffer_nb.
 */
void orte_rml_ftrm_recv_buffer_nb(orte_process_name_t* peer,
                                  orte_rml_tag_t tag,
                                  bool persistent,
                                  orte_rml_buffer_callback_fn_t cbfunc,
                                  void* cbdata)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: recv_buffer_nb(%s, %d, %d)",
                        ORTE_NAME_PRINT(peer), tag, persistent);

    if (NULL == orte_rml_ftrm_wrapped_module.recv_buffer_nb) {
        return;
    }
    orte_rml_ftrm_wrapped_module.recv_buffer_nb(peer, tag, persistent,
                                                cbfunc, cbdata);
}
/*
 * Recv Cancel
 *
 * Forwards the cancellation to the wrapped module; does nothing when
 * the wrapped module has no recv_cancel.
 */
void orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: recv_cancel()");

    if (NULL == orte_rml_ftrm_wrapped_module.recv_cancel) {
        return;
    }
    orte_rml_ftrm_wrapped_module.recv_cancel(peer, tag);
}
/*
 * Register a callback on loss of connection
 *
 * Forwards the registration to the wrapped module and returns its
 * status; ORTE_SUCCESS when the wrapped module has no
 * add_exception_handler.
 */
int orte_rml_ftrm_add_exception_handler(orte_rml_exception_callback_t cbfunc)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: add_exception_handler()");

    if (NULL == orte_rml_ftrm_wrapped_module.add_exception_handler) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.add_exception_handler(cbfunc);
}
/*
 * Remove a callback on loss of connection
 *
 * Forwards the removal to the wrapped module and returns its status;
 * ORTE_SUCCESS when the wrapped module has no del_exception_handler.
 */
int orte_rml_ftrm_del_exception_handler(orte_rml_exception_callback_t cbfunc)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: del_exception_handler()");

    if (NULL == orte_rml_ftrm_wrapped_module.del_exception_handler) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.del_exception_handler(cbfunc);
}
/*
 * FT Event
 *
 * Passes the fault-tolerance state on to the wrapped module and
 * returns its status; ORTE_SUCCESS when the wrapped module has no
 * ft_event.
 *
 * The wrapper itself has no work to do for any state, before or after
 * the wrapped call, so the state value is not inspected here.
 */
int orte_rml_ftrm_ft_event(int state)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: ft_event()");

    /*
     * The wrapped component is responsible for calling the OOB modules
     */
    if (NULL == orte_rml_ftrm_wrapped_module.ft_event) {
        return ORTE_SUCCESS;
    }
    return orte_rml_ftrm_wrapped_module.ft_event(state);
}
/*
 * Purge
 *
 * Forwards the purge request for a peer to the wrapped module; does
 * nothing when the wrapped module has no purge.
 */
void orte_rml_ftrm_purge(orte_process_name_t *peer)
{
    opal_output_verbose(20, rml_ftrm_output_handle,
                        "orte_rml_ftrm: purge()");

    if (NULL == orte_rml_ftrm_wrapped_module.purge) {
        return;
    }
    orte_rml_ftrm_wrapped_module.purge(peer);
}

Просмотреть файл

@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -23,7 +24,6 @@ sources = \
rml_oob_contact.c \
rml_oob_exception.c \
rml_oob_ping.c \
rml_oob_recv.c \
rml_oob_send.c
# Make the output library in this directory, and name it either

Просмотреть файл

@ -14,7 +14,7 @@
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 -2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,7 +37,7 @@
BEGIN_C_DECLS
typedef struct {
struct orte_rml_module_t super;
struct orte_rml_base_module_t super;
opal_list_t exceptions;
opal_list_t queued_routing_messages;
opal_event_t *timer_event;
@ -48,7 +48,7 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
extern orte_rml_oob_module_t orte_rml_oob_module;
int orte_rml_oob_init(void);
int orte_rml_oob_fini(void);
void orte_rml_oob_fini(void);
int orte_rml_oob_ft_event(int state);
int orte_rml_oob_send_nb(orte_process_name_t* peer,
@ -64,43 +64,6 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_oob_recv_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_oob_recv_cancel(orte_process_name_t* peer,
orte_rml_tag_t tag);
int orte_rml_oob_open_channel(orte_process_name_t * peer,
opal_list_t * qos_attributes,
orte_rml_channel_callback_fn_t cbfunc,
void *cbdata);
int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
orte_rml_send_channel_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel,
opal_buffer_t *buffer,
orte_rml_tag_t tag,
orte_rml_send_buffer_channel_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_close_channel (orte_rml_channel_num_t channel,
orte_rml_channel_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_ping(const char* uri,
const struct timeval* tv);

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -51,7 +51,7 @@
#include "orte/mca/oob/base/base.h"
#include "rml_oob.h"
static orte_rml_module_t* rml_oob_init(int* priority);
static orte_rml_base_module_t* rml_oob_init(int* priority);
static int rml_oob_open(void);
static int rml_oob_close(void);
@ -80,7 +80,6 @@ orte_rml_component_t mca_rml_oob_component = {
orte_rml_oob_module_t orte_rml_oob_module = {
{
.enable_comm = orte_rml_oob_init,
.finalize = orte_rml_oob_fini,
.get_contact_info = orte_rml_oob_get_uri,
@ -91,20 +90,10 @@ orte_rml_oob_module_t orte_rml_oob_module = {
.send_nb = orte_rml_oob_send_nb,
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
.recv_nb = orte_rml_oob_recv_nb,
.recv_buffer_nb = orte_rml_oob_recv_buffer_nb,
.recv_cancel = orte_rml_oob_recv_cancel,
.add_exception_handler = orte_rml_oob_add_exception,
.del_exception_handler = orte_rml_oob_del_exception,
.ft_event = orte_rml_oob_ft_event,
.purge = orte_rml_oob_purge,
.open_channel = orte_rml_oob_open_channel,
.send_channel_nb = orte_rml_oob_send_channel_nb,
.send_buffer_channel_nb = orte_rml_oob_send_buffer_channel_nb,
.close_channel = orte_rml_oob_close_channel
.purge = orte_rml_oob_purge
}
};
@ -124,7 +113,8 @@ rml_oob_close(void)
return ORTE_SUCCESS;
}
static orte_rml_module_t*
static orte_rml_base_module_t*
rml_oob_init(int* priority)
{
if (init_done) {
@ -150,7 +140,7 @@ orte_rml_oob_init(void)
}
int
void
orte_rml_oob_fini(void)
{
opal_list_item_t *item;
@ -163,8 +153,6 @@ orte_rml_oob_fini(void)
/* clear the base receive */
orte_rml_base_comm_stop();
return ORTE_SUCCESS;
}
#if OPAL_ENABLE_FT_CR == 1

Просмотреть файл

@ -1,99 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/mca/rml/base/base.h"
#include "rml_oob.h"
/*
 * Post a non-blocking receive for an iovec message.
 *
 * The receive is described in a request object and handed to the event
 * base, where orte_rml_base_post_recv processes it. peer is
 * dereferenced and must not be NULL.
 */
void orte_rml_oob_recv_nb(orte_process_name_t* peer,
                          orte_rml_tag_t tag,
                          bool persistent,
                          orte_rml_callback_fn_t cbfunc,
                          void* cbdata)
{
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_nb for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what to match */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver: iovec callback, not a buffer */
    request->post->buffer_data = false;
    request->post->cbfunc.iov = cbfunc;
    request->post->cbdata = cbdata;

    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}
/*
 * Post a non-blocking receive for a buffer message.
 *
 * The receive is described in a request object and handed to the event
 * base, where orte_rml_base_post_recv processes it. peer is
 * dereferenced and must not be NULL.
 */
void orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer,
                                 orte_rml_tag_t tag,
                                 bool persistent,
                                 orte_rml_buffer_callback_fn_t cbfunc,
                                 void* cbdata)
{
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_buffer_nb for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what to match */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver: buffer callback */
    request->post->buffer_data = true;
    request->post->cbfunc.buffer = cbfunc;
    request->post->cbdata = cbdata;

    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}
/*
 * Cancel a posted non-blocking receive.
 *
 * A request marked as a cancellation is handed to the event base,
 * where orte_rml_base_post_recv processes it. peer is dereferenced
 * and must not be NULL.
 */
void orte_rml_oob_recv_cancel(orte_process_name_t* peer,
                              orte_rml_tag_t tag)
{
    orte_rml_recv_request_t *request;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_recv_cancel for peer %s tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    request = OBJ_NEW(orte_rml_recv_request_t);
    request->cancel = true;
    request->post->tag = tag;
    request->post->peer.vpid = peer->vpid;
    request->post->peer.jobid = peer->jobid;

    opal_event_set(orte_event_base, &request->ev, -1,
                   OPAL_EV_WRITE,
                   orte_rml_base_post_recv, request);
    opal_event_set_priority(&request->ev, ORTE_MSG_PRI);
    opal_event_active(&request->ev, OPAL_EV_WRITE, 1);
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,7 +34,7 @@
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/rml_types.h"
#include "rml_oob.h"
#include "orte/mca/qos/base/base.h"
typedef struct {
opal_object_t object;
opal_event_t ev;
@ -95,8 +95,8 @@ static void send_self_exe(int fd, short args, void* data)
static void send_msg(int fd, short args, void *cbdata)
{
orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
orte_process_name_t *peer = &(req->post.send.dst);
orte_rml_tag_t tag = req->post.send.tag;
orte_process_name_t *peer = &(req->send.dst);
orte_rml_tag_t tag = req->send.tag;
orte_rml_recv_t *rcv;
orte_rml_send_t *snd;
int bytes;
@ -135,16 +135,16 @@ static void send_msg(int fd, short args, void *cbdata)
/* setup the send callback */
xfer = OBJ_NEW(orte_self_send_xfer_t);
if (NULL != req->post.send.iov) {
xfer->iov = req->post.send.iov;
xfer->count = req->post.send.count;
xfer->cbfunc.iov = req->post.send.cbfunc.iov;
if (NULL != req->send.iov) {
xfer->iov = req->send.iov;
xfer->count = req->send.count;
xfer->cbfunc.iov = req->send.cbfunc.iov;
} else {
xfer->buffer = req->post.send.buffer;
xfer->cbfunc.buffer = req->post.send.cbfunc.buffer;
xfer->buffer = req->send.buffer;
xfer->cbfunc.buffer = req->send.cbfunc.buffer;
}
xfer->tag = tag;
xfer->cbdata = req->post.send.cbdata;
xfer->cbdata = req->send.cbdata;
/* setup the event for the send callback */
opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
@ -154,11 +154,11 @@ static void send_msg(int fd, short args, void *cbdata)
rcv = OBJ_NEW(orte_rml_recv_t);
rcv->sender = *peer;
rcv->tag = tag;
if (NULL != req->post.send.iov) {
if (NULL != req->send.iov) {
/* get the total number of bytes in the iovec array */
bytes = 0;
for (i = 0 ; i < req->post.send.count ; ++i) {
bytes += req->post.send.iov[i].iov_len;
for (i = 0 ; i < req->send.count ; ++i) {
bytes += req->send.iov[i].iov_len;
}
/* get the required memory allocation */
if (0 < bytes) {
@ -166,15 +166,15 @@ static void send_msg(int fd, short args, void *cbdata)
rcv->iov.iov_len = bytes;
/* transfer the bytes */
ptr = (char*)rcv->iov.iov_base;
for (i = 0 ; i < req->post.send.count ; ++i) {
memcpy(ptr, req->post.send.iov[i].iov_base, req->post.send.iov[i].iov_len);
ptr += req->post.send.iov[i].iov_len;
for (i = 0 ; i < req->send.count ; ++i) {
memcpy(ptr, req->send.iov[i].iov_base, req->send.iov[i].iov_len);
ptr += req->send.iov[i].iov_len;
}
}
} else if (0 < req->post.send.buffer->bytes_used) {
rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.send.buffer->bytes_used);
memcpy(rcv->iov.iov_base, req->post.send.buffer->base_ptr, req->post.send.buffer->bytes_used);
rcv->iov.iov_len = req->post.send.buffer->bytes_used;
} else if (0 < req->send.buffer->bytes_used) {
rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->send.buffer->bytes_used);
memcpy(rcv->iov.iov_base, req->send.buffer->base_ptr, req->send.buffer->bytes_used);
rcv->iov.iov_len = req->send.buffer->bytes_used;
}
/* post the message for receipt - since the send callback was posted
* first and has the same priority, it will execute first
@ -188,32 +188,21 @@ static void send_msg(int fd, short args, void *cbdata)
snd->dst = *peer;
snd->origin = *ORTE_PROC_MY_NAME;
snd->tag = tag;
if (NULL != req->post.send.iov) {
snd->iov = req->post.send.iov;
snd->count = req->post.send.count;
snd->cbfunc.iov = req->post.send.cbfunc.iov;
if (NULL != req->send.iov) {
snd->iov = req->send.iov;
snd->count = req->send.count;
snd->cbfunc.iov = req->send.cbfunc.iov;
} else {
snd->buffer = req->post.send.buffer;
snd->cbfunc.buffer = req->post.send.cbfunc.buffer;
}
snd->cbdata = req->post.send.cbdata;
snd->channel = req->post.send.channel;
/* call send prep to prep the Qos channel for send */
if (NULL != snd->channel)
{
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
"%s send_msg sending on channel %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), snd->channel->channel_num));
orte_rml_base_prep_send_channel (snd->channel, snd);
snd->buffer = req->send.buffer;
snd->cbfunc.buffer = req->send.cbfunc.buffer;
}
snd->cbdata = req->send.cbdata;
/* activate the OOB send state */
ORTE_OOB_SEND(snd);
OBJ_RELEASE(req);
}
int orte_rml_oob_send_nb(orte_process_name_t* peer,
struct iovec* iov,
int count,
@ -243,12 +232,12 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer,
* race conditions and threads
*/
req = OBJ_NEW(orte_rml_send_request_t);
req->post.send.dst = *peer;
req->post.send.iov = iov;
req->post.send.count = count;
req->post.send.tag = tag;
req->post.send.cbfunc.iov = cbfunc;
req->post.send.cbdata = cbdata;
req->send.dst = *peer;
req->send.iov = iov;
req->send.count = count;
req->send.tag = tag;
req->send.cbfunc.iov = cbfunc;
req->send.cbdata = cbdata;
/* setup the event for the send callback */
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
@ -257,7 +246,6 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer,
return ORTE_SUCCESS;
}
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
@ -286,11 +274,11 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
* race conditions and threads
*/
req = OBJ_NEW(orte_rml_send_request_t);
req->post.send.dst = *peer;
req->post.send.buffer = buffer;
req->post.send.tag = tag;
req->post.send.cbfunc.buffer = cbfunc;
req->post.send.cbdata = cbdata;
req->send.dst = *peer;
req->send.buffer = buffer;
req->send.tag = tag;
req->send.cbfunc.buffer = cbfunc;
req->send.cbdata = cbdata;
/* setup the event for the send callback */
opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
@ -298,145 +286,3 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
return ORTE_SUCCESS;
}
/*
 * Request that a channel be opened to a peer.
 *
 * The request is not carried out here: it is packaged and handed to
 * the event base, where orte_rml_base_open_channel processes it.
 *
 * Returns ORTE_ERR_BAD_PARAM (after logging it) when peer is NULL or
 * equal to ORTE_NAME_INVALID, ORTE_SUCCESS once the request has been
 * handed to the event base.
 */
int orte_rml_oob_open_channel(orte_process_name_t * peer,
                              opal_list_t *qos_attributes,
                              orte_rml_channel_callback_fn_t cbfunc,
                              void *cbdata)
{
    orte_rml_send_request_t *open_req;
    bool invalid_peer;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel to peer %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    /* the name comparison is only evaluated for a non-NULL peer */
    invalid_peer = (NULL == peer) ||
                   (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer));
    if (invalid_peer) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* process the request in an event to be safe */
    open_req = OBJ_NEW(orte_rml_send_request_t);
    open_req->post.open_channel.dst = *peer;
    open_req->post.open_channel.qos_attributes = qos_attributes;
    open_req->post.open_channel.cbfunc = cbfunc;
    open_req->post.open_channel.cbdata = cbdata;

    /* setup the event for the open callback */
    opal_event_set(orte_event_base, &open_req->ev, -1, OPAL_EV_WRITE,
                   orte_rml_base_open_channel, open_req);
    opal_event_set_priority(&open_req->ev, ORTE_MSG_PRI);
    opal_event_active(&open_req->ev, OPAL_EV_WRITE, 1);

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_open_channel to peer %s - set event done",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    return ORTE_SUCCESS;
}
/**
 * Queue a non-blocking iovec send over an existing channel.
 *
 * The channel is looked up with orte_rml_base_get_channel() and the
 * destination is taken from the channel's peer field. The request is
 * handed to send_msg through an activated event, so a successful return
 * only means the request was queued.
 *
 * @param channel_num  number of the channel to send on
 * @param msg          iovec array to send; only the pointer is stored
 * @param count        stored as the request's iovec count (presumably the
 *                     number of entries in msg - confirm against callers)
 * @param tag          RML tag; ORTE_RML_TAG_INVALID is rejected
 * @param cbfunc       callback stored in the request's cbfunc.iov_chan slot
 * @param cbdata       opaque pointer stored alongside the callback
 *
 * @retval ORTE_SUCCESS        the request event was activated
 * @retval ORTE_ERR_BAD_PARAM  invalid tag, or no channel for channel_num
 */
int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel_num,
                                  struct iovec* msg,
                                  int count,
                                  orte_rml_tag_t tag,
                                  orte_rml_send_channel_callback_fn_t cbfunc,
                                  void* cbdata)
{
    orte_rml_send_request_t *req;
    orte_rml_channel_t *channel;

    /* this is the iovec variant, so do not label it as a buffer send */
    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send to channel %d at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         channel_num, tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    channel = orte_rml_base_get_channel (channel_num);
    if (NULL == channel) {
        /* cannot send to a non existing or closed channel */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* get ourselves into an event to protect against
     * race conditions and threads
     */
    req = OBJ_NEW(orte_rml_send_request_t);
    req->post.send.dst = channel->peer;
    req->post.send.iov = msg;
    req->post.send.count = count;
    req->post.send.tag = tag;
    req->post.send.cbfunc.iov_chan = cbfunc;
    req->post.send.cbdata = cbdata;
    req->post.send.channel = channel;

    /* setup the event for the send callback */
    opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req);
    opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
    opal_event_active(&req->ev, OPAL_EV_WRITE, 1);

    return ORTE_SUCCESS;
}
/**
 * Queue a non-blocking buffer send over an existing channel.
 *
 * The channel is resolved from its number and its peer becomes the
 * destination. The request is handed to send_msg through an activated
 * event, so a successful return only means the request was queued.
 *
 * @param channel_num  number of the channel to send on
 * @param buffer       buffer to send; only the pointer is stored
 * @param tag          RML tag; ORTE_RML_TAG_INVALID is rejected
 * @param cbfunc       callback stored in the request's cbfunc.buf_chan slot
 * @param cbdata       opaque pointer stored alongside the callback
 *
 * @retval ORTE_SUCCESS        the request event was activated
 * @retval ORTE_ERR_BAD_PARAM  invalid tag, or no channel for channel_num
 */
int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel_num,
                                         opal_buffer_t *buffer,
                                         orte_rml_tag_t tag,
                                         orte_rml_send_buffer_channel_callback_fn_t cbfunc,
                                         void* cbdata)
{
    orte_rml_channel_t *chan;
    orte_rml_send_request_t *snd_req;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send_buffer to channel %d at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         channel_num, tag));

    /* an invalid tag can never be delivered */
    if (ORTE_RML_TAG_INVALID == tag) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* the channel must exist (lookup yields NULL otherwise) */
    chan = orte_rml_base_get_channel (channel_num);
    if (NULL == chan) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* package the send so it is processed from within an event,
     * protecting against race conditions and threads */
    snd_req = OBJ_NEW(orte_rml_send_request_t);
    snd_req->post.send.channel = chan;
    snd_req->post.send.dst = chan->peer;
    snd_req->post.send.tag = tag;
    snd_req->post.send.buffer = buffer;
    snd_req->post.send.cbdata = cbdata;
    snd_req->post.send.cbfunc.buf_chan = cbfunc;

    /* arm the event that runs send_msg */
    opal_event_set(orte_event_base, &snd_req->ev, -1, OPAL_EV_WRITE,
                   send_msg, snd_req);
    opal_event_set_priority(&snd_req->ev, ORTE_MSG_PRI);
    opal_event_active(&snd_req->ev, OPAL_EV_WRITE, 1);

    return ORTE_SUCCESS;
}
/**
 * Request that a channel be closed.
 *
 * The channel is looked up with orte_rml_base_get_channel(). The close
 * itself is performed by orte_rml_base_close_channel from an activated
 * event, so a successful return only means the request was queued.
 *
 * @param channel_num  number of the channel to close
 * @param cbfunc       callback stored in the request
 * @param cbdata       opaque pointer stored alongside the callback
 *
 * @retval ORTE_SUCCESS        the request event was activated
 * @retval ORTE_ERR_BAD_PARAM  no channel found for channel_num
 */
int orte_rml_oob_close_channel (orte_rml_channel_num_t channel_num,
                                orte_rml_channel_callback_fn_t cbfunc,
                                void* cbdata)
{
    orte_rml_channel_t *channel;
    orte_rml_send_request_t *req;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_close_channel channel num %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         channel_num));

    channel = orte_rml_base_get_channel (channel_num);
    if (NULL == channel) {
        /* cannot close a non existing or already closed channel;
         * log it the same way the channel send functions do */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* process the request in an event to be safe */
    req = OBJ_NEW(orte_rml_send_request_t);
    req->post.close_channel.channel = channel;
    req->post.close_channel.cbfunc = cbfunc;
    req->post.close_channel.cbdata = cbdata;

    /* setup the event for the close callback */
    opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, orte_rml_base_close_channel, req);
    opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
    opal_event_active(&req->ev, OPAL_EV_WRITE, 1);

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
@ -57,7 +57,8 @@ BEGIN_C_DECLS
struct opal_buffer_t;
struct orte_rml_module_t;
struct orte_rml_base_module_t;
struct orte_rml_API_module_t;
typedef struct {
opal_object_t super;
orte_process_name_t name;
@ -109,8 +110,7 @@ ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status,
* @retval NULL An error occurred and initialization did not occur
* @retval non-NULL The module was successfully initialized
*/
typedef struct orte_rml_module_t* (*orte_rml_component_init_fn_t)(int *priority);
typedef struct orte_rml_base_module_t* (*orte_rml_component_init_fn_t)(int *priority);
/**
* RML component interface
@ -235,14 +235,8 @@ typedef int (*orte_rml_module_enable_comm_fn_t)(void);
* all resources associated with the module. After the finalize
* function is called, all interface functions (and the module
* structure itself) are not available for use.
*
* @note Whether or not the finalize function returns successfully,
* the module should not be used once this function is called.
*
* @retval ORTE_SUCCESS Success
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_finalize_fn_t)(void);
typedef void (*orte_rml_module_finalize_fn_t)(void);
/**
@ -595,12 +589,8 @@ typedef int (*orte_rml_module_close_channel_fn_t)( orte_rml_channel_num_t channe
/**
* RML module interface
*
* Module interface to the RML communication system. A global
* instance of this module, orte_rml, provides an interface into the
* active RML interface.
*/
struct orte_rml_module_t {
struct orte_rml_base_module_t {
/** Enable communication once a process name has been assigned */
orte_rml_module_enable_comm_fn_t enable_comm;
/** Shutdown the communication system and clean up resources */
@ -638,24 +628,13 @@ struct orte_rml_module_t {
/** Purge information */
orte_rml_module_purge_fn_t purge;
/** Open a qos messaging channel to a peer*/
orte_rml_module_open_channel_fn_t open_channel;
/** send a non blocking iovec message over a channel */
orte_rml_module_send_channel_nb_fn_t send_channel_nb;
/** send a non blocking buffer message over a channel */
orte_rml_module_send_buffer_channel_nb_fn_t send_buffer_channel_nb;
/** close a qos messaging channel */
orte_rml_module_close_channel_fn_t close_channel;
};
/** Convienence typedef */
typedef struct orte_rml_module_t orte_rml_module_t;
/** Convenience typedef */
typedef struct orte_rml_base_module_t orte_rml_base_module_t;
/** Interface for RML communication */
ORTE_DECLSPEC extern orte_rml_module_t orte_rml;
ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
/* ******************************************************************** */