diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 4beadbf000..9fe39c7fc9 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -14,7 +14,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +50,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/dfs/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" @@ -394,17 +393,6 @@ int orte_ess_base_orted_setup(char **hosts) /* add our contact info */ proc->rml_uri = orte_rml.get_contact_info(); - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index ea60dc72d8..bc10a1ec25 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -11,10 +11,10 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +43,6 @@ #include "orte/mca/oob/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/iof/base/base.h" @@ -120,17 +119,6 @@ int orte_ess_base_tool_setup(void) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index b125610e03..303e279aa8 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +55,6 @@ #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" @@ -342,16 +341,6 @@ static int rte_init(void) goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - error = "orte_qos_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { error = "orte_errmgr_base_select"; goto error; diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index 584e58268b..6f417c6e17 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -45,7 +45,7 @@ #include "orte/mca/mca.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" + BEGIN_C_DECLS typedef int (*mca_oob_base_component_avail_fn_t)(void); diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index 8f9d657e2f..1d996e5c17 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -398,10 +398,10 @@ static void process_send(int fd, short args, void *cbdata) orte_process_name_t hop; opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d to channel =%d seq_num = %d", + "%s:[%s:%d] processing send to peer %s:%d seq_num = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, - ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->dst_channel, op->msg->seq_num); + ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->seq_num); /* do we have a route to this peer (could be direct)? */ hop = orte_routed.get_route(&op->msg->dst); diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 5be6bcfa01..5bd00385c9 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -704,9 +704,9 @@ static void component_shutdown(void) static int component_send(orte_rml_send_t *msg) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:tcp:send_nb to peer %s:%d to channel=%d seq = %d", + "%s oob:tcp:send_nb to peer %s:%d seq = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->dst), msg->tag,msg->dst_channel, msg->seq_num ); + ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num ); /* the module is potentially running on its own event * base, so all it can do is push our send request @@ -1093,7 +1093,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) snd->dst = mop->snd->hdr.dst; snd->origin = mop->snd->hdr.origin; snd->tag = mop->snd->hdr.tag; - snd->dst_channel = mop->snd->hdr.channel; snd->seq_num = mop->snd->hdr.seq_num; snd->data = mop->snd->data; snd->count = mop->snd->hdr.nbytes; diff --git a/orte/mca/oob/tcp/oob_tcp_hdr.h b/orte/mca/oob/tcp/oob_tcp_hdr.h index 057ec2cb68..692a294cbe 100644 --- a/orte/mca/oob/tcp/oob_tcp_hdr.h +++ b/orte/mca/oob/tcp/oob_tcp_hdr.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -56,8 +56,6 @@ typedef struct { mca_oob_tcp_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; - /* the rml channel where this message is headed */ - orte_rml_channel_num_t channel; /* the seq number of this message */ uint32_t seq_num; /* number of bytes in message */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 35e72a702e..f649cb2e97 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -196,12 +196,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name))); opal_event_del(&peer->send_event); msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; goto next; @@ -228,12 +223,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } else if (NULL != msg->msg->data) { @@ -268,12 +258,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } @@ -290,12 +275,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), peer->sd); opal_event_del(&peer->send_event); msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; ORTE_FORCED_TERMINATE(1); @@ -570,12 +550,13 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->hdr.dst.vpid == 
ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the RML for delivery */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "%s DELIVERING TO RML tag = %d channel = %d seq_num = %d", + "%s DELIVERING TO RML tag = %d seq_num = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, + peer->recv_msg->hdr.tag, peer->recv_msg->hdr.seq_num); - ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, - peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, + ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, + peer->recv_msg->hdr.tag, + peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); OBJ_RELEASE(peer->recv_msg); @@ -591,7 +572,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) snd->origin = peer->recv_msg->hdr.origin; snd->tag = peer->recv_msg->hdr.tag; snd->data = peer->recv_msg->data; - snd->dst_channel = peer->recv_msg->hdr.channel; snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; snd->cbfunc.iov = NULL; diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index d8ac555b96..03e48303ec 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -107,18 +107,16 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); mca_oob_tcp_send_t *msg; \ int i; \ opal_output_verbose(5, orte_oob_base_framework.framework_output, \ - "%s:[%s:%d] queue send to %s channel =%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_NAME_PRINT(&((m)->dst)), \ - (m)->dst_channel); \ + "%s:[%s:%d] queue send to %s", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + ORTE_NAME_PRINT(&((m)->dst))); \ msg = OBJ_NEW(mca_oob_tcp_send_t); \ /* setup the header */ \ msg->hdr.origin = (m)->origin; \ msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ @@ -163,7 +161,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ diff --git a/orte/mca/qos/Makefile.am b/orte/mca/qos/Makefile.am deleted file mode 100644 index 94a8e9d148..0000000000 --- a/orte/mca/qos/Makefile.am +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_qos.la -libmca_qos_la_SOURCES = - -# pkgdata setup -dist_ortedata_DATA = - -# local files -headers = qos.h -libmca_qos_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(orteincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/qos/ack/Makefile.am b/orte/mca/qos/ack/Makefile.am deleted file mode 100644 index d71b9a6f2c..0000000000 --- a/orte/mca/qos/ack/Makefile.am +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - qos_ack.h \ - qos_ack_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_qos_ack_DSO -component_noinst = -component_install = mca_qos_ack.la -else -component_noinst = libmca_qos_ack.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_qos_ack_la_SOURCES = $(sources) -mca_qos_ack_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_qos_ack_la_SOURCES = $(sources) -libmca_qos_ack_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/qos/ack/qos_ack.h b/orte/mca/qos/ack/qos_ack.h deleted file mode 100644 index 0b0e8ef276..0000000000 --- a/orte/mca/qos/ack/qos_ack.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * QoS Ack Component interface - * - * - * - */ - -#ifndef MCA_QOS_ACK_H -#define MCA_QOS_ACK_H - -#include "orte_config.h" -#include "orte/mca/qos/qos.h" -#include "orte/mca/qos/base/base.h" -#include "opal/class/opal_hotel.h" - -BEGIN_C_DECLS - -#define QOS_ACK_SEQ_NUM_UNINITIALIZED 0 -#define QOS_ACK_MAX_WINDOW 100 -#define QOS_ACK_MAX_OUTSTANDING_MSGS (QOS_ACK_MAX_WINDOW *2) -/* window timeout in secs - 100 seconds ok? - TO DO: make this a QOS attribute that can be specified by the user */ -#define QOS_ACK_WINDOW_TIMEOUT_IN_SECS 1 -#define ACK_WINDOW_COMPLETE 0 -#define ACK_TIMEOUT 1 -#define ACK_OUT_OF_ORDER 2 -#define ACK_RECV_MISSED_MSG 3 /* received previously missed msgs*/ - -typedef enum { - orte_qos_ack_channel_state_inactive = 0, - orte_qos_ack_channel_state_filling_window = 1, - orte_qos_ack_channel_state_window_completed = 2, - orte_qos_ack_channel_state_awaiting_ack = 3, - orte_qos_ack_channel_state_received_ack = 4, -}orte_qos_ack_channel_state_t ; - -/* Ack Qos channel data structure */ -typedef struct orte_qos_ack_channel { - opal_list_item_t super; - uint32_t channel_num; - // we retain the attributes so we can compare channels - we can get rid of this and compare incoming attributes - // with attributes of interest to this channel type - opal_list_t attributes; - /* size of the message window */ - uint32_t window; - /* window timeout in secs.*/ - uint32_t timeout_secs; - /* retry msg window on ack fail */ - bool retry; - /* seq number of the first msg in the active window */ - uint32_t window_first_seq_num; - /* sequence number of last outgoing msg */ - uint32_t out_msg_seq_num; - /* sequence number of last incoming msg */ - uint32_t in_msg_seq_num; - /* sequence number of the last message acked */ - uint32_t ack_msg_seq_num; - /* ACK outstanding msgs hotel */ - opal_hotel_t outstanding_msgs; - /* array for mapping msg seq num to room num for 
outgoing msgs in hotels */ - int seq_num_to_room_num[QOS_ACK_MAX_OUTSTANDING_MSGS]; - /* channel state */ - orte_qos_ack_channel_state_t state; - /* window timer event */ - opal_event_t msg_ack_timer_event; -}orte_qos_ack_channel_t; - -OBJ_CLASS_DECLARATION(orte_qos_ack_channel_t); - -extern orte_qos_module_t orte_qos_ack_module; -static inline int orte_qos_ack_channel_get_msg_room (orte_qos_ack_channel_t * ack_chan, - uint32_t seq_num) -{ - return ack_chan->seq_num_to_room_num[(seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)]; -} - -static inline void orte_qos_ack_channel_set_msg_room (orte_qos_ack_channel_t * ack_chan, - uint32_t seq_num, int room_num) -{ - ack_chan->seq_num_to_room_num[(seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)] = room_num; -} - -ORTE_DECLSPEC void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, - int room_num, void *occupant); -ORTE_DECLSPEC void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata); -ORTE_DECLSPEC void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel, - int room_num, void *occupant); -END_C_DECLS - -#endif /* MCA_QOS_ACK_H */ diff --git a/orte/mca/qos/ack/qos_ack_component.c b/orte/mca/qos/ack/qos_ack_component.c deleted file mode 100644 index a98929f942..0000000000 --- a/orte/mca/qos/ack/qos_ack_component.c +++ /dev/null @@ -1,718 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/oob/base/base.h" -#include "orte/mca/qos/base/base.h" -#include "orte/mca/qos/qos.h" -#include "qos_ack.h" - -/* ack module functions */ -static int qos_ack_start (void); -static void qos_ack_shutdown (void); -static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num); -static int ack_open (void *qos_channel, - opal_buffer_t * buf); -static int ack_send ( void *qos_channel, orte_rml_send_t *msg); -static int ack_recv (void *channel, orte_rml_recv_t *msg); -static int ack_close (void * channel); -static int ack_init_recv (void *channel, opal_list_t *attributes); -static int ack_cmp (void *channel, opal_list_t *attributes); -static void ack_send_callback (orte_rml_send_t *msg); - -/* utility functions */ -static inline int send_ack (orte_qos_ack_channel_t * channel, - orte_rml_channel_num_t channel_num, - uint32_t ack_type, - uint32_t last_msg_seq_num); - -void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, - opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); - -void orte_qos_ack_msg_send_callback ( int status, - orte_process_name_t *peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata); -static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *channel, - orte_rml_recv_t *msg); -/** - * ack module definition - */ -orte_qos_module_t orte_qos_ack_module = { - ack_create, - ack_open, - ack_send, - ack_recv, - ack_close, - ack_init_recv, - ack_cmp, - ack_send_callback -}; - -/** - * component definition - */ -mca_qos_base_component_t mca_qos_ack_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - MCA_QOS_BASE_VERSION_2_0_0, - - "ack", /* MCA component name */ - 
ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - NULL, - NULL, - }, - qos_ack_start, - qos_ack_shutdown, - orte_qos_ack, - { - ack_create, - ack_open, - ack_send, - ack_recv, - ack_close, - ack_init_recv, - ack_cmp, - ack_send_callback - } -}; - -static int qos_ack_start(void) { - orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_MSG_ACK, - ORTE_RML_PERSISTENT, orte_qos_ack_channel_process_ack, - NULL); - /* post a persistent recieve for ACK TAG */ - return ORTE_SUCCESS; -} - -static void qos_ack_shutdown (void) { -} - -static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num) { - orte_qos_ack_channel_t * ack_chan; - int32_t rc; - uint32_t *type, type_val, *attribute, attribute_val; - type_val = orte_qos_ack; - ack_chan = OBJ_NEW (orte_qos_ack_channel_t); - ack_chan->channel_num = channel_num; - type = &type_val; - attribute = &attribute_val; - /* validate and store ack specific channel attributes */ - /* set channel type */ - if (ORTE_SUCCESS == (rc = orte_set_attribute( &ack_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { - if( orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&attribute, OPAL_UINT32)) { - if ( QOS_ACK_MAX_WINDOW < (*attribute)) { - ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS); - OBJ_RELEASE(ack_chan); - } - else { - ack_chan->window = *attribute; - if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_WINDOW_SIZE, - ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ack_chan); - } else { - if( orte_get_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&attribute, OPAL_UINT32)) { - ack_chan->timeout_secs = *attribute; - if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_ACK_NACK_TIMEOUT, - ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) { - 
ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ack_chan); - } else { - if( orte_get_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, NULL, OPAL_BOOL)) { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_create created channel = %p window = %d timeout =%d retry = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)ack_chan, - ack_chan->window, - ack_chan->timeout_secs, - ack_chan->retry)); - ack_chan->retry = true; - if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_MSG_RETRY, - ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ack_chan); - } - } else { - ack_chan->retry = false; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_create created channel = %p window = %d timeout =%d retry = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)ack_chan, - ack_chan->window, - ack_chan->timeout_secs, - ack_chan->retry)); - } - } - }else { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ack_chan); - } - } - } - }else - OBJ_RELEASE(ack_chan); - }else { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(ack_chan); - } - return ack_chan; -} - -static int ack_open (void *qos_channel, opal_buffer_t * buf) { - int32_t rc = ORTE_SUCCESS; - uint32_t eviction_timeout; - orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t*) (qos_channel); - /* TO DO - need to adjust eviction timeout according to window size - lets keep max time out for the first pass */ - eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; - /* init outstanding msg hotel */ - opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, - orte_event_base, eviction_timeout, 0, - orte_qos_ack_msg_ack_timeout_callback); - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_open channel = %p init hotel timeout =%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)ack_chan, eviction_timeout)); - /* set the message window timer event, but don't activate it */ - 
/*opal_event_set(opal_event_base, - &ack_chan->msg_window_timer_event, - -1, 0, orte_qos_ack_msg_window_timeout_callback, - ack_chan); - opal_event_set_priority(&ack_chan->msg_window_timer_event, ORTE_MSG_PRI);*/ - /* the Qos module puts the non local attributes to be sent to the peer in a list at the time of create. - pack those attributes into the buffer.*/ - if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buf, &ack_chan->attributes))) - ORTE_ERROR_LOG(rc); - return rc; -} - -static int ack_send ( void *qos_channel, orte_rml_send_t *msg) { - int32_t room_num; - orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) (qos_channel); - if (ack_chan->out_msg_seq_num == ack_chan->window_first_seq_num -1 ) { - /* begining msg window */ - ack_chan->out_msg_seq_num = ack_chan->window_first_seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_send msg = %p to peer = %s\n begining window at seq_num = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, ORTE_NAME_PRINT(&msg->dst), ack_chan->out_msg_seq_num)); - ack_chan->state = orte_qos_ack_channel_state_filling_window; - } - else - ack_chan->out_msg_seq_num++; - if(ack_chan->out_msg_seq_num - ack_chan->window_first_seq_num == ack_chan->window - 1) { - /* we are at the end of the window. 
*/ - /* update state */ - ack_chan->state = orte_qos_ack_channel_state_window_completed; - /* set begin window for next sequence */ - ack_chan->window_first_seq_num = ack_chan->out_msg_seq_num + 1; - } - msg->seq_num = ack_chan->out_msg_seq_num; - /* check msg into hotel */ - if( OPAL_SUCCESS == (opal_hotel_checkin(&ack_chan->outstanding_msgs, msg, &room_num ))) { - /* store room number */ - orte_qos_ack_channel_set_msg_room(ack_chan, msg->seq_num, room_num); - } else { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_send msg = %p to peer = %s returned with error %d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, ORTE_NAME_PRINT(&msg->dst), - ORTE_ERR_QOS_ACK_WINDOW_FULL)); - return ORTE_ERR_QOS_ACK_WINDOW_FULL; - } - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_send msg = %p to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, ORTE_NAME_PRINT(&msg->dst))); - return ORTE_SUCCESS; -} - -static inline int send_ack (orte_qos_ack_channel_t * ack_chan, - orte_rml_channel_num_t channel_num, - uint32_t ack_type, uint32_t last_msg_seq_num) -{ - int rc; - orte_rml_channel_t *rml_channel; - opal_buffer_t *buffer; - uint32_t num_msgs_to_ack = 0; - uint32_t *ack_seq_num_array; - uint32_t i; - rml_channel = orte_rml_base_get_channel (channel_num); - num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num + 1; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s sending ack type = %d \n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_type)); - if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { - for (i = 1; i <= num_msgs_to_ack ; i++) { - ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv acking msg %d to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_seq_num_array[i-1], - ORTE_NAME_PRINT(&rml_channel->peer))); - } - 
ack_seq_num_array[num_msgs_to_ack - 1] = last_msg_seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv acking last msg %d to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_seq_num_array[num_msgs_to_ack - 1], - ORTE_NAME_PRINT(&rml_channel->peer))); - } - else { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv cannot allocate ack array to send ack to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&rml_channel->peer))); - rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE; - return rc; - } - buffer = OBJ_NEW (opal_buffer_t); - /* pack channel number */ - opal_dss.pack (buffer, &rml_channel->peer_channel, 1, OPAL_UINT32); - /* pack ack type */ - opal_dss.pack (buffer, &ack_type, 1, OPAL_UINT32); - /* pack num messages */ - opal_dss.pack (buffer, &num_msgs_to_ack, 1, OPAL_UINT32); - /* pack seq number array */ - for (i =0; ipeer, buffer, ORTE_RML_TAG_MSG_ACK, - orte_qos_ack_msg_send_callback, rml_channel); - if(ORTE_SUCCESS == rc) { - /* update last acked msg */ - ack_chan->ack_msg_seq_num = last_msg_seq_num; - } else { - //TO DO - } - return rc; -} - -static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *ack_chan, - orte_rml_recv_t *msg) -{ - int32_t rc, room_num, first_lost_msg_seq_num, num_lost_msgs, i; - orte_rml_recv_t *out_msg; - void *occupant = NULL; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg msg %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->seq_num)); - /* if this msg is a duplicate - then do nothing */ - if ((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1) { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg msg %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->seq_num)); - rc = ORTE_ERR_DUPLICATE_MSG; - } - else { - if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)msg, &room_num))) { - return rc; - 
} - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "process_out_of_order_msg checked in msg %d in room %d\n", - msg->seq_num, room_num)); - orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num, room_num); - rc = ORTE_ERR_OUT_OF_ORDER_MSG; - /* check if we need to send an ACK */ - if (ack_chan->ack_msg_seq_num <= ack_chan->in_msg_seq_num) { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg sending ack last seq_num = %d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->seq_num)); - /* send ACK. */ - send_ack (ack_chan, msg->channel_num, ACK_OUT_OF_ORDER, msg->seq_num); - /* stop window ack timer */ - opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); - } - else { - /* if we got a lost msg - any seq num between in_msg_seq_num and ack_seq_num*/ - if (ack_chan->ack_msg_seq_num > msg->seq_num) { - /* check if we have got all lost msgs */ - first_lost_msg_seq_num = ack_chan->in_msg_seq_num + 1; - num_lost_msgs = ack_chan->ack_msg_seq_num - ack_chan->in_msg_seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg msg %d first_lost_msg =%d num_lost_msgs =%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->seq_num, first_lost_msg_seq_num, num_lost_msgs)); - for (i =0 ; i < num_lost_msgs; i++) { - if ((orte_qos_ack_channel_get_msg_room(ack_chan, first_lost_msg_seq_num +i)) == -1) - break; - } - if (i == num_lost_msgs) { - - /* we got all the lost msgs so we can complete all the msgs in the hotel now */ - /* reset ack_seq_num */ - ack_chan->ack_msg_seq_num = first_lost_msg_seq_num -1; - room_num = 0; - for ( i = 0; room_num != -1; i++) { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg got all lost msgs completing outstanding msgs %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (first_lost_msg_seq_num + i))); - /* evict msg and complete it */ - room_num = orte_qos_ack_channel_get_msg_room (ack_chan, 
first_lost_msg_seq_num +i); - opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); - orte_qos_ack_channel_set_msg_room(ack_chan, first_lost_msg_seq_num +i, -1); - out_msg = (orte_rml_recv_t *) occupant; - if ((NULL != out_msg) && (room_num != -1)) { - // set in seq num */ - ack_chan->in_msg_seq_num = out_msg->seq_num; - orte_rml_base_complete_recv_msg(&out_msg); - /* completing recv msg to rml */ - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "process_out_of_order_msg completed recv msg %d", - (first_lost_msg_seq_num + i))); - } else { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s process_out_of_order_msg lost msg %d not in hotel", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (first_lost_msg_seq_num + i))); - } - } //end for - /* send ACK */ - send_ack (ack_chan, ack_chan->channel_num, ACK_RECV_MISSED_MSG, - ack_chan->in_msg_seq_num); - } //end if (i== num_lost_msgs) - } // if (ack_chan->ack_msg_seq_num > msg->seq_num) - } //end else - } // end duplicate else - return rc; -} - -static int ack_recv (void *qos_channel, orte_rml_recv_t *msg) { - orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t*) (qos_channel); - int32_t rc; - struct timeval ack_timeout; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv msg = %p seq_num = %d from peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, msg->seq_num, - ORTE_NAME_PRINT(&msg->sender))); - /** HACK - drop every third msg to stimulate lost msg */ - /* if ((msg->seq_num == 3) && (hack == 0)) { - OBJ_RELEASE(msg); - hack = 1; - return ORTE_ERROR; - }*/ - /* check if this is the next expected msg*/ - if((ack_chan->in_msg_seq_num + 1 == msg->seq_num) && (ack_chan->ack_msg_seq_num < msg->seq_num)) - { - /* check if we are at the end of the window */ - if(ack_chan->window == (msg->seq_num - ack_chan->ack_msg_seq_num)) { - /* stop window ack timer */ - opal_event_evtimer_del 
(&ack_chan->msg_ack_timer_event); - rc = send_ack (ack_chan, msg->channel_num, ACK_WINDOW_COMPLETE, msg->seq_num); - } else { - if(ack_chan->in_msg_seq_num == ack_chan->ack_msg_seq_num) { - /* begining window -start window ack timer */ - ack_timeout.tv_sec = ack_chan->timeout_secs; - ack_timeout.tv_usec = 0; - opal_event_evtimer_add (&ack_chan->msg_ack_timer_event, &ack_timeout); - } - rc = ORTE_SUCCESS; - } - ack_chan->in_msg_seq_num = msg->seq_num; - } - else { - rc = process_out_of_order_msg(ack_chan, msg); - } - return rc; -} - -static int ack_close (void * channel) { - int32_t rc = ORTE_SUCCESS; - orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t*) (channel); - /* check if channel is busy (no outstanding msgs */ - if (opal_hotel_is_empty (&ack_chan->outstanding_msgs)) { - /* no outstanding msgs, release channel */ - OBJ_RELEASE(ack_chan); - rc = ORTE_SUCCESS; - } else - rc = ORTE_ERR_CHANNEL_BUSY; - return rc; -} - -static int ack_init_recv (void *channel, opal_list_t *attributes) { - int32_t rc = ORTE_SUCCESS; - uint32_t eviction_timeout; - orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t*) channel; - /* TO DO - need to adjust eviction timeout according to window size - lets keep max time out for the first pass */ - eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; - /* init outstanding msg hotel */ - opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, - orte_event_base, eviction_timeout, 0, - orte_qos_ack_recv_msg_timeout_callback); - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_open channel = %p init hotel timeout =%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)ack_chan, eviction_timeout)); - opal_event_evtimer_set (orte_event_base, &ack_chan->msg_ack_timer_event, - orte_qos_ack_msg_window_timeout_callback, (void *) ack_chan); - return rc; -} - -static int ack_cmp (void *channel, opal_list_t *attributes) { - return 
false; - -} - -static void ack_send_callback (orte_rml_send_t *msg) -{ - /* complete the request back to the user only upon receiving the ack - nothing to do here, just make sure that the request is in the hotel */ - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_send_callback for msg = %p seq num =%d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, msg->seq_num)); - /* if msg->status != SUCCESS - then evict all messages in the window and - complete them?? */ - if(ORTE_SUCCESS == msg->status) { -#if OPAL_ENABLE_DEBUG - orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t *) msg->channel->qos_channel_ptr; -#endif - // nothing to do - assert((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1); - } else { - // TO DO : error handling - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_send_callback for msg = %p seq num =%d SEND FAILED status = %d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, msg->seq_num, msg->status)); - /* evict message from hotel and send end of window to receiver?? 
*/ - - } -} - -void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, - int room_num, void *occupant) -{ - orte_rml_send_t *msg; - orte_qos_ack_channel_t *ack_chan; - msg = (orte_rml_send_t *) occupant; - ack_chan = (orte_qos_ack_channel_t*) msg->channel->qos_channel_ptr; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s orte_qos_ack_msg_ack_timeout_callback for msg = %p seq num =%d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, msg->seq_num)); - /* for now complete only the msg that timed out - TO DO : handle the completion of all messages in the window */ - msg->status = ORTE_ERR_ACK_TIMEOUT_SENDER; - // set room num to -1 for the msg's seq number - orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num , -1); - // complete the msg - ORTE_RML_SEND_COMPLETE(msg); -} - -void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel, - int room_num, void *occupant) -{ -#if OPAL_ENABLE_DEBUG - orte_rml_recv_t *msg = (orte_rml_recv_t *) occupant; -#endif -#if 0 - orte_qos_ack_channel_t *ack_chan; - orte_rml_channel_t *channel; - - channel = orte_rml_base_get_channel(msg->channel_num); - ack_chan = (orte_qos_ack_channel_t*) channel->qos_channel_ptr; -#endif - - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s OOPS received msg = %p seq num =%d timed out on ACK Queue\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, msg->seq_num)); - /* Need to determine correct action here as the sender hasn't responded yet to - a lost msg event */ - /* This is highly unlikely - lets assert to enable debug*/ - assert(0); - /* - // set room num to -1 for the msg's seq number - ack_chan->seq_num_to_room_num[msg->seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS] = -1; - // complete the msg - ORTE_RML_REACTIVATE_MESSAGE(msg);*/ -} - -void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - /* process ack received for the 
msg */ - uint32_t num_msgs_acked, channel_num, i; - int32_t num_values, room_num; - orte_rml_send_t *msg, *missed_msg; - void *occupant = NULL; - orte_rml_channel_t *channel; - orte_qos_ack_channel_t *ack_chan; - uint32_t *seq_num_array; - uint32_t ack_type; - uint32_t missed_msg_seq_num = 0; - num_values = 1; - /* unpack channel number first */ - opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32); - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_process_ack recieved ack on channel = %d", - channel_num)); - channel = orte_rml_base_get_channel (channel_num); - if ((NULL != channel) || (NULL != channel->qos_channel_ptr)) { - ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr); - seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window); - num_values = 1; - /* unpack ack type */ - opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32); - num_values = 1; - /* unpack num messages acked */ - opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32); - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_process_ack recieved ack type %d for %d msgs on channel = %d", - ack_type, num_msgs_acked, channel_num)); - if (ACK_OUT_OF_ORDER != ack_type) { - //handle normal ACK - for (i = 0; i < num_msgs_acked; i++) - { - opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); - room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); - opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); - orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); - if((occupant != NULL) && (room_num != -1)) { - msg = (orte_rml_send_t*) occupant; - OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, - "Releasing sent message with tag %d and seq_num %d after receiving Ack from dest ", - msg->tag, msg->seq_num )); - msg->status = ORTE_SUCCESS; - 
ORTE_RML_SEND_COMPLETE(msg); - } else { - OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, - "OOPS received an ACK for already completed seq_num =%d ", - seq_num_array[i] )); - } - } - } else { - // handle out of order ACK - complete msgs received in order, retry the lost msg. - for (i = 0; i < num_msgs_acked; i++) - { - opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); - room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); - opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); - orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); - if ((NULL != occupant) && ((i == 0 )|| (seq_num_array[i] == seq_num_array[i-1] +1 ))) { - msg = (orte_rml_send_t*) occupant; - msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg); - } else { - if (NULL != occupant) { - // num_missed_msgs = (seq_num_array[i] - seq_num_array [i-1] - 1); - assert( i == num_msgs_acked -1); - /* recheck the ith msg */ - opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num); - orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num); - /* resend and recheck all the missed msgs*/ - missed_msg_seq_num = seq_num_array[i-1] + 1; - for (; missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) { - room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num); - opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant); - assert ( NULL != occupant); - missed_msg = (orte_rml_send_t*) occupant; - missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW; - opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num); - orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num); - /* send this out on wire directly */ - ORTE_OOB_SEND (missed_msg); - } //end for - } else { - OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, - "OOPS received an ACK for already 
completed seq_num =%d ", - seq_num_array[i] )); - }//end if (NULL != occupant) - } //end else - } // end for - }//end out of order ack processing - free(seq_num_array); - }else { - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on non existent channel = %d", - channel_num)); - } -} - - -void orte_qos_ack_msg_send_callback ( int status, - orte_process_name_t *peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ -#if OPAL_ENABLE_DEBUG - orte_rml_channel_t *channel = (orte_rml_channel_t*) cbdata; -#endif - OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, - " orte_qos_ack_msg_send_callback channel num =%d status =%d", - channel->channel_num, status)); -} - -void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata) -{ - // int32_t rc; - orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) cbdata; - OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, - " orte_qos_ack_msg_window_timeout_callback for channel = %p last acked seq num = %d, last received seq num =%d", - (void*)ack_chan, ack_chan->ack_msg_seq_num, ack_chan->in_msg_seq_num )); - /* send ack message */ - send_ack(ack_chan, ack_chan->channel_num, ACK_TIMEOUT, ack_chan->in_msg_seq_num); - -} - - - -/*** ACK QOS CLASS INSTANCES ***/ - -static void channel_cons (orte_qos_ack_channel_t *ptr) -{ - int i; - OBJ_CONSTRUCT (&ptr->attributes, opal_list_t); - ptr->out_msg_seq_num = 0; - ptr->window_first_seq_num = 1; - ptr->in_msg_seq_num = 0; - ptr->ack_msg_seq_num = 0; - /* init seq num to room num array to -1 */ - for (i =0; i< QOS_ACK_MAX_OUTSTANDING_MSGS; i++) - ptr->seq_num_to_room_num[i] = -1; - OBJ_CONSTRUCT (&ptr->outstanding_msgs, opal_hotel_t); - ptr->state = orte_qos_ack_channel_state_inactive; -} -static void channel_des (orte_qos_ack_channel_t *ptr) -{ - // OPAL_LIST_DESTRUCT(&ptr->attributes); - //OBJ_DESTRUCT (&ptr->outstanding_msgs); -} 
-OBJ_CLASS_INSTANCE (orte_qos_ack_channel_t, - opal_list_item_t, - channel_cons, channel_des); diff --git a/orte/mca/qos/base/Makefile.am b/orte/mca/qos/base/Makefile.am deleted file mode 100644 index 5e241cc154..0000000000 --- a/orte/mca/qos/base/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ortedata_DATA += base/help-qos-base.txt - -headers += \ - base/base.h - -libmca_qos_la_SOURCES += \ - base/qos_base_frame.c \ - base/qos_base_select.c \ - base/qos_base_channel_handlers.c diff --git a/orte/mca/qos/base/base.h b/orte/mca/qos/base/base.h deleted file mode 100644 index d0918b5338..0000000000 --- a/orte/mca/qos/base/base.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * QoS Framework maintenence interface - * - * - * - */ - -#ifndef MCA_QOS_BASE_H -#define MCA_QOS_BASE_H - -#include "orte_config.h" -#include "orte/mca/qos/qos.h" -#include "orte/mca/rml/base/base.h" -#include "opal/class/opal_list.h" - - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_qos_base_framework; -/* select a component */ -ORTE_DECLSPEC int orte_qos_base_select(void); - -/* a global struct containing framework-level values */ -typedef struct { - opal_list_t open_channels; - opal_pointer_array_t actives; -#if OPAL_ENABLE_TIMING - bool timing; -#endif -} orte_qos_base_t; -ORTE_DECLSPEC extern orte_qos_base_t orte_qos_base; - -#define ORTE_QOS_MAX_WINDOW_SIZE 1000 - -typedef struct orte_qos_base_channel { - opal_list_item_t super; - uint32_t channel_num; - opal_list_t attributes; -} orte_qos_base_channel_t; -OBJ_CLASS_DECLARATION(orte_qos_base_channel_t); - -/* common implementations */ -ORTE_DECLSPEC void* orte_qos_get_module ( opal_list_t *qos_attributes); -int 
orte_qos_base_pack_attributes (opal_buffer_t * buffer, opal_list_t * qos_attributes); - -#define ORTE_QOS_SEND_COMPLETE(m) \ - do { \ - orte_qos_module_t *mod; \ - opal_output_verbose(5, orte_qos_base_framework.framework_output, \ - "%s-%s Send message complete at %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - ORTE_NAME_PRINT(&((m)->dst)), \ - __FILE__, __LINE__); \ - mod = (orte_qos_module_t*) m->channel->qos; \ - if (NULL != mod) \ - mod->send_callback(m); \ - else \ - ORTE_RML_SEND_COMPLETE(m); \ - } while(0); - -END_C_DECLS - -#endif /* MCA_QOS_BASE_H */ diff --git a/orte/mca/qos/base/help-qos-base.txt b/orte/mca/qos/base/help-qos-base.txt deleted file mode 100644 index dbf3c8ccd8..0000000000 --- a/orte/mca/qos/base/help-qos-base.txt +++ /dev/null @@ -1,12 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2014 Intel, Inc. All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# -[no-qos-avail] -No Qos protocols available. diff --git a/orte/mca/qos/base/qos_base_channel_handlers.c b/orte/mca/qos/base/qos_base_channel_handlers.c deleted file mode 100644 index 51e8afa183..0000000000 --- a/orte/mca/qos/base/qos_base_channel_handlers.c +++ /dev/null @@ -1,163 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests. 
- */ - -/* - * includes - */ -#include "orte_config.h" - -#include - -#include "orte/constants.h" -#include "orte/types.h" - -#include "opal/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/timings.h" -#include "opal/class/opal_list.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/qos/qos.h" -#include "orte/mca/qos/base/base.h" -#include "orte/mca/rml/base/base.h" - - -int orte_qos_base_pack_attributes (opal_buffer_t * buffer, - opal_list_t * qos_attributes) -{ - int32_t num_attributes; - int32_t rc= ORTE_SUCCESS; - orte_attribute_t *kv; - num_attributes = opal_list_get_size (qos_attributes); - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s orte_qos_base_pack_attributes num_attributes = %d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - num_attributes)); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG (rc); - return rc; - } - OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) { - if (ORTE_ATTR_GLOBAL == kv->local) { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s orte_qos_base_pack_attributes attribute key = %d value =%d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - kv->key, kv->data.uint8)); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - return rc; -} - -void* orte_qos_get_module (opal_list_t *qos_attributes) -{ - int32_t * type, type_val =0; - mca_qos_base_component_t *qos_comp; - type = &type_val; - if(!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) - return NULL; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s orte_qos_get_module channel type = %d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - type_val)); - //check if type is valid - if (type_val < 0 || 
ORTE_QOS_MAX_COMPONENTS <= type_val) - return NULL; - // associate the qos module - qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val); - if (NULL != qos_comp) - { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s qos_base_get_module returning qos module %p type =%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)&qos_comp->mod, type_val)); - return (void*)(&qos_comp->mod); - } else { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s qos_base_get_module failed to get qos component of type =%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - type_val)); - } - return NULL; -} - -void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - return qos->create(qos_attributes, channel_num); - else - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return NULL; -} - -int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - return (qos->open (qos_channel, buffer)); - else - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; -} - -int orte_qos_close_channel (void *qos_mod, void *qos_channel) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if ((NULL != qos) && (NULL != qos_channel)) - return (qos->close (qos_channel)); - else - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return (ORTE_ERR_BAD_PARAM); -} - -void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - qos->init_recv (qos_channel, qos_attributes); - else - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); -} - -int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != 
qos) - return (qos->cmp (qos_channel, qos_attributes)); - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return -1; -} - -int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - return(qos->send (qos_channel, msg)); - else - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return ORTE_ERROR; -} - -int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg) { - orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - return(qos->recv(qos_channel, msg)); - else { - ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return ORTE_ERROR; - } -} - - diff --git a/orte/mca/qos/base/qos_base_frame.c b/orte/mca/qos/base/qos_base_frame.c deleted file mode 100644 index 620a15f492..0000000000 --- a/orte/mca/qos/base/qos_base_frame.c +++ /dev/null @@ -1,120 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/class/opal_bitmap.h" -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" -#include "orte/mca/qos/qos.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/state/state.h" -#endif - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "orte/mca/qos/base/static-components.h" - -/* - * Global variables - */ -orte_qos_base_t orte_qos_base = {{{0}}}; -OPAL_TIMING_DECLARE(tm_qos) - -static int orte_qos_base_register(mca_base_register_flag_t flags) -{ -#if OPAL_ENABLE_TIMING - /* Detailed timing setup */ - orte_qos_base.timing = false; - (void) mca_base_var_register ("orte", "qos", "base", "timing", - "Enable QOS timings", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_qos_base.timing); -#endif - return ORTE_SUCCESS; -} - -static int orte_qos_base_close(void) -{ - - - /* shutdown all active transports */ - /*while (NULL != (cli = (mca_base_component_list_item_t *) opal_list_remove_first (&orte_qos_base.actives))) { - component = (mca_qos_base_component_t*)cli->cli_component; - if (NULL != component->shutdown) { - component->shutdown(); - } - OBJ_RELEASE(cli); - }*/ - // TO DO - - /* destruct our internal lists */ - OBJ_DESTRUCT(&orte_qos_base.actives); - OPAL_TIMING_EVENT((&tm_qos, "Finish")); - OPAL_TIMING_REPORT(orte_qos_base.timing, &tm_qos); - - return mca_base_framework_components_close(&orte_qos_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. 
- */ -static int orte_qos_base_open(mca_base_open_flag_t flags) -{ - /* setup globals */ - OBJ_CONSTRUCT(&orte_qos_base.actives, opal_pointer_array_t); - opal_pointer_array_init(&orte_qos_base.actives, ORTE_QOS_MAX_COMPONENTS, INT_MAX, 1); - -/* -#if OPAL_ENABLE_FT_CR == 1 - - orte_state.add_job_state(ORTE_JOB_STATE_FT_CHECKPOINT, orte_qos_base_ft_event, ORTE_ERROR_PRI); - orte_state.add_job_state(ORTE_JOB_STATE_FT_CONTINUE, orte_qos_base_ft_event, ORTE_ERROR_PRI); - orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_qos_base_ft_event, ORTE_ERROR_PRI); -#endif*/ - - OPAL_TIMING_INIT(&tm_qos); - - /* Open up all available components */ - return mca_base_framework_components_open(&orte_qos_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, qos, "Messaging Quality of Service Subsystem", - orte_qos_base_register, orte_qos_base_open, orte_qos_base_close, - mca_qos_base_static_components, 0); - -/*** QOS CLASS INSTANCES ***/ - -static void channel_cons (orte_qos_base_channel_t *ptr) -{ - OBJ_CONSTRUCT(&ptr->attributes, opal_list_t); -} -static void channel_des (orte_qos_base_channel_t *ptr) -{ - OPAL_LIST_DESTRUCT(&ptr->attributes); -} -OBJ_CLASS_INSTANCE (orte_qos_base_channel_t, - opal_list_item_t, - channel_cons, channel_des); - - diff --git a/orte/mca/qos/base/qos_base_select.c b/orte/mca/qos/base/qos_base_select.c deleted file mode 100644 index 26fe71bfca..0000000000 --- a/orte/mca/qos/base/qos_base_select.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/util/show_help.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/mca/qos/qos.h" -#include "orte/mca/qos/base/base.h" - - -/** - * Function for selecting all runnable modules from those that are - * available. - * - * Call the init function on all available modules. - */ -int orte_qos_base_select(void) -{ - mca_base_component_list_item_t *cli; - mca_qos_base_component_t *component; - int count = 0; - - /* Query all available components and ask if their transport is available */ - OPAL_LIST_FOREACH(cli, &orte_qos_base_framework.framework_components, mca_base_component_list_item_t) { - component = (mca_qos_base_component_t *) cli->cli_component; - - opal_output_verbose(5, orte_qos_base_framework.framework_output, - "mca:qos:select: checking available component %s", - component->qos_base.mca_component_name); - if (NULL == component->start ) - opal_output_verbose(5, orte_qos_base_framework.framework_output, - "mca:qos:select: component %s start function is null, type =%d", - component->qos_base.mca_component_name, component->type); - else { - /* if it fails to startup, then skip it */ - if (ORTE_SUCCESS != component->start()) { - opal_output_verbose(5, orte_qos_base_framework.framework_output, - "mca:qos:select: Skipping component [%s] - failed to initialize", - component->qos_base.mca_component_name ); - continue; - } - } - count++; - /* store each qos componenet in the actives pointer array at the index of that component type */ - opal_pointer_array_set_item(&orte_qos_base.actives, - component->type, component); - } - - if (0 == count) { - /* no support available means we really cannot run */ - opal_output_verbose(5, orte_qos_base_framework.framework_output, - "mca:qos:select: Init failed to return any available 
QoS components"); - orte_show_help("help-qos-base.txt", "no-interfaces-avail", true); - return ORTE_ERR_SILENT; - } - opal_output_verbose(5, orte_qos_base_framework.framework_output, - "mca:qos:select: Found %d active QoS components", - count); - return ORTE_SUCCESS; -} diff --git a/orte/mca/qos/noop/Makefile.am b/orte/mca/qos/noop/Makefile.am deleted file mode 100644 index 54c3b4348a..0000000000 --- a/orte/mca/qos/noop/Makefile.am +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - qos_noop.h \ - qos_noop_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_qos_noop_DSO -component_noinst = -component_install = mca_qos_noop.la -else -component_noinst = libmca_qos_noop.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_qos_noop_la_SOURCES = $(sources) -mca_qos_noop_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_qos_noop_la_SOURCES = $(sources) -libmca_qos_noop_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/qos/noop/qos_noop.h b/orte/mca/qos/noop/qos_noop.h deleted file mode 100644 index 350d3110d7..0000000000 --- a/orte/mca/qos/noop/qos_noop.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * QoS No-op Component interface - * - * - * - */ - -#ifndef MCA_QOS_NOOP_H -#define MCA_QOS_NOOP_H - -#include "orte_config.h" -#include "orte/mca/qos/qos.h" -#include "orte/mca/qos/base/base.h" - -BEGIN_C_DECLS - - -ORTE_MODULE_DECLSPEC extern orte_qos_component_t mca_qos_noop_component; - -extern orte_qos_module_t orte_qos_noop_module; - -END_C_DECLS - -#endif /* MCA_QOS_NOOP_H */ diff --git a/orte/mca/qos/noop/qos_noop_channel_handlers.c b/orte/mca/qos/noop/qos_noop_channel_handlers.c deleted file mode 100644 index ad9678ff3f..0000000000 --- a/orte/mca/qos/noop/qos_noop_channel_handlers.c +++ /dev/null @@ -1,337 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests. 
- */ - -/* - * includes - */ -#include "orte_config.h" - -#include - -#include "orte/constants.h" -#include "orte/types.h" - -#include "opal/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/timings.h" -#include "opal/class/opal_list.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/qos/qos.h" -#include "orte/mca/qos/base/base.h" - - -static int orte_qos_base_pack_attributes (opal_buffer_t * buffer, - opal_list_t * qos_attributes) -{ - int32_t num_attributes; - int32_t rc= ORTE_SUCCESS; - orte_attribute_t *kv; - num_attributes = opal_list_get_size (qos_attributes); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) { - ORTE_LOG_ERROR (rc); - return rc; - } - OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) { - if (ORTE_ATTR_GLOBAL == kv->local) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - return rc; -} - -static int orte_qos_base_unpack_attributes (opal_buffer_t *buffer, - opal_list_t *qos_attributes) -{ - orte_attribute_t *kv; - int32_t count, n, k; - int32_t rc=ORTE_SUCCESS; - /* unpack the attributes */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &count, - &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - for (k=0; k < count; k++) { - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv, - &n, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(qos_attributes, &kv->super); - } - return rc; -} - -void* orte_qos_base_create_channel ( orte_rml_channel_t *channel, - opal_list_t *qos_attributes) -{ - int32_t * type, type_val; - mca_qos_base_component_t *qos_comp; - if(!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) - return NULL; - type_val = *type; - //check if type is valid - if (0 < type_val || 
ORTE_QOS_MAX_COMPONENTS <= type_val) - return NULL; - // associate the qos module - qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val); - channel->qos = (void*) &qos_comp->mod; - // call create channel function of the module. - return (qos_comp->mod.create( qos_attributes)); -} - -void * orte_qos_base_create (opal_list_t *qos_attributes) -{ - orte_qos_base_channel_t * base_chan; - int32_t num_attributes; - int32_t rc, *window; - orte_qos_type_t *type; - orte_attribute_t *kv; - base_chan = OBJ_NEW (orte_qos_base_channel_t); - *type = orte_qos_noop; - // TBD _ we ignore inapplicable attributes for now - need to return error? - // get attributes of interest to the base and store them locally. - if (ORTE_SUCCESS == (rc = orte_set_attribute( &base_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { - // window size?? - if( orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) { - if ( ORTE_QOS_MAX_WINDOW_SIZE > (*window)) { - ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS); - OBJ_RELEASE(base_chan); - } - else { - if (ORTE_SUCCESS != (rc = orte_set_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE, - ORTE_ATTR_GLOBAL, (void*)window, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(base_chan); - } - } - } else - OBJ_RELEASE(base_chan); - } else { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(base_chan); - } - return base_chan; -} - -int orte_qos_base_open_channel ( void * qos_channel, - opal_buffer_t *buffer) -{ - int32_t rc = ORTE_SUCCESS; - orte_qos_base_channel_t *base_chan; - base_chan = (orte_qos_base_channel_t*) (qos_channel); - // the Qos module puts the non local attributes to be sent to the peer in a list at the time of create. - // pack those attributes into the buffer. 
- if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buffer, &base_chan->attributes))) - ORTE_ERROR_LOG(rc); - return rc; -} - -void orte_qos_base_chan_recv_init ( void * qos_channel, - opal_list_t *qos_attributes) -{ - // nothing to do for no op channel. -} - -void orte_qos_base_close_channel ( void * qos_channel) -{ - qos_channel = (orte_qos_base_channel_t*) (qos_channel); - OBJ_RELEASE(qos_channel); -} - -int orte_qos_base_comp_channel (void *qos_channel, - opal_list_t *qos_attributes) -{ - int32_t chan_typea, chan_typeb, *ptr, window_sizea, window_sizeb; - orte_qos_base_channel_t *base_chan = (orte_qos_base_channel_t*) qos_channel; - ptr = &chan_typea; - if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) - return ORTE_ERROR; - ptr = &chan_typeb; - if (!orte_get_attribute(qos_attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) - return ORTE_ERROR; - if (chan_typea == chan_typeb) { - ptr = &window_sizea; - if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) - return ORTE_ERROR; - ptr = &window_sizeb; - if (!orte_get_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) - return ORTE_ERROR; - return (window_sizea != window_sizeb); - } - else - return ORTE_ERROR; -} -/*static void orte_qos_open_channel_reply_send_callback ( int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // this is the send call back for open channel reply - orte_qos_channel_t *channel = (orte_qos_channel_t*) cbdata; - // if the message was not sent we should retry or complete the request appropriately - if (status!= ORTE_SUCCESS) - { - //retry request. 
- } - // if success then release the buffer and do open channel request completion after receiving response from peer - OBJ_RELEASE(buffer); -} - -static void orte_qos_open_channel_send_callback ( int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // this is the send call back for open channel request - orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata; - // if the message was not sent we should retry or complete the request appropriately - if (status!= ORTE_SUCCESS) - { - // retry if retriable failure. - // else call completion handler. - //remove channel from list - opal_list_remove_item(&orte_qos_base.open_channels, &req->channel->super); - OBJ_RELEASE(req->channel); - // update msg status and channel num so end point can have appropriate info - req->msg->status = status; - req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; - ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg); - OBJ_RELEASE(req); - } - // if success then release the buffer and do open channel request completion after receiving response from peer - OBJ_RELEASE(buffer); -} - -void orte_qos_base_open_channel(int sd, short args, void *cbdata) -{ - opal_buffer_t *buffer; int rc; - orte_qos_open_channel_t *open_channel; - orte_qos_open_channel_request_t *req = (orte_qos_open_channel_request_t*)cbdata; - // create channel on sender side by calling the respective qos module. 
- req->post.channel = orte_qos_base_create_channel(req->post.msg->dst, req->post.msg->qos_attributes); - buffer = OBJ_NEW(opal_buffer_t); - //pack qos attributes list in buffer - if (ORTE_SUCCESS != orte_qos_base_pack_attributes(buffer, req->post.msg->qos_attributes)) { - //invalid attributes complete request with error - } - open_channel = OBJ_NEW(orte_qos_open_channel_t); - open_channel->msg = req->post.msg; - open_channel->channel = req->post.channel; - open_channel->msg->channel_num = open_channel->channel->channel_num; - OBJ_RELEASE(req); - // send request to peer to open channel - orte_rml.send_buffer_nb( &open_channel->msg->dst, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ, - orte_qos_open_channel_send_callback, - open_channel); - // now post a recieve for open_channel_response tag - orte_rml.recv_buffer_nb(&open_channel->msg->dst, ORTE_RML_TAG_OPEN_CHANNEL_REPLY, - ORTE_RML_NON_PERSISTENT, orte_qos_open_channel_reply_callback, open_channel); - -} */ - - -/* -void orte_qos_open_channel_recv_callback (int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - int32_t rc; - opal_list_t *qos_attributes = OBJ_NEW(opal_list_t); - orte_qos_channel_t *channel; - // un pack attributes first - if ( ORTE_SUCCESS == orte_qos_base_unpack_attributes( buffer, qos_attributes)) { - // create channel - if (NULL != (channel = orte_qos_base_create_channel ( *peer, qos_attributes)) ) { - buffer = OBJ_NEW (opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - // send channel accept to sender with local channel num - orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REPLY, - orte_qos_open_channel_reply_send_callback, - channel); - } - else { - // reply with error message - } - } - else { - //reply with error message - } -} - -void orte_qos_open_channel_reply_callback (int status, - orte_process_name_t* peer, - struct 
opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata; - orte_qos_channel_t * channel = req->channel; - int32_t count = 1; - int32_t rc; - // process open_channel response from a peer for a open channel request - if (ORTE_SUCCESS == status) { - // unpack buffer and get peer channel number. - - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel_num, &count, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - // do error completion - channel->state = orte_qos_channel_closed; - //remove channel from list - opal_list_remove_item(&orte_qos_base.open_channels, &channel->super); - OBJ_RELEASE(channel); - // update msg status and channel num so end point can have appropriate info - req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_RESPONSE_INV; - req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; - } - else { - channel->state = orte_qos_channel_open; - req->msg->status = ORTE_SUCCESS; - req->msg->channel_num = channel->channel_num; - } - } - else { - channel->state = orte_qos_channel_closed; - //remove channel from list - opal_list_remove_item(&orte_qos_base.open_channels, &channel->super); - OBJ_RELEASE(channel); - // update msg status and channel num so end point can have appropriate info - req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_FAIL; - req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; - } - ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg); - OBJ_RELEASE(req); - OBJ_RELEASE(buffer); - // 1: If success record peer channel number, update channel state. - //2: If not destroy channel. - //3: complete openchannel request. -} */ - - diff --git a/orte/mca/qos/noop/qos_noop_component.c b/orte/mca/qos/noop/qos_noop_component.c deleted file mode 100644 index a6dad64c65..0000000000 --- a/orte/mca/qos/noop/qos_noop_component.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - - -#include "orte/mca/qos/base/base.h" -#include "orte/mca/qos/qos.h" - -static int qos_noop_start (void); -static void qos_noop_shutdown (void); -static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num); -static int noop_open (void *qos_channel, - opal_buffer_t * buf); -static int noop_send ( void *qos_channel, orte_rml_send_t *msg); -static int noop_recv (void *channel, orte_rml_recv_t *msg); -static int noop_close (void * channel); -static int noop_init_recv (void *channel, opal_list_t *attributes); -static int noop_cmp (void *channel, opal_list_t *attributes); -static void noop_send_callback (orte_rml_send_t *msg); - -/** - * noop module definition - */ -orte_qos_module_t orte_qos_noop_module = { - noop_create, - noop_open, - noop_send, - noop_recv, - noop_close, - noop_init_recv, - noop_cmp, - noop_send_callback -}; - -/** - * component definition - */ -mca_qos_base_component_t mca_qos_noop_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - MCA_QOS_BASE_VERSION_2_0_0, - - "noop", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - NULL, - NULL, - }, - qos_noop_start, - qos_noop_shutdown, - orte_qos_noop, - { - noop_create, - noop_open, - noop_send, - noop_recv, - noop_close, - noop_init_recv, - noop_cmp, - noop_send_callback - } -}; - -static int qos_noop_start(void) { - return ORTE_SUCCESS; -} - -static void qos_noop_shutdown (void) { -} - -static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num) { - orte_qos_base_channel_t * noop_chan; - int32_t rc, *window, window_val; - 
orte_qos_type_t type_val = orte_qos_noop; - orte_qos_type_t *type; - - noop_chan = OBJ_NEW (orte_qos_base_channel_t); - noop_chan->channel_num = channel_num; - type = &type_val; - window = &window_val; - // TBD _ we ignore inapplicable attributes for now - need to return error? - // get attributes of interest to the base and store them locally. - if (ORTE_SUCCESS == (rc = orte_set_attribute( &noop_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { - // window size?? - if( orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) { - if ( ORTE_QOS_MAX_WINDOW_SIZE < (*window)) { - ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS); - OBJ_RELEASE(noop_chan); - } - else { - if (ORTE_SUCCESS != (rc = orte_set_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE, - ORTE_ATTR_GLOBAL, (void*)window, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(noop_chan); - } - } - }else - OBJ_RELEASE(noop_chan); - } else { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(noop_chan); - } - return noop_chan; -} - -static int noop_open (void *qos_channel, opal_buffer_t * buf) -{ - int32_t rc = ORTE_SUCCESS; - orte_qos_base_channel_t *noop_chan; - noop_chan = (orte_qos_base_channel_t*) (qos_channel); - // the Qos module puts the non local attributes to be sent to the peer in a list at the time of create. - // pack those attributes into the buffer. 
- if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buf, &noop_chan->attributes))) - ORTE_ERROR_LOG(rc); - return rc; -} - -static int noop_send ( void *qos_channel, orte_rml_send_t *msg) -{ - //nothing to do - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s noop_send msg = %p to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, ORTE_NAME_PRINT(&msg->dst))); - return ORTE_SUCCESS; -} - -static int noop_recv (void *qos_channel, orte_rml_recv_t *msg) -{ - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s noop_recv msg = %p from peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (void*)msg, ORTE_NAME_PRINT(&msg->sender))); - return ORTE_SUCCESS; -} - -static int noop_close (void * channel) -{ - orte_qos_base_channel_t *noop_chan; - if(NULL != channel) { - noop_chan = (orte_qos_base_channel_t*) channel; - OBJ_RELEASE (noop_chan); - return ORTE_SUCCESS; - } else - return ORTE_ERR_BAD_PARAM; - -} - -static int noop_init_recv (void *channel, opal_list_t *attributes) -{ - return ORTE_SUCCESS; -} - -static int noop_cmp (void *channel, opal_list_t *attributes) -{ - int32_t chan_typea, chan_typeb, *ptr, window_sizea, window_sizeb; - orte_qos_base_channel_t *noop_chan = (orte_qos_base_channel_t*) channel; - ptr = &chan_typea; - if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) - return ORTE_ERROR; - ptr = &chan_typeb; - if (!orte_get_attribute(attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) - return ORTE_ERROR; - if (chan_typea == chan_typeb) { - ptr = &window_sizea; - if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) - return ORTE_ERROR; - ptr = &window_sizeb; - if (!orte_get_attribute(attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) - return ORTE_ERROR; - return (window_sizea != window_sizeb); - } - else - return ORTE_ERROR; -} - -static void noop_send_callback (orte_rml_send_t *msg) -{ - // nothing 
to do for noop - ORTE_RML_SEND_COMPLETE(msg); -} diff --git a/orte/mca/qos/qos.h b/orte/mca/qos/qos.h deleted file mode 100644 index 378a8b9d93..0000000000 --- a/orte/mca/qos/qos.h +++ /dev/null @@ -1,159 +0,0 @@ -/** - * copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * This header defines Quality of Service Interface for Runtime messaging - */ - -/** - * @file - * - * Quality of Service (QoS) Communication Interface - * - * The QoS layer is responsible for providing quality of service for - * messages exchanged between two ORTE processes through the use of - * channels. - */ -#ifndef MCA_QOS_H_ -#define MCA_QOS_H_ - -#include "orte_config.h" -#include "orte/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" -#include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" -#include "orte/mca/errmgr/errmgr.h" - -BEGIN_C_DECLS -/* ******************************************************************** */ -struct opal_buffer_t; -struct orte_process_name_t; - - -/* ******************************************************************** */ -#define ORTE_QOS_INVALID_CHANNEL_NUM 0xFFFF -#define ORTE_QOS_MAX_COMPONENTS 5 -typedef void (*orte_qos_callback_fn_t)(int status, - int channel_num, - struct orte_process_name_t* peer, - void* cbdata); - -typedef int (*mca_qos_base_component_start_fn_t)(void); -typedef void (*mca_qos_base_component_shutdown_fn_t)(void); - -#if OPAL_ENABLE_FT_CR == 1 -typedef int (*mca_qos_base_component_ft_event_fn_t)(int state); -#endif -ORTE_DECLSPEC void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num); -ORTE_DECLSPEC int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer); -ORTE_DECLSPEC int orte_qos_close_channel (void *qos_mod, void *qos_channel); -ORTE_DECLSPEC void orte_qos_init_recv_channel (void *qos_mod, void 
*qos_channel, opal_list_t *qos_attributes); -ORTE_DECLSPEC int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes); -ORTE_DECLSPEC int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg); -ORTE_DECLSPEC int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg); -/** - * qos module (channel) create function - * initialize type specific attributes of the channel. - */ -typedef void* (*orte_qos_base_module_create_fn_t) (opal_list_t *qos_attributes, uint32_t channel_num); - -/** - * qos module (channel) open function - * this function is called when rml_open_channel is requested - */ -typedef int (*orte_qos_base_module_open_fn_t) (void *qos_channel, - opal_buffer_t * buf); - -/** - * qos module (channel) send function - * this function is called when rml_send_channel is requested - */ -typedef int (*orte_qos_base_module_send_fn_t) ( void * qos_channel, - orte_rml_send_t *send); - -/** - * qos module (channel) recv function - * this function is called when a message is received on a channel - */ -typedef int (*orte_qos_base_module_recv_fn_t) ( void * channel, - orte_rml_recv_t *msg); -/** - * qos module (channel) close function - * this function is called when a message is received on a channel - */ - -typedef int (*orte_qos_base_module_close_fn_t) ( void * channel); -/** - * qos module (channel) init recv - * this function is used to initialize a channel for receiving msgs (called in response to open_channel req from peer) - */ -typedef int (*orte_qos_base_module_init_recv_fn_t) (void * channel, opal_list_t * attributes); - -/** - * qos module (channel) compare functions - * compares attributes of existing channel with the requested list of attributes - */ -typedef int (*orte_qos_base_module_cmp_fn_t) (void * channel, opal_list_t * attributes); - -/** - * qos module (channel) compare functions - * compares attributes of existing channel with the requested list of attributes - */ 
-typedef void (*orte_qos_base_module_send_callback_fn_t) (orte_rml_send_t *msg); - -/** - * - * the qos channel data structure - */ -typedef struct { - orte_qos_base_module_create_fn_t create; - orte_qos_base_module_open_fn_t open; - orte_qos_base_module_send_fn_t send; - orte_qos_base_module_recv_fn_t recv; - orte_qos_base_module_close_fn_t close; - orte_qos_base_module_init_recv_fn_t init_recv; - orte_qos_base_module_cmp_fn_t cmp; - orte_qos_base_module_send_callback_fn_t send_callback; -} orte_qos_module_t; - -typedef enum { - orte_qos_noop = 0, - orte_qos_ack = 1, - orte_qos_nack = 2, - orte_qos_ack_nack_hybrid = 3, - orte_qos_multipath = 4, -}orte_qos_type_t ; - -typedef struct { - mca_base_component_t qos_base; - mca_qos_base_component_start_fn_t start; - mca_qos_base_component_shutdown_fn_t shutdown; - orte_qos_type_t type; - orte_qos_module_t mod; -/* mca_qos_base_componenet_open_channel_fn_t open_channel; - mca_qos_base_component_send_channel_nb_fn_t send_channel; - mca_qos_base_component_recv_channel_nb_fn_t recv_channel; - mca_qos_base_component_close_channel_fn_t close_channel;*/ -#if OPAL_ENABLE_FT_CR == 1 - mca_qos_base_component_ft_event_fn_t ft_event; -#endif -} mca_qos_base_component_t; - -/** - * Macro for use in components that are of type oob - */ -#define MCA_QOS_BASE_VERSION_2_0_0 \ -ORTE_MCA_BASE_VERSION_2_1_0 ("qos", 2, 0, 0) - -END_C_DECLS - -#endif diff --git a/orte/mca/rml/base/Makefile.am b/orte/mca/rml/base/Makefile.am index 1461032070..d940589e9e 100644 --- a/orte/mca/rml/base/Makefile.am +++ b/orte/mca/rml/base/Makefile.am @@ -11,6 +11,7 @@ # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -27,4 +28,4 @@ libmca_rml_la_SOURCES += \ base/rml_base_receive.c \ base/rml_base_contact.c \ base/rml_base_msg_handlers.c \ - base/rml_base_channel_handlers.c + base/rml_base_stubs.c diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index fa3a4136ce..024e9ca2b9 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -82,11 +82,23 @@ ORTE_DECLSPEC void orte_rml_base_comm_start(void); ORTE_DECLSPEC void orte_rml_base_comm_stop(void); +/* + * globals that might be needed + */ +/* adding element to hold the active modules and components */ +typedef struct { + opal_list_item_t super; + int pri; + orte_rml_base_module_t *module; + mca_base_component_t *component; +} orte_rml_base_active_t; +OBJ_CLASS_DECLARATION(orte_rml_base_active_t); + /* a global struct containing framework-level values */ typedef struct { + opal_list_t actives; /* list to hold the active plugins */ opal_list_t posted_recvs; opal_list_t unmatched_msgs; - opal_pointer_array_t open_channels; #if OPAL_ENABLE_TIMING bool timing; #endif @@ -105,45 +117,6 @@ ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base; */ ORTE_DECLSPEC extern opal_list_t orte_rml_base_components; - -/** - * Component structure for the selected RML component - * - * Component structure pointer for the currently selected RML - * component. Useable between calls to orte_rml_base_select() and - * orte_rml_base_close(). - * @note This pointer should not be used outside the RML base. It is - * available outside the RML base only for the F/T component. 
- */ -ORTE_DECLSPEC extern orte_rml_component_t *orte_rml_component; - -typedef enum { - orte_rml_channel_opening = 0, - orte_rml_channel_open = 1, - orte_rml_channel_closing = 2, - orte_rml_channel_closed = 3, -}orte_rml_channel_state_t; - -/** - * RML channel structure. - * The RML only needs basic channel information as the rest of the book keeping information - * is stored in the QoS module specific channel object. - * It contains a pointer to the QoS module that handles requests on the channel. - * It contains a pointer to a struct that contains the QoS specific channel data. - */ -typedef struct { - opal_list_item_t super; - orte_rml_channel_num_t channel_num; // the channel number reference (exposed to the user). - orte_process_name_t peer; // the other end point (peer) of the channel - orte_rml_channel_num_t peer_channel; // peer channel number - void * qos; // pointer to QoS component specific module - void * qos_channel_ptr; // pointer to QoS component specific channel struct - orte_rml_channel_state_t state; // channel state - bool recv; // set to true if this is a receive (peer opened) channel. 
(Default is send channel) -} orte_rml_channel_t; -OBJ_CLASS_DECLARATION(orte_rml_channel_t); - - /* structure to send RML messages - used internally */ typedef struct { opal_list_item_t super; @@ -156,8 +129,6 @@ typedef struct { union { orte_rml_callback_fn_t iov; orte_rml_buffer_callback_fn_t buffer; - orte_rml_send_channel_callback_fn_t iov_chan; - orte_rml_send_buffer_channel_callback_fn_t buf_chan; } cbfunc; void *cbdata; @@ -166,11 +137,6 @@ typedef struct { int count; /* pointer to the user's buffer */ opal_buffer_t *buffer; - /*** TODO : need to move channel specific data to a channel struct */ - /* pointer to the channel object */ - orte_rml_channel_t *channel; - /* destination channel number */ - orte_rml_channel_num_t dst_channel; /* msg seq number */ uint32_t seq_num; /* pointer to raw data for cross-transport @@ -180,47 +146,11 @@ typedef struct { } orte_rml_send_t; OBJ_CLASS_DECLARATION(orte_rml_send_t); -/* structure to send RML channel open messages - used internally */ -typedef struct { - opal_list_item_t super; - /* peer process */ - orte_process_name_t dst; - /* msg send status */ - int status; - /* channel object */ - orte_rml_channel_t *channel; - /* attributes of the channel */ - opal_list_t *qos_attributes; - /* user's callback function */ - orte_rml_channel_callback_fn_t cbfunc; - /* user's cbdata */ - void *cbdata; -} orte_rml_open_channel_t; -OBJ_CLASS_DECLARATION(orte_rml_open_channel_t); - -/* structure to send RML channel close messages - used internally */ -typedef struct { - opal_list_item_t super; - /* msg send status */ - int status; - /* channel object */ - orte_rml_channel_t *channel; - /* user's callback function */ - orte_rml_channel_callback_fn_t cbfunc; - /* user's cbdata */ - void *cbdata; -} orte_rml_close_channel_t; -OBJ_CLASS_DECLARATION(orte_rml_close_channel_t); - /* define an object for transferring send requests to the event lib */ typedef struct { opal_object_t super; opal_event_t ev; - union { - orte_rml_send_t send; 
- orte_rml_open_channel_t open_channel; - orte_rml_close_channel_t close_channel; - }post; + orte_rml_send_t send; } orte_rml_send_request_t; OBJ_CLASS_DECLARATION(orte_rml_send_request_t); @@ -230,7 +160,6 @@ typedef struct { opal_event_t ev; orte_process_name_t sender; // sender orte_rml_tag_t tag; // targeted tag - orte_rml_channel_num_t channel_num; // channel number uint32_t seq_num; //sequence number struct iovec iov; // the recvd data } orte_rml_recv_t; @@ -259,7 +188,7 @@ typedef struct { } orte_rml_recv_request_t; OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); -#define ORTE_RML_POST_MESSAGE(p, t, c, s, b, l) \ +#define ORTE_RML_POST_MESSAGE(p, t, s, b, l) \ do { \ orte_rml_recv_t *msg; \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ @@ -270,7 +199,6 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); msg->sender.jobid = (p)->jobid; \ msg->sender.vpid = (p)->vpid; \ msg->tag = (t); \ - msg->channel_num = (c); \ msg->seq_num = (s); \ msg->iov.iov_base = (IOVBASE_TYPE*)(b); \ msg->iov.iov_len = (l); \ @@ -314,99 +242,66 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ ORTE_NAME_PRINT(&((m)->dst)), \ __FILE__, __LINE__); \ - if( NULL == (m)->channel) { \ - if (NULL != (m)->iov) { \ - if (NULL != (m)->cbfunc.iov) { \ - (m)->cbfunc.iov((m)->status, \ - &((m)->dst), \ - (m)->iov, (m)->count, \ - (m)->tag, (m)->cbdata); \ - } \ - } else { \ - /* non-blocking buffer send */ \ - (m)->cbfunc.buffer((m)->status, &((m)->origin), \ - (m)->buffer, \ - (m)->tag, (m)->cbdata); \ - } \ - } else { \ - if (NULL != (m)->iov) { \ - if (NULL != (m)->cbfunc.iov_chan) { \ - (m)->cbfunc.iov_chan((m)->status, \ - (m)->channel->channel_num, \ - (m)->iov, (m)->count, \ - (m)->tag, (m)->cbdata); \ - } \ - } else { \ - /* non-blocking buffer send */ \ - (m)->cbfunc.buf_chan((m)->status, \ - (m)->channel->channel_num, \ - (m)->buffer, \ - (m)->tag, (m)->cbdata); \ + if (NULL != (m)->iov) { \ + if (NULL != (m)->cbfunc.iov) { \ + 
(m)->cbfunc.iov((m)->status, \ + &((m)->dst), \ + (m)->iov, (m)->count, \ + (m)->tag, (m)->cbdata); \ } \ - } \ + } else { \ + /* non-blocking buffer send */ \ + (m)->cbfunc.buffer((m)->status, &((m)->origin), \ + (m)->buffer, \ + (m)->tag, (m)->cbdata); \ + } \ OBJ_RELEASE(m); \ }while(0); - -#define ORTE_RML_OPEN_CHANNEL_COMPLETE(m) \ - do { \ - opal_output_verbose(5, orte_rml_base_framework.framework_output, \ - "%s-%s open channel message complete at %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - ORTE_NAME_PRINT(&((m)->dst)), \ - __FILE__, __LINE__); \ - /* call the callback function */ \ - (m)->cbfunc((m)->status, (m)->channel->channel_num, \ - &((m)->dst), \ - NULL, (m)->cbdata) ; \ - }while(0); - -#define ORTE_RML_CLOSE_CHANNEL_COMPLETE(m) \ - do { \ - opal_output_verbose(5, orte_rml_base_framework.framework_output, \ - "%s-%d close channel message complete at %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - (m)->channel->channel_num, \ - __FILE__, __LINE__); \ - /* call the callback function */ \ - (m)->cbfunc((m)->status, (m)->channel->channel_num, \ - NULL, NULL, (m)->cbdata) ; \ -}while(0); -/* - * This is the base priority for a RML wrapper component - * If there exists more than one wrapper, then the one with - * the lowest priority wins. 
- */ -#define RML_SELECT_WRAPPER_PRIORITY -128 - -#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX -ORTE_DECLSPEC orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num); - - /* common implementations */ ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata); -ORTE_DECLSPEC void orte_rml_base_open_channel(int fd, short flags, void *cbdata); -ORTE_DECLSPEC void orte_rml_base_close_channel(int fd, short flags, void *cbdata); -ORTE_DECLSPEC void orte_rml_base_open_channel_send_callback ( int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -ORTE_DECLSPEC void orte_rml_base_open_channel_resp_callback (int status, orte_process_name_t* peer, - struct opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -ORTE_DECLSPEC void orte_rml_base_open_channel_reply_send_callback ( int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -ORTE_DECLSPEC void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, - orte_rml_send_t *send); -ORTE_DECLSPEC int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, - orte_rml_recv_t *recv); -ORTE_DECLSPEC void orte_rml_base_close_channel_send_callback ( int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); -ORTE_DECLSPEC void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan); ORTE_DECLSPEC void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata); ORTE_DECLSPEC void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg); + + +/* Stub API interfaces to cycle through active plugins and call highest priority */ +ORTE_DECLSPEC int orte_rml_API_enable_comm(void); +ORTE_DECLSPEC void orte_rml_API_finalize(void); +ORTE_DECLSPEC char* 
orte_rml_API_get_contact_info(void); +ORTE_DECLSPEC void orte_rml_API_set_contact_info(const char *contact_info); +ORTE_DECLSPEC int orte_rml_API_ping(const char* contact_info, const struct timeval* tv); +ORTE_DECLSPEC int orte_rml_API_send_nb(orte_process_name_t* peer, struct iovec* msg, + int count, orte_rml_tag_t tag, + orte_rml_callback_fn_t cbfunc, void* cbdata); +ORTE_DECLSPEC int orte_rml_API_send_buffer_nb(orte_process_name_t* peer, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata); +ORTE_DECLSPEC void orte_rml_API_recv_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata); + +ORTE_DECLSPEC void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata); + +ORTE_DECLSPEC void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag); + +ORTE_DECLSPEC int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc); + +ORTE_DECLSPEC int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc); + +ORTE_DECLSPEC int orte_rml_API_ft_event(int state); + +ORTE_DECLSPEC void orte_rml_API_purge(orte_process_name_t *peer); + END_C_DECLS #endif /* MCA_RML_BASE_H */ diff --git a/orte/mca/rml/base/rml_base_channel_handlers.c b/orte/mca/rml/base/rml_base_channel_handlers.c deleted file mode 100644 index b07bf56d0d..0000000000 --- a/orte/mca/rml/base/rml_base_channel_handlers.c +++ /dev/null @@ -1,544 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ - -/* - * includes - */ -#include "orte_config.h" - -#include - -#include "orte/constants.h" -#include "orte/types.h" - -#include "opal/dss/dss.h" -#include "opal/util/output.h" -#include "opal/util/timings.h" -#include "opal/class/opal_list.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/base.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/qos/base/base.h" - - -static int unpack_channel_attributes (opal_buffer_t *buffer, opal_list_t *qos_attributes); -static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, - opal_list_t *qos_attributes, - bool recv); -static int send_open_channel_reply (orte_process_name_t *peer, - orte_rml_channel_t *channel, - bool accept); -void orte_rml_base_close_channel(int fd, short flags, void *cbdata) -{ - orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; - orte_rml_close_channel_t *close_chan; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_close_channel to peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&req->post.close_channel.channel->peer))); - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&req->post.close_channel.channel->peer))); - close_chan = OBJ_NEW(orte_rml_close_channel_t); - close_chan->channel = req->post.close_channel.channel; - close_chan->cbfunc = req->post.close_channel.cbfunc; - close_chan->cbdata = req->post.close_channel.cbdata; - OBJ_RELEASE(req); - /* check with qos if the channel ready to be closed */ - if (ORTE_SUCCESS == orte_qos_close_channel (close_chan->channel->qos, - close_chan->channel->qos_channel_ptr)) { - orte_rml_base_send_close_channel( close_chan); - } - /* complete close request with error channel busy */ - else { - close_chan->status = 
ORTE_ERR_CHANNEL_BUSY; - ORTE_RML_CLOSE_CHANNEL_COMPLETE(close_chan); - OBJ_RELEASE(close_chan); - } -} - -void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan) -{ - opal_buffer_t *buffer; - // send msg to peer to close channel. - buffer = OBJ_NEW (opal_buffer_t); - /* pack the channel number*/ - opal_dss.pack(buffer, &close_chan->channel->peer_channel, 1, OPAL_UINT32); - orte_rml.send_buffer_nb( &close_chan->channel->peer, buffer, ORTE_RML_TAG_CLOSE_CHANNEL_REQ, - orte_rml_base_close_channel_send_callback, - close_chan); -} - -void orte_rml_base_close_channel_send_callback ( int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // this is the send call back for open channel request - orte_rml_close_channel_t *req = (orte_rml_close_channel_t*) cbdata; - orte_process_name_t peer = req->channel->peer; - opal_output_verbose(5, orte_rml_base_framework.framework_output, - "%s rml_close_channel_send_callback to peer %s status = %d", - ORTE_NAME_PRINT(sender), - ORTE_NAME_PRINT(&peer), status); - req->status = status; - // if the message could not be sent log error - if (ORTE_SUCCESS != req->status) - ORTE_ERROR_LOG (req->status); - //complete the req. - ORTE_RML_CLOSE_CHANNEL_COMPLETE(req); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); - // release the channel object and the req. 
- OBJ_RELEASE(req->channel); - OBJ_RELEASE(req); - OBJ_RELEASE(buffer); -} - -void orte_rml_base_open_channel(int fd, short flags, void *cbdata) -{ - int32_t *type, type_val; - orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; - orte_process_name_t peer; - orte_rml_open_channel_t *open_chan; - orte_rml_channel_t *channel; - opal_buffer_t *buffer; - peer = req->post.open_channel.dst; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel to peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer))); - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&peer))); - /* return error if a channel already exists */ - if ( NULL != (channel = get_channel (&peer, req->post.open_channel.qos_attributes, false))) - { - req->post.open_channel.status = ORTE_ERR_OPEN_CHANNEL_DUPLICATE; - req->post.open_channel.channel = channel; - ORTE_RML_OPEN_CHANNEL_COMPLETE(&req->post.open_channel); - OBJ_RELEASE(req); - return; - } - channel = OBJ_NEW(orte_rml_channel_t); - channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); - channel->peer = peer; - open_chan = OBJ_NEW(orte_rml_open_channel_t); - open_chan->dst = peer; - open_chan->qos_attributes = req->post.open_channel.qos_attributes; - open_chan->cbfunc = req->post.open_channel.cbfunc; - open_chan->cbdata = req->post.open_channel.cbdata; - OBJ_RELEASE(req); - // associate open channel request and the newly created channel object - open_chan->channel = channel; - type = &type_val; - if (!orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) { - return; - } - open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel type = %d to peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - *type, - ORTE_NAME_PRINT(&peer))); - // now associate qos with the channel based on user requested attributes. 
- if ( NULL != open_chan->channel->qos) - { - open_chan->channel->qos_channel_ptr = orte_qos_create_channel (open_chan->channel->qos, - open_chan->qos_attributes, - open_chan->channel->channel_num); - // create rml send for open channel request. Call the corresponding QoS module to pack the attributes. - buffer = OBJ_NEW (opal_buffer_t); - // call QoS module to pack attributes - if ( ORTE_SUCCESS == (orte_qos_open_channel(open_chan->channel->qos, open_chan->channel->qos_channel_ptr, buffer))) - { - /* pack channel number at the end */ - opal_dss.pack(buffer, (void*) &open_chan->channel->channel_num, 1, OPAL_UINT32); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel to peer %s SUCCESS sending to peer", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer))); - // post a recieve for open_channel_response tag - orte_rml.recv_buffer_nb(&peer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, - ORTE_RML_NON_PERSISTENT, orte_rml_base_open_channel_resp_callback, open_chan); - // send request to peer to open channel - orte_rml.send_buffer_nb( &peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ, - orte_rml_base_open_channel_send_callback, - open_chan); - - } else { - open_chan->status = ORTE_ERR_PACK_FAILURE; - ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL); - // call QoS module to release the QoS channel object. 
- orte_qos_close_channel (open_chan->channel->qos, open_chan->channel->qos_channel_ptr); - OBJ_RELEASE (buffer); - OBJ_RELEASE(open_chan->channel); - OBJ_RELEASE(open_chan); - } - } - else - { - // do error completion because a component for the requested QoS does not exist - open_chan->status = ORTE_ERR_QOS_TYPE_UNSUPPORTED; - ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL); - OBJ_RELEASE(open_chan->channel); - OBJ_RELEASE(open_chan); - } - -} - -void orte_rml_base_open_channel_send_callback ( int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // this is the send call back for open channel request - orte_rml_open_channel_t *req = (orte_rml_open_channel_t*) cbdata; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel_send_callback to peer %s status = %d", - ORTE_NAME_PRINT(sender), - ORTE_NAME_PRINT(&req->dst), status)); - // if the message was not sent we should retry or complete the request appropriately - if (status!= ORTE_SUCCESS) - { - req->status = status; - ORTE_RML_OPEN_CHANNEL_COMPLETE(req); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); - // call QoS module to release the QoS channel object. 
- orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr); - OBJ_RELEASE(req->channel); - OBJ_RELEASE(req); - } - else { - // start a timer for response from peer - } - OBJ_RELEASE(buffer); -} - -void orte_rml_base_open_channel_resp_callback (int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - orte_rml_open_channel_t *req = (orte_rml_open_channel_t*) cbdata; - orte_rml_channel_t * channel = req->channel; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel_resp_callback to peer %s status = %d channel = %p", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), status, - (void*)channel)); - int32_t rc; - bool peer_resp = false; - int32_t count = 1; - // unpack peer response from buffer to determine if peer has accepted the open request - if ((ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &peer_resp, &count, OPAL_BOOL))) && peer_resp) { - - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel_resp_callback to peer response = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - peer_resp)); - /* response will contain the peer channel number - the peer does not have the - option to change the channel attributes - unpack and get peer channel number.*/ - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel, &count, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - req->status = ORTE_ERR_UNPACK_FAILURE; - opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); - // call QoS module to release the QoS channel object. - orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr); - OBJ_RELEASE(req->channel); - // TBD : should we send a close channel to the peer?? - } - else { - // call qos module to update the channel state.?? 
- req->status = ORTE_SUCCESS; - req->channel->state = orte_rml_channel_open; - } - } - else { - if (rc) { - ORTE_ERROR_LOG(rc); - req->status = ORTE_ERR_UNPACK_FAILURE; - } else { - req->status = ORTE_ERR_OPEN_CHANNEL_PEER_REJECT; - } - opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); - // call QoS module to release the QoS channel object. - orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr); - OBJ_RELEASE(req->channel); - } - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel_resp_callback to peer %s status = %d channel =%p num = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), req->status, - (void*)channel, channel->channel_num)); - ORTE_RML_OPEN_CHANNEL_COMPLETE(req); - OBJ_RELEASE(req); -} - -static int unpack_channel_attributes (opal_buffer_t *buffer, - opal_list_t *qos_attributes) -{ - orte_attribute_t *kv; - int32_t count, n, k; - int32_t rc=ORTE_SUCCESS; - /* unpack the attributes */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &count, - &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_unpack_attributes num attributes = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - count)); - for (k=0; k < count; k++) { - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv, - &n, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "rml_unpack_attributes unpacked attribute key = %d, value = %d ", - kv->key, - kv->data.uint8)); - kv->local = ORTE_ATTR_GLOBAL; - opal_list_append(qos_attributes, &kv->super); - } - return rc; -} - -void orte_rml_open_channel_recv_callback (int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - opal_list_t qos_attributes; - orte_rml_channel_t *channel; - uint8_t *type, type_val 
= 10; - int32_t count =1; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel_recv_callback from peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - OBJ_CONSTRUCT(&qos_attributes, opal_list_t); - /* unpack attributes first */ - if ( ORTE_SUCCESS == unpack_channel_attributes( buffer, &qos_attributes)) { - type = &type_val; - if (!orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) { - OPAL_LIST_DESTRUCT(&qos_attributes); - return; - } - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "rml_open_channel_recv_callback type =%d", - type_val)); - /* scan the list of channels to see if we already have a channel with qos_attributes */ - if (NULL == (channel = get_channel ( peer, &qos_attributes, true))) { - /* create a new channel for the req */ - channel = OBJ_NEW(orte_rml_channel_t); - channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "rml_open_channel_recv_callback channel num =%d", - channel->channel_num)); - channel->peer = *peer; - channel->recv = true; - channel->qos = (void*) orte_qos_get_module (&qos_attributes); - /* now associate qos with the channel based on requested attributes */ - channel->qos_channel_ptr = (void*) orte_qos_create_channel(channel->qos, &qos_attributes, - channel->channel_num); - if (channel->qos_channel_ptr) { - /* call qos to init recv state */ - orte_qos_init_recv_channel ( channel->qos, channel->qos_channel_ptr, &qos_attributes); - /* send channel accept reply to sender */ - if(ORTE_SUCCESS == send_open_channel_reply (peer, channel, true)) { - /* update channel state */ - channel->state = orte_rml_channel_open; - /*store src channel number */ - opal_dss.unpack(buffer, (void*) &channel->peer_channel, &count, OPAL_UINT32); - } - else { - /* the receiver shall not attempt to resend or send a reject message - instead we let the 
sender's request timeout at his end. - release the channel etc */ - opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); - orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); - OBJ_RELEASE(channel); - } - } else { - send_open_channel_reply (peer, NULL, false); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); - //orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); - OBJ_RELEASE(channel); - } - } - else { - /* there exists a channel with the same attributes reject the request */ - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "rml_open_channel_recv_callback OOPS CHANNEL EXISTS ALREADY channel num =%d", - channel->channel_num)); - send_open_channel_reply (peer, channel, false); - } - } - else { - //reply with error message - send_open_channel_reply (peer, NULL, false); - } - OPAL_LIST_DESTRUCT(&qos_attributes); -} - -static int send_open_channel_reply (orte_process_name_t *peer, - orte_rml_channel_t *channel, - bool accept) -{ - opal_buffer_t *buffer; - int32_t rc; - buffer = OBJ_NEW (opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &accept , 1, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (accept) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - /* TBD: should specify reason for reject - send open channel response to sender */ - orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, - orte_rml_base_open_channel_reply_send_callback, - channel); - - return rc; -} - -static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, - opal_list_t *qos_attributes, - bool recv) -{ - orte_rml_channel_t *channel = NULL; - int32_t i = 0; - /* search available channels and return channel that matches the attributes */ - for (i=0; i < orte_rml_base.open_channels.size; i++) { - if (NULL != (channel = 
(orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, i))) { - /* compare basic properties */ - if ((OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &channel->peer, peer)) && - ((orte_rml_channel_open == channel->state) || - (orte_rml_channel_opening == channel->state)) && - (channel->recv == recv)) - { - /* compare channel attributes */ - if( ORTE_SUCCESS == orte_qos_cmp_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes)) - return channel; - - } - } - } - return NULL; -} - -void orte_rml_base_open_channel_reply_send_callback ( int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // this is the send call back for open channel reply - orte_rml_channel_t *channel = (orte_rml_channel_t*) cbdata; - // if the message was not sent we should retry or release the channel resources - if (status!= ORTE_SUCCESS) - { - ORTE_ERROR_LOG (status); - // release channel - if(NULL != channel) { - opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); - // call QoS module to release the QoS channel object. 
- orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); - OBJ_RELEASE(channel); - } else { - // we did not accept the request so nothing to do - } - } - // if success then release the buffer and do open channel request completion after receiving response from peer - OBJ_RELEASE(buffer); -} - -orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num) { - orte_rml_channel_t * channel; - - channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, chan_num); - if ((NULL != channel) && (orte_rml_channel_open == channel->state)) - return channel; - else - return NULL; - return channel; -} - -void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, - orte_rml_send_t *send) -{ - // add channel number and notify Qos - send->dst_channel = channel->peer_channel; - orte_qos_send_channel (channel->qos, channel->qos_channel_ptr, send); -} - -int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, - orte_rml_recv_t *recv) -{ - // call qos for recv post processing - return (orte_qos_recv_channel (channel->qos, channel->qos_channel_ptr, recv)); -} - -void orte_rml_close_channel_recv_callback (int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - // find the channel and close it or log error - orte_rml_channel_t *channel; - int32_t count =1, rc; - orte_rml_channel_num_t channel_num =5; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_close_channel_recv_callback from peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - /* unpack channel number */ - if(ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel_num, - &count, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - return; - } - channel = orte_rml_base_get_channel(channel_num); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_close_channel_recv_callback for channel num =%d channel=%p", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - channel_num, (void*)channel)); - if (NULL != channel) { - orte_qos_close_channel ( channel->qos, channel->qos_channel_ptr); - opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); - OBJ_RELEASE(channel); - } else { - ORTE_ERROR_LOG(OPAL_ERR_BAD_PARAM); - } -} diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index f00b506983..4f1508d14a 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -36,33 +36,34 @@ * component's public mca_base_component_t struct. 
*/ #include "orte/mca/rml/base/static-components.h" -orte_rml_module_t orte_rml = {0}; -orte_rml_base_t orte_rml_base = {{{0}}}; + +/* Initialising stub fns in the global var used by other modules */ +orte_rml_base_module_t orte_rml = { + orte_rml_API_enable_comm, + orte_rml_API_finalize, + orte_rml_API_get_contact_info, + orte_rml_API_set_contact_info, + orte_rml_API_ping, + orte_rml_API_send_nb, + orte_rml_API_send_buffer_nb, + orte_rml_API_recv_nb, + orte_rml_API_recv_buffer_nb, + orte_rml_API_recv_cancel, + orte_rml_API_add_exception_handler, + orte_rml_API_del_exception_handler, + orte_rml_API_ft_event, + orte_rml_API_purge +}; + +orte_rml_base_t orte_rml_base = {{{0}}}; OPAL_TIMING_DECLARE(tm_rml) orte_rml_component_t *orte_rml_component = NULL; static bool selected = false; -static char *orte_rml_base_wrapper = NULL; static int orte_rml_base_register(mca_base_register_flag_t flags) { - int var_id; - - /* - * Which RML Wrapper component to use, if any - * - NULL or "" = No wrapper - * - ow. 
select that specific wrapper component - */ - orte_rml_base_wrapper = NULL; - var_id = mca_base_var_register("orte", "rml", "base", "wrapper", - "Use a Wrapper component around the selected RML component", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_rml_base_wrapper); - (void) mca_base_var_register_synonym(var_id, "orte", "rml",NULL,"wrapper", 0); - #if OPAL_ENABLE_TIMING orte_rml_base.timing = false; (void) mca_base_var_register ("orte", "rml", "base", "timing", @@ -89,6 +90,17 @@ static int orte_rml_base_close(void) { bool active; + orte_rml_base_active_t *active_module; + + /*close the active modules */ + OPAL_LIST_FOREACH(active_module, &orte_rml_base.actives, orte_rml_base_active_t) + { + if (NULL != active_module->module->finalize) { + active_module->module->finalize(); + } + } + OPAL_LIST_DESTRUCT(&orte_rml_base.actives) + /* because the RML posted recvs list is in a separate * async thread for apps, we can't just destruct it here. 
* Instead, we push it into that event thread and destruct @@ -107,7 +119,6 @@ static int orte_rml_base_close(void) } OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml); - OBJ_DESTRUCT(&orte_rml_base.open_channels); return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -115,13 +126,11 @@ static int orte_rml_base_close(void) static int orte_rml_base_open(mca_base_open_flag_t flags) { /* Initialize globals */ + /* construct object for holding the active plugin modules */ + OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); - OBJ_CONSTRUCT(&orte_rml_base.open_channels, opal_pointer_array_t); - if (OPAL_SUCCESS != opal_pointer_array_init(&orte_rml_base.open_channels, 0, - INT_MAX, 1)) { - return ORTE_ERR_OUT_OF_RESOURCE; - } + OPAL_TIMING_INIT(&tm_rml); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); @@ -131,116 +140,80 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer", orte_rml_base_register, orte_rml_base_open, orte_rml_base_close, mca_rml_base_static_components, 0); +OBJ_CLASS_INSTANCE(orte_rml_base_active_t, + opal_list_item_t, + NULL, NULL); +/** + * Function for selecting one component(plugin) from all those that are + * available. 
+ */ int orte_rml_base_select(void) { - opal_list_item_t *item, *next; - mca_base_component_list_item_t *cli; + mca_base_component_list_item_t *cli=NULL; + mca_base_component_t *component=NULL; + mca_base_module_t *module=NULL; + orte_rml_base_module_t *nmodule; + orte_rml_base_active_t *newmodule, *mod; + int priority; + bool inserted; - int selected_priority = -1; - orte_rml_component_t *selected_component = NULL; - orte_rml_module_t *selected_module = NULL; + if (selected) { + return ORTE_SUCCESS; + } + selected = true; - orte_rml_component_t *wrapper_component = NULL; - bool return_silent=false; + OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) { + component = (mca_base_component_t *) cli->cli_component; - if (selected) { - return ORTE_SUCCESS; - } - selected = true; + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "orte_rml_base_select: Initializing %s component %s", + component->mca_type_name, + component->mca_component_name); - OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) { - orte_rml_component_t* component; - component = (orte_rml_component_t *) cli->cli_component; + if (NULL == ((orte_rml_component_t *)component)->rml_init) { + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "orte_rml_base_select: no init function; ignoring component [%s]",component->mca_component_name); + } else { + module = (mca_base_module_t *) ((orte_rml_component_t *)component)->rml_init(&priority); + if (NULL == module) { + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "orte_rml_base_select: init returned failure [%s]",component->mca_component_name); + continue; + } - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: initializing %s component %s", - component->rml_version.mca_type_name, - component->rml_version.mca_component_name); + /* based on priority add it to the 
actives list */ + nmodule = (orte_rml_base_module_t*) module; + /* add to the list of selected modules */ + newmodule = OBJ_NEW(orte_rml_base_active_t); + newmodule->pri = priority; + newmodule->module = nmodule; + newmodule->component = component; - if (NULL == component->rml_init) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: no init function; ignoring component"); - } else { - int priority = 0; - - orte_rml_module_t* module = component->rml_init(&priority); - if (NULL == module) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: init returned failure"); - if (priority < 0) { - return_silent = true; - } - continue; - } - - if(NULL != orte_rml_base_wrapper && - /* If this is a wrapper component then save it for later */ - RML_SELECT_WRAPPER_PRIORITY >= priority) { - if( 0 == strncmp(component->rml_version.mca_component_name, - orte_rml_base_wrapper, - strlen(orte_rml_base_wrapper) ) ) { - wrapper_component = component; - } - } else if (priority > selected_priority) { - /* Otherwise this is a normal module and subject to normal selection */ - if (NULL != selected_module && NULL != selected_module->finalize) { - selected_module->finalize(); - } - selected_priority = priority; - selected_component = component; - selected_module = module; - } + /* maintain priority order */ + inserted = false; + OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) { + if (priority > mod->pri) { + opal_list_insert_pos(&orte_rml_base.actives, + (opal_list_item_t*)mod, &newmodule->super); + inserted = true; + break; + } + } + if (!inserted) { + /* must be lowest priority - add to end */ + opal_list_append(&orte_rml_base.actives, &newmodule->super); + } + } + } + if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) { + opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + /* show the prioritized list */ + OPAL_LIST_FOREACH(mod, 
&orte_rml_base.actives, orte_rml_base_active_t) { + opal_output(0, "\tComponent: %s Priority: %d", mod->component->mca_component_name, mod->pri); } - } + } - /* - * Unload all components that were not selected - */ - OPAL_LIST_FOREACH_SAFE(item, next, &orte_rml_base_framework.framework_components, opal_list_item_t) { - mca_base_component_list_item_t *cli = (mca_base_component_list_item_t *) item; - orte_rml_component_t* component = (orte_rml_component_t *) cli->cli_component; - - /* Keep it if it is the wrapper component */ - if ((component == wrapper_component) || (component == selected_component)) { - continue; - } - /* Not the selected component */ - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: module %s unloaded", - component->rml_version.mca_component_name); - opal_list_remove_item(&orte_rml_base_framework.framework_components, item); - mca_base_component_repository_release((mca_base_component_t *) component); - OBJ_RELEASE(item); - } - - /* setup reference to selected module */ - if (NULL != selected_module) { - orte_rml = *selected_module; - orte_rml_component = selected_component; - } - - /* If a wrapper component was requested then - * Make sure it can switch out the selected module - */ - if( NULL != wrapper_component) { - wrapper_component->rml_init(NULL); - } - - if (NULL == selected_component) { - if (return_silent) { - return ORTE_ERR_SILENT; - } - return ORTE_ERROR; - } - /* Post a persistent recieve for open channel request */ - orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPEN_CHANNEL_REQ, - ORTE_RML_PERSISTENT, orte_rml_open_channel_recv_callback, - NULL); - /* post a persistent recieve for close channel request */ - orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_CLOSE_CHANNEL_REQ, - ORTE_RML_PERSISTENT, orte_rml_close_channel_recv_callback, - NULL); return ORTE_SUCCESS; } @@ -278,48 +251,16 @@ static void send_cons(orte_rml_send_t *ptr) ptr->iov = NULL; ptr->buffer = NULL; 
ptr->data = NULL; - ptr->channel = NULL; - ptr->dst_channel = ORTE_RML_INVALID_CHANNEL_NUM; ptr->seq_num = 0xFFFFFFFF; } OBJ_CLASS_INSTANCE(orte_rml_send_t, opal_list_item_t, send_cons, NULL); -static void channel_cons(orte_rml_channel_t *ptr) -{ - ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM; - ptr->qos = NULL; - ptr->qos_channel_ptr = NULL; - ptr->recv = false; -} - -OBJ_CLASS_INSTANCE(orte_rml_channel_t, - opal_list_item_t, - channel_cons, NULL); - -static void open_channel_cons(orte_rml_open_channel_t *ptr) -{ - ptr->cbdata = NULL; - ptr->qos_attributes = NULL; -} -OBJ_CLASS_INSTANCE(orte_rml_open_channel_t, - opal_list_item_t, - open_channel_cons, NULL); - -static void close_channel_cons(orte_rml_close_channel_t *ptr) -{ - ptr->cbdata = NULL; - ptr->channel = NULL; -} -OBJ_CLASS_INSTANCE(orte_rml_close_channel_t, - opal_list_item_t, - close_channel_cons, NULL); static void send_req_cons(orte_rml_send_request_t *ptr) { - OBJ_CONSTRUCT(&ptr->post.send, orte_rml_send_t); - OBJ_CONSTRUCT(&ptr->post.open_channel, orte_rml_open_channel_t); + OBJ_CONSTRUCT(&ptr->send, orte_rml_send_t); } OBJ_CLASS_INSTANCE(orte_rml_send_request_t, opal_object_t, @@ -329,7 +270,6 @@ static void recv_cons(orte_rml_recv_t *ptr) { ptr->iov.iov_base = NULL; ptr->iov.iov_len = 0; - ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM; } static void recv_des(orte_rml_recv_t *ptr) { diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 758bf9105f..02a3ef0995 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -46,7 +46,6 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/qos/base/base.h" static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all); @@ -143,11 +142,10 @@ void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg) * to retain ownership of it, so release whatever remains */ OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received bytes from %s for tag %d on channel=%d called callback", + "%s message received bytes from %s for tag %d called callback", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), - msg->tag, - msg->channel_num)); + msg->tag)); OBJ_DESTRUCT(&buf); } else { /* deliver as an iovec */ @@ -180,11 +178,10 @@ void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg) * the message until such a recv is issued */ OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received bytes from %s for tag %d on channel=%d Not Matched adding to unmatched msgs", + "%s message received bytes from %s for tag %d Not Matched adding to unmatched msgs", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), - msg->tag, - msg->channel_num)); + msg->tag)); opal_list_append(&orte_rml_base.unmatched_msgs, &msg->super); } @@ -227,41 +224,24 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) { orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata; OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received from %s for tag %d on channel=%d", + "%s message received from %s for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), - msg->tag, - msg->channel_num)); + msg->tag)); OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); - if ((ORTE_RML_INVALID_CHANNEL_NUM != msg->channel_num) && - (NULL != 
orte_rml_base_get_channel(msg->channel_num) )) { - - // call channel for recv post processing - if (ORTE_SUCCESS != (orte_rml_base_process_recv_channel (orte_rml_base_get_channel(msg->channel_num), msg))) - { - /* the qos channel has determined an error so we cannot complete this msg to the caller */ - OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s QoS channel receive error - cannot complete msg on channel=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->channel_num)); - return; - } - - } - orte_rml_base_complete_recv_msg (&msg); + orte_rml_base_complete_recv_msg(&msg); } void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata) { orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata; OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s reprocessing msg received from %s for tag %d on channel=%d", + "%s reprocessing msg received from %s for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), - msg->tag, - msg->channel_num)); + msg->tag)); OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c new file mode 100644 index 0000000000..00b241b5bc --- /dev/null +++ b/orte/mca/rml/base/rml_base_stubs.c @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include + +#include "opal/dss/dss.h" +#include "orte/mca/mca.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "opal/util/output.h" + +#include "orte/mca/rml/rml.h" +#include "orte/mca/state/state.h" +#include "orte/runtime/orte_wait.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/rml/base/base.h" + +/* + * The stub API interface functions + */ + +/** Enable communication once a process name has been assigned */ +int orte_rml_API_enable_comm(void) +{ + orte_rml_base_active_t *active, *next; + int rc; + + OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output, + "%s rml:base:enable_comm", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* cycle thru the actives and let each one enable their comm */ + OPAL_LIST_FOREACH_SAFE(active, next, &orte_rml_base.actives, orte_rml_base_active_t) { + if (NULL != active->module->enable_comm) { + if (ORTE_SUCCESS != (rc = active->module->enable_comm())) { + opal_output_verbose(2, orte_rml_base_framework.framework_output, + "%s rml:base:enable_comm Component %s was unable to enable comm", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + active->component->mca_component_name); + /* remove this component from our actives */ + opal_list_remove_item(&orte_rml_base.actives, &active->super); + /* give the module a chance to finalize */ + if (NULL != active->module->finalize) { + active->module->finalize(); + } + OBJ_RELEASE(active); + } + } + } + /* ensure we still have someone */ + if (0 < opal_list_get_size(&orte_rml_base.actives)) { + return ORTE_SUCCESS; + } + return ORTE_ERR_UNREACH; +} + +/** Shutdown the communication system and clean up resources */ +void orte_rml_API_finalize(void) +{ + orte_rml_base_active_t *active; + + OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output, + "%s rml:base:finalize()", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* cycle thru the actives and see who 
can send it */ + OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { + if (NULL != active->module->finalize) { + active->module->finalize(); + } + } +} + +/** Get contact information for local process */ +char* orte_rml_API_get_contact_info(void) +{ + char **rc = NULL, *tmp; + orte_rml_base_active_t *active; + + OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output, + "%s rml:base:get_contact_info()", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* cycle thru the actives and see who can send it */ + OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { + if (NULL != active->module->get_contact_info) { + tmp = active->module->get_contact_info(); + if (NULL != tmp) { + opal_argv_append_nosize(&rc, tmp); + free(tmp); + } + } + } + if (NULL != rc) { + tmp = opal_argv_join(rc, ';'); + } else { + tmp = NULL; + } + return tmp; +} + +/** Set contact information for remote process */ +void orte_rml_API_set_contact_info(const char *contact_info) +{ + orte_rml_base_active_t *active; + + OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output, + "%s rml:base:set_contact_info()", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* cycle thru the actives and let all modules parse the info + * to extract their relevant portions */ + OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { + if (NULL != active->module->set_contact_info) { + active->module->set_contact_info(contact_info); + } + } +} + +/** Ping process for connectivity check */ +int orte_rml_API_ping(const char* contact_info, + const struct timeval* tv) +{ + int rc = ORTE_ERR_UNREACH; + orte_rml_base_active_t *active; + + OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output, + "%s rml:base:ping()", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* cycle thru the actives and see if anyone can confirm connection */ + OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { + if (NULL != active->module->ping) { + rc 
= active->module->ping(contact_info, tv);
+            if (ORTE_SUCCESS == rc) {
+                /* at least someone can reach this target */
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+/** Send non-blocking iovec message
+ *
+ * Offers the message to each active module that provides send_nb, in
+ * list order, and stops at the first one that returns ORTE_SUCCESS.
+ *
+ * @return ORTE_SUCCESS if a module accepted the message; otherwise the
+ *         status of the last module tried, or ORTE_ERR_UNREACH if no
+ *         active module provides send_nb
+ */
+int orte_rml_API_send_nb(orte_process_name_t* peer,
+                         struct iovec* msg,
+                         int count,
+                         orte_rml_tag_t tag,
+                         orte_rml_callback_fn_t cbfunc,
+                         void* cbdata)
+{
+    int rc = ORTE_ERR_UNREACH;
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:send_nb() to peer %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer)));
+
+    /* cycle thru the actives and see who can send it */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->send_nb) {
+            rc = active->module->send_nb(peer, msg, count, tag, cbfunc, cbdata);
+            if (ORTE_SUCCESS == rc) {
+                /* someone was able to send it */
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+/** Send non-blocking buffer message
+ *
+ * Same dispatch as the iovec variant: the first active module whose
+ * send_buffer_nb returns ORTE_SUCCESS ends the search.
+ *
+ * @return ORTE_SUCCESS if a module accepted the buffer; otherwise the
+ *         status of the last module tried, or ORTE_ERR_UNREACH if no
+ *         active module provides send_buffer_nb
+ */
+int orte_rml_API_send_buffer_nb(orte_process_name_t* peer,
+                                struct opal_buffer_t* buffer,
+                                orte_rml_tag_t tag,
+                                orte_rml_buffer_callback_fn_t cbfunc,
+                                void* cbdata)
+{
+    int rc = ORTE_ERR_UNREACH;
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:send_buffer_nb()",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* cycle thru the actives and see who can send it */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->send_buffer_nb) {
+            if (ORTE_SUCCESS == (rc = active->module->send_buffer_nb(peer, buffer, tag, cbfunc, cbdata))) {
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+
+/** Post a non-blocking receive for an IOV message */
+void orte_rml_API_recv_nb(orte_process_name_t* peer,
+                          orte_rml_tag_t tag,
+                          bool persistent,
+                          orte_rml_callback_fn_t cbfunc,
+                          void* cbdata)
+{
+    orte_rml_base_active_t *active;
+    orte_rml_recv_request_t *req;
+
+    OPAL_OUTPUT_VERBOSE((1,
orte_rml_base_framework.framework_output,
+                         "%s rml_recv_nb for peer %s tag %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer), tag));
+
+    /* cycle thru the actives and give each module a chance
+     * to do whatever module-specific things they need to do */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->recv_nb) {
+            active->module->recv_nb(peer, tag, persistent, cbfunc, cbdata);
+        }
+    }
+    /* now push the request into the event base so we can add
+     * the receive to our list of posted recvs */
+    req = OBJ_NEW(orte_rml_recv_request_t);
+    req->post->buffer_data = false; /* iovec flavour: the callback is stored in cbfunc.iov below */
+    req->post->peer.jobid = peer->jobid;
+    req->post->peer.vpid = peer->vpid;
+    req->post->tag = tag;
+    req->post->persistent = persistent;
+    req->post->cbfunc.iov = cbfunc;
+    req->post->cbdata = cbdata;
+    opal_event_set(orte_event_base, &req->ev, -1,
+                   OPAL_EV_WRITE,
+                   orte_rml_base_post_recv, req); /* fd is -1: the event is triggered by hand below */
+    opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
+    opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
+}
+
+/** Post a non-blocking receive for a buffer message
+ *
+ * Lets every active module that provides recv_buffer_nb see the
+ * request, then hands a recv request to orte_rml_base_post_recv via
+ * the event base.
+ */
+void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
+                                 orte_rml_tag_t tag,
+                                 bool persistent,
+                                 orte_rml_buffer_callback_fn_t cbfunc,
+                                 void* cbdata)
+{
+    orte_rml_base_active_t *active;
+    orte_rml_recv_request_t *req;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                         "%s rml_recv_buffer_nb for peer %s tag %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer), tag));
+
+    /* cycle thru the actives and give each module a chance
+     * to do whatever module-specific things they need to do */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->recv_buffer_nb) {
+            active->module->recv_buffer_nb(peer, tag, persistent, cbfunc, cbdata);
+        }
+    }
+
+    /* now push the request into the event base so we can add
+     * the receive to our list of posted recvs */
+    req = OBJ_NEW(orte_rml_recv_request_t);
+
req->post->buffer_data = true;
+    req->post->peer.jobid = peer->jobid;
+    req->post->peer.vpid = peer->vpid;
+    req->post->tag = tag;
+    req->post->persistent = persistent;
+    req->post->cbfunc.buffer = cbfunc;
+    req->post->cbdata = cbdata;
+    opal_event_set(orte_event_base, &req->ev, -1,
+                   OPAL_EV_WRITE,
+                   orte_rml_base_post_recv, req);
+    opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
+    opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
+}
+
+/** Cancel posted non-blocking receive
+ *
+ * Notifies every active module that provides recv_cancel, then hands
+ * a request flagged as a cancel for this peer/tag pair to
+ * orte_rml_base_post_recv via the event base.
+ */
+void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
+{
+    orte_rml_base_active_t *active;
+    orte_rml_recv_request_t *req;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                         "%s rml_recv_cancel for peer %s tag %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer), tag));
+
+    /* cycle thru the actives and give each module a chance
+     * to do whatever module-specific things they need to do */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->recv_cancel) {
+            active->module->recv_cancel(peer,tag);
+        }
+    }
+
+    /* now push the request into the event base so we can remove
+     * the receive from our list of posted recvs */
+    req = OBJ_NEW(orte_rml_recv_request_t);
+    req->cancel = true;
+    req->post->peer.jobid = peer->jobid;
+    req->post->peer.vpid = peer->vpid;
+    req->post->tag = tag;
+    opal_event_set(orte_event_base, &req->ev, -1,
+                   OPAL_EV_WRITE,
+                   orte_rml_base_post_recv, req);
+    opal_event_set_priority(&req->ev, ORTE_MSG_PRI);
+    opal_event_active(&req->ev, OPAL_EV_WRITE, 1);
+}
+
+/** Add callback for communication exception
+ *
+ * @return ORTE_SUCCESS as soon as one active module registers the
+ *         callback; otherwise the status of the last module tried, or
+ *         ORTE_ERROR if no active module provides add_exception_handler
+ */
+int orte_rml_API_add_exception_handler(orte_rml_exception_callback_t cbfunc)
+{
+    int rc = ORTE_ERROR;
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:add_exception_handler()",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* cycle thru the actives and stop at the first one that accepts the handler */
+
OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->add_exception_handler) {
+            if (ORTE_SUCCESS == (rc = active->module->add_exception_handler(cbfunc))) {
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+/** Delete callback for communication exception
+ *
+ * @return ORTE_SUCCESS as soon as one active module removes the
+ *         callback; otherwise the status of the last module tried, or
+ *         ORTE_ERROR if no active module provides del_exception_handler
+ */
+int orte_rml_API_del_exception_handler(orte_rml_exception_callback_t cbfunc)
+{
+    int rc = ORTE_ERROR;
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:del_exception_handler()",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* cycle thru the actives and stop at the first one that removes the handler */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->del_exception_handler) {
+            if (ORTE_SUCCESS == (rc = active->module->del_exception_handler(cbfunc))) {
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+/** Fault tolerance handler
+ *
+ * Passes the event to each active module that provides ft_event and
+ * stops at the first failure.
+ *
+ * @return ORTE_SUCCESS if every module called succeeded; the first
+ *         failing module's status otherwise.
+ *         NOTE(review): when no active module provides ft_event the
+ *         initial ORTE_ERROR is returned - confirm this is intended.
+ */
+int orte_rml_API_ft_event(int state)
+{
+    int rc = ORTE_ERROR;
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:ft_event()",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* cycle thru the actives and let them all handle this event */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->ft_event) {
+            if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) {
+                break;
+            }
+        }
+    }
+    return rc;
+}
+
+
+/** Purge information - every active module that provides purge is called for this peer */
+void orte_rml_API_purge(orte_process_name_t *peer)
+{
+    orte_rml_base_active_t *active;
+
+    OPAL_OUTPUT_VERBOSE((1,orte_rml_base_framework.framework_output,
+                         "%s rml:base:purge() - calling the respective plugin that implements this",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* cycle thru the actives and let everyone purge related info */
+    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
+        if (NULL != active->module->purge) {
+            active->module->purge(peer);
+        }
+    }
+}
diff --git
a/orte/mca/rml/ftrm/Makefile.am b/orte/mca/rml/ftrm/Makefile.am deleted file mode 100644 index 55e33975ad..0000000000 --- a/orte/mca/rml/ftrm/Makefile.am +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - rml_ftrm.h \ - rml_ftrm_component.c \ - rml_ftrm_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_rml_ftrm_DSO -component_noinst = -component_install = mca_rml_ftrm.la -else -component_noinst = libmca_rml_ftrm.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rml_ftrm_la_SOURCES = $(sources) -mca_rml_ftrm_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rml_ftrm_la_SOURCES = $(sources) -libmca_rml_ftrm_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/rml/ftrm/configure.m4 b/orte/mca/rml/ftrm/configure.m4 deleted file mode 100644 index b3a41dfed1..0000000000 --- a/orte/mca/rml/ftrm/configure.m4 +++ /dev/null @@ -1,28 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_rml_ftrm_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_rml_ftrm_CONFIG],[ - AC_CONFIG_FILES([orte/mca/rml/ftrm/Makefile]) - - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [$1], - [$2]) -])dnl diff --git a/orte/mca/rml/ftrm/owner.txt b/orte/mca/rml/ftrm/owner.txt deleted file mode 100644 index 9e43c5910a..0000000000 --- a/orte/mca/rml/ftrm/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: unmaintained diff --git a/orte/mca/rml/ftrm/rml_ftrm.h b/orte/mca/rml/ftrm/rml_ftrm.h deleted file mode 100644 index 01cf92ead0..0000000000 --- a/orte/mca/rml/ftrm/rml_ftrm.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * ORTE RML Fault Tolerance Wrapper - Ready Message Protocol (FTRM) - * - * @file - */ -#ifndef MCA_RML_FTRM_H -#define MCA_RML_FTRM_H - -#include "orte_config.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" - -BEGIN_C_DECLS - - extern int rml_ftrm_output_handle; - - /* - * Component Information - */ - ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ftrm_component; - ORTE_MODULE_DECLSPEC extern orte_rml_module_t orte_rml_ftrm_module; - - ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ftrm_wrapped_component; - ORTE_MODULE_DECLSPEC extern orte_rml_module_t orte_rml_ftrm_wrapped_module; - - /* - * Init (Component) - */ - orte_rml_module_t* orte_rml_ftrm_component_init(int *priority); - - /* - * Init (Module) - */ - int orte_rml_ftrm_module_enable_comm(void); - - /* - * Finalize (Module) - */ - int orte_rml_ftrm_module_finalize(void); - - /* - * Get URI - */ - char * orte_rml_ftrm_get_contact_info(void); - - /* - * Set URI - */ - void orte_rml_ftrm_set_contact_info(const char* uri); - - /* - * Ping - */ - int orte_rml_ftrm_ping(const char* uri, const struct timeval* tv); - - /* - * Send Non-blocking - */ - int orte_rml_ftrm_send_nb(orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - /* - * Send Buffer Non-blocking - */ - int orte_rml_ftrm_send_buffer_nb(orte_process_name_t* peer, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - - /* - * Recv Non-blocking - */ - void orte_rml_ftrm_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - /* - * Recv Buffer Non-blocking - */ - void orte_rml_ftrm_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - - 
/* - * Recv Cancel - */ - void orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag); - - /* - * Register a callback on loss of connection - */ - int orte_rml_ftrm_add_exception_handler(orte_rml_exception_callback_t cbfunc); - int orte_rml_ftrm_del_exception_handler(orte_rml_exception_callback_t cbfunc); - - /* - * FT Event - */ - int orte_rml_ftrm_ft_event(int state); - - void orte_rml_ftrm_purge(orte_process_name_t *peer); - -END_C_DECLS - -#endif diff --git a/orte/mca/rml/ftrm/rml_ftrm_component.c b/orte/mca/rml/ftrm/rml_ftrm_component.c deleted file mode 100644 index 17ea19cf87..0000000000 --- a/orte/mca/rml/ftrm/rml_ftrm_component.c +++ /dev/null @@ -1,183 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" - - -#include "orte/mca/rml/base/base.h" - -#include "rml_ftrm.h" - - -static int orte_rml_ftrm_register(void); -static int orte_rml_ftrm_open(void); -static int orte_rml_ftrm_close(void); - -/** - * Component definition - */ -orte_rml_component_t mca_rml_ftrm_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - .rml_version = { - ORTE_RML_BASE_VERSION_2_0_0, - - .mca_component_name = "ftrm", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - .mca_open_component = orte_rml_ftrm_open, - .mca_close_component = orte_rml_ftrm_close, - .mca_register_component_params = orte_rml_ftrm_register, - }, - .rml_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .rml_init = orte_rml_ftrm_component_init, -}; - -orte_rml_module_t orte_rml_ftrm_module = { - orte_rml_ftrm_module_enable_comm, - orte_rml_ftrm_module_finalize, - - orte_rml_ftrm_get_contact_info, - orte_rml_ftrm_set_contact_info, - - orte_rml_ftrm_ping, - - orte_rml_ftrm_send_nb, - orte_rml_ftrm_send_buffer_nb, - - orte_rml_ftrm_recv_nb, - orte_rml_ftrm_recv_buffer_nb, - orte_rml_ftrm_recv_cancel, - - orte_rml_ftrm_add_exception_handler, - orte_rml_ftrm_del_exception_handler, - - orte_rml_ftrm_ft_event, - orte_rml_ftrm_purge, -}; - -int rml_ftrm_output_handle; - -static int ftrm_priority = -1; -static int ftrm_verbosity; - -/* - * Initalize the wrapper component - */ -orte_rml_module_t* orte_rml_ftrm_component_init(int* priority) -{ - /* - * Asked to return a priority - */ - if( NULL != priority ) { - *priority = ftrm_priority; - return &orte_rml_ftrm_module; - } - /* - * Called a second time to swap module pointers - */ - else { - /* Copy the wrapped versions */ - 
orte_rml_ftrm_wrapped_module = orte_rml; - mca_rml_ftrm_wrapped_component = *orte_rml_component; - /* Replace with ourselves */ - orte_rml = orte_rml_ftrm_module; - orte_rml_component = &mca_rml_ftrm_component; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: component_init(): Wrapped Component (%s)", - mca_rml_ftrm_wrapped_component.rml_version.mca_component_name); - - return NULL; - } -} - -static int orte_rml_ftrm_register(void) -{ -#if OPAL_ENABLE_FT_CR != 1 - return ORTE_ERR_NOT_AVAILABLE; -#endif - - ftrm_priority = RML_SELECT_WRAPPER_PRIORITY; - (void) mca_base_component_var_register(&mca_rml_ftrm_component.rml_version, - "priority", - "Priority of the RML ftrm component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ftrm_priority); - /* Enable this wrapper = RML_SELECT_WRAPPER_PRIORITY - * ow = -1 or never selected - */ - ftrm_verbosity = 0; - (void) mca_base_component_var_register(&mca_rml_ftrm_component.rml_version, - "verbose", - "Verbose level for the RML ftrm component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ftrm_verbosity); - return ORTE_SUCCESS; -} - -/* - * Initalize the structures upon opening - */ -static int orte_rml_ftrm_open(void) -{ -#if OPAL_ENABLE_FT_CR != 1 - return ORTE_ERR_NOT_AVAILABLE; -#endif - - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != ftrm_verbosity ) { - rml_ftrm_output_handle = opal_output_open(NULL); - opal_output_set_verbosity(rml_ftrm_output_handle, ftrm_verbosity); - } else { - rml_ftrm_output_handle = -1; - } - - opal_output_verbose(10, rml_ftrm_output_handle, - "orte_rml_ftrm: open(): Priority = %d", ftrm_priority); - opal_output_verbose(10, rml_ftrm_output_handle, - "orte_rml_ftrm: open(): Verbosity = %d", ftrm_verbosity); - - return ORTE_SUCCESS; -} - -/* - * Finalize the remaining structures upon close - */ 
-static int orte_rml_ftrm_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/rml/ftrm/rml_ftrm_module.c b/orte/mca/rml/ftrm/rml_ftrm_module.c deleted file mode 100644 index 5471d415bb..0000000000 --- a/orte/mca/rml/ftrm/rml_ftrm_module.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/util/name_fns.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" - -#include "orte/mca/rml/base/base.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/rml/rml.h" - -#include "rml_ftrm.h" - -orte_rml_component_t mca_rml_ftrm_wrapped_component; -orte_rml_module_t orte_rml_ftrm_wrapped_module; - -/* - * Init (Module) - */ - -int orte_rml_ftrm_module_enable_comm(void) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: module_init(): Normal..."); - - if( NULL != orte_rml_ftrm_wrapped_module.enable_comm ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.enable_comm() ) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - -/* - * Finalize (Module) - */ -int orte_rml_ftrm_module_finalize(void) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: module_finalize()"); - - - if( NULL != orte_rml_ftrm_wrapped_module.finalize ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.finalize() ) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - - -/* - * Get 
URI - */ -char * orte_rml_ftrm_get_contact_info(void) -{ - char * rtn_val = NULL; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: get_uri()"); - - - if( NULL != orte_rml_ftrm_wrapped_module.get_contact_info ) { - rtn_val = orte_rml_ftrm_wrapped_module.get_contact_info(); - } - - return rtn_val; -} - -/* - * Set CONTACT_INFO - */ -void orte_rml_ftrm_set_contact_info(const char* contact_info) -{ - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: set_contact_info()"); - - if( NULL != orte_rml_ftrm_wrapped_module.set_contact_info ) { - orte_rml_ftrm_wrapped_module.set_contact_info(contact_info); - } -} - - -/* - * Ping - */ -int orte_rml_ftrm_ping(const char* uri, const struct timeval* tv) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: ping()"); - - if( NULL != orte_rml_ftrm_wrapped_module.ping ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.ping(uri, tv) ) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - - -/* - * Send Non-blocking - */ -int orte_rml_ftrm_send_nb(orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: send_nb(%s, %d, %d )", - ORTE_NAME_PRINT(peer), count, tag); - - if( NULL != orte_rml_ftrm_wrapped_module.send_nb ) { - if(ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.send_nb(peer, msg, count, tag, cbfunc, cbdata))) { - return ret; - } - } - - return ORTE_SUCCESS; -} - -/* - * Send Buffer Non-blocking - */ -int orte_rml_ftrm_send_buffer_nb(orte_process_name_t* peer, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: send_buffer_nb(%s, %d )", - ORTE_NAME_PRINT(peer), tag); - - if( NULL != orte_rml_ftrm_wrapped_module.send_buffer_nb ) { - if(ORTE_SUCCESS != (ret 
= orte_rml_ftrm_wrapped_module.send_buffer_nb(peer, buffer, tag, cbfunc, cbdata))) { - return ret; - } - } - - return ORTE_SUCCESS; -} - - - -/* - * Recv Non-blocking - */ -void orte_rml_ftrm_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: recv_nb(%s, %d, %d )", - ORTE_NAME_PRINT(peer), tag, persistent); - - if( NULL != orte_rml_ftrm_wrapped_module.recv_nb ) { - orte_rml_ftrm_wrapped_module.recv_nb(peer, tag, persistent, cbfunc, cbdata); - } -} - -/* - * Recv Buffer Non-blocking - */ -void orte_rml_ftrm_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: recv_buffer_nb(%s, %d, %d)", - ORTE_NAME_PRINT(peer), tag, persistent); - - if( NULL != orte_rml_ftrm_wrapped_module.recv_buffer_nb ) { - orte_rml_ftrm_wrapped_module.recv_buffer_nb(peer, tag, persistent, cbfunc, cbdata); - } -} - -/* - * Recv Cancel - */ -void orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) -{ - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: recv_cancel()"); - - if( NULL != orte_rml_ftrm_wrapped_module.recv_cancel ) { - orte_rml_ftrm_wrapped_module.recv_cancel(peer, tag); - } -} - - -/* - * Register a callback on loss of connection - */ -int orte_rml_ftrm_add_exception_handler(orte_rml_exception_callback_t cbfunc) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: add_exception_handler()"); - - if( NULL != orte_rml_ftrm_wrapped_module.add_exception_handler ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.add_exception_handler(cbfunc) ) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - -int orte_rml_ftrm_del_exception_handler(orte_rml_exception_callback_t cbfunc) -{ - int ret; - - opal_output_verbose(20, 
rml_ftrm_output_handle, - "orte_rml_ftrm: del_exception_handler()"); - - if( NULL != orte_rml_ftrm_wrapped_module.del_exception_handler ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.del_exception_handler(cbfunc) ) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - -/* - * FT Event - */ -int orte_rml_ftrm_ft_event(int state) -{ - int ret; - - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: ft_event()"); - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* - * The wrapped component is responsible for calling the OOB modules - */ - if( NULL != orte_rml_ftrm_wrapped_module.ft_event ) { - if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.ft_event(state))) { - return ret; - } - } - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return ORTE_SUCCESS; -} - -void orte_rml_ftrm_purge(orte_process_name_t *peer) -{ - opal_output_verbose(20, rml_ftrm_output_handle, - "orte_rml_ftrm: purge()"); - - if( NULL != orte_rml_ftrm_wrapped_module.purge ) { - orte_rml_ftrm_wrapped_module.purge(peer); - } -} diff --git a/orte/mca/rml/oob/Makefile.am b/orte/mca/rml/oob/Makefile.am index f2fa64e0b4..1618220dc3 100644 --- a/orte/mca/rml/oob/Makefile.am +++ b/orte/mca/rml/oob/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +24,6 @@ sources = \ rml_oob_contact.c \ rml_oob_exception.c \ rml_oob_ping.c \ - rml_oob_recv.c \ rml_oob_send.c # Make the output library in this directory, and name it either diff --git a/orte/mca/rml/oob/rml_oob.h b/orte/mca/rml/oob/rml_oob.h index 6c7741ac39..8b465a175a 100644 --- a/orte/mca/rml/oob/rml_oob.h +++ b/orte/mca/rml/oob/rml_oob.h @@ -14,7 +14,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +37,7 @@ BEGIN_C_DECLS typedef struct { - struct orte_rml_module_t super; + struct orte_rml_base_module_t super; opal_list_t exceptions; opal_list_t queued_routing_messages; opal_event_t *timer_event; @@ -48,7 +48,7 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component; extern orte_rml_oob_module_t orte_rml_oob_module; int orte_rml_oob_init(void); -int orte_rml_oob_fini(void); +void orte_rml_oob_fini(void); int orte_rml_oob_ft_event(int state); int orte_rml_oob_send_nb(orte_process_name_t* peer, @@ -64,43 +64,6 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, orte_rml_buffer_callback_fn_t cbfunc, void* cbdata); -void orte_rml_oob_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_oob_recv_cancel(orte_process_name_t* peer, - orte_rml_tag_t tag); - -int orte_rml_oob_open_channel(orte_process_name_t * peer, - opal_list_t * qos_attributes, - orte_rml_channel_callback_fn_t cbfunc, - void *cbdata); - -int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t 
channel, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_send_channel_callback_fn_t cbfunc, - void* cbdata); - -int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel, - opal_buffer_t *buffer, - orte_rml_tag_t tag, - orte_rml_send_buffer_channel_callback_fn_t cbfunc, - void* cbdata); - -int orte_rml_oob_close_channel (orte_rml_channel_num_t channel, - orte_rml_channel_callback_fn_t cbfunc, - void* cbdata); - int orte_rml_oob_ping(const char* uri, const struct timeval* tv); diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index 93a0e14ca7..0cf7dc8d4c 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +51,7 @@ #include "orte/mca/oob/base/base.h" #include "rml_oob.h" -static orte_rml_module_t* rml_oob_init(int* priority); +static orte_rml_base_module_t* rml_oob_init(int* priority); static int rml_oob_open(void); static int rml_oob_close(void); @@ -80,7 +80,6 @@ orte_rml_component_t mca_rml_oob_component = { orte_rml_oob_module_t orte_rml_oob_module = { { - .enable_comm = orte_rml_oob_init, .finalize = orte_rml_oob_fini, .get_contact_info = orte_rml_oob_get_uri, @@ -91,20 +90,10 @@ orte_rml_oob_module_t orte_rml_oob_module = { .send_nb = orte_rml_oob_send_nb, .send_buffer_nb = orte_rml_oob_send_buffer_nb, - .recv_nb = orte_rml_oob_recv_nb, - .recv_buffer_nb = orte_rml_oob_recv_buffer_nb, - - .recv_cancel = orte_rml_oob_recv_cancel, - .add_exception_handler = orte_rml_oob_add_exception, .del_exception_handler = orte_rml_oob_del_exception, .ft_event = orte_rml_oob_ft_event, - .purge = orte_rml_oob_purge, - - .open_channel = orte_rml_oob_open_channel, - .send_channel_nb = orte_rml_oob_send_channel_nb, - .send_buffer_channel_nb = orte_rml_oob_send_buffer_channel_nb, - .close_channel = orte_rml_oob_close_channel + .purge = orte_rml_oob_purge } }; @@ -124,7 +113,8 @@ rml_oob_close(void) return ORTE_SUCCESS; } -static orte_rml_module_t* + +static orte_rml_base_module_t* rml_oob_init(int* priority) { if (init_done) { @@ -150,7 +140,7 @@ orte_rml_oob_init(void) } -int +void orte_rml_oob_fini(void) { opal_list_item_t *item; @@ -163,8 +153,6 @@ orte_rml_oob_fini(void) /* clear the base receive */ orte_rml_base_comm_stop(); - - return ORTE_SUCCESS; } #if OPAL_ENABLE_FT_CR == 1 diff --git a/orte/mca/rml/oob/rml_oob_recv.c b/orte/mca/rml/oob/rml_oob_recv.c deleted file mode 100644 index c510b7a62e..0000000000 --- a/orte/mca/rml/oob/rml_oob_recv.c +++ /dev/null @@ -1,99 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee 
Research Foundation. All rights - * reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "orte/mca/rml/base/base.h" - -#include "rml_oob.h" - -void orte_rml_oob_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_request_t *req; - - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_recv_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag)); - - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = false; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.iov = cbfunc; - req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); -} - - -void orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_request_t *req; - - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_recv_buffer_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag)); - - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.buffer = cbfunc; - req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 
1); -} - - -void orte_rml_oob_recv_cancel(orte_process_name_t* peer, - orte_rml_tag_t tag) -{ - orte_rml_recv_request_t *req; - - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_recv_cancel for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag)); - - req = OBJ_NEW(orte_rml_recv_request_t); - req->cancel = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); -} diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 92ebe89166..8b7f385402 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +34,7 @@ #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/rml_types.h" #include "rml_oob.h" -#include "orte/mca/qos/base/base.h" + typedef struct { opal_object_t object; opal_event_t ev; @@ -95,8 +95,8 @@ static void send_self_exe(int fd, short args, void* data) static void send_msg(int fd, short args, void *cbdata) { orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; - orte_process_name_t *peer = &(req->post.send.dst); - orte_rml_tag_t tag = req->post.send.tag; + orte_process_name_t *peer = &(req->send.dst); + orte_rml_tag_t tag = req->send.tag; orte_rml_recv_t *rcv; orte_rml_send_t *snd; int bytes; @@ -135,16 +135,16 @@ static void send_msg(int fd, short args, void *cbdata) /* setup the send callback */ xfer = OBJ_NEW(orte_self_send_xfer_t); - if (NULL != req->post.send.iov) { - xfer->iov = req->post.send.iov; - xfer->count = req->post.send.count; - xfer->cbfunc.iov = req->post.send.cbfunc.iov; + if (NULL != req->send.iov) { + xfer->iov = req->send.iov; + xfer->count = req->send.count; + xfer->cbfunc.iov = req->send.cbfunc.iov; } else { - xfer->buffer = req->post.send.buffer; - xfer->cbfunc.buffer = req->post.send.cbfunc.buffer; + xfer->buffer = req->send.buffer; + xfer->cbfunc.buffer = req->send.cbfunc.buffer; } xfer->tag = tag; - xfer->cbdata = req->post.send.cbdata; + xfer->cbdata = req->send.cbdata; /* setup the event for the send callback */ opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); @@ -154,11 +154,11 @@ static void send_msg(int fd, short args, void *cbdata) rcv = OBJ_NEW(orte_rml_recv_t); rcv->sender = *peer; rcv->tag = tag; - if (NULL != req->post.send.iov) { + if (NULL != req->send.iov) { /* get the total number of bytes in the iovec array */ bytes = 0; - for (i = 0 ; i < req->post.send.count ; ++i) { - bytes += req->post.send.iov[i].iov_len; + for (i = 0 ; i < req->send.count ; 
++i) { + bytes += req->send.iov[i].iov_len; } /* get the required memory allocation */ if (0 < bytes) { @@ -166,15 +166,15 @@ static void send_msg(int fd, short args, void *cbdata) rcv->iov.iov_len = bytes; /* transfer the bytes */ ptr = (char*)rcv->iov.iov_base; - for (i = 0 ; i < req->post.send.count ; ++i) { - memcpy(ptr, req->post.send.iov[i].iov_base, req->post.send.iov[i].iov_len); - ptr += req->post.send.iov[i].iov_len; + for (i = 0 ; i < req->send.count ; ++i) { + memcpy(ptr, req->send.iov[i].iov_base, req->send.iov[i].iov_len); + ptr += req->send.iov[i].iov_len; } } - } else if (0 < req->post.send.buffer->bytes_used) { - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.send.buffer->bytes_used); - memcpy(rcv->iov.iov_base, req->post.send.buffer->base_ptr, req->post.send.buffer->bytes_used); - rcv->iov.iov_len = req->post.send.buffer->bytes_used; + } else if (0 < req->send.buffer->bytes_used) { + rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->send.buffer->bytes_used); + memcpy(rcv->iov.iov_base, req->send.buffer->base_ptr, req->send.buffer->bytes_used); + rcv->iov.iov_len = req->send.buffer->bytes_used; } /* post the message for receipt - since the send callback was posted * first and has the same priority, it will execute first @@ -188,32 +188,21 @@ static void send_msg(int fd, short args, void *cbdata) snd->dst = *peer; snd->origin = *ORTE_PROC_MY_NAME; snd->tag = tag; - if (NULL != req->post.send.iov) { - snd->iov = req->post.send.iov; - snd->count = req->post.send.count; - snd->cbfunc.iov = req->post.send.cbfunc.iov; + if (NULL != req->send.iov) { + snd->iov = req->send.iov; + snd->count = req->send.count; + snd->cbfunc.iov = req->send.cbfunc.iov; } else { - snd->buffer = req->post.send.buffer; - snd->cbfunc.buffer = req->post.send.cbfunc.buffer; - } - snd->cbdata = req->post.send.cbdata; - snd->channel = req->post.send.channel; - /* call send prep to prep the Qos channel for send */ - if (NULL != snd->channel) - { - OPAL_OUTPUT_VERBOSE((1, 
orte_rml_base_framework.framework_output, - "%s send_msg sending on channel %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), snd->channel->channel_num)); - orte_rml_base_prep_send_channel (snd->channel, snd); + snd->buffer = req->send.buffer; + snd->cbfunc.buffer = req->send.cbfunc.buffer; } + snd->cbdata = req->send.cbdata; /* activate the OOB send state */ ORTE_OOB_SEND(snd); OBJ_RELEASE(req); } - - int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* iov, int count, @@ -233,8 +222,8 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - if( NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { + if (NULL == peer || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) { /* cannot send to an invalid peer */ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; @@ -243,12 +232,12 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer, * race conditions and threads */ req = OBJ_NEW(orte_rml_send_request_t); - req->post.send.dst = *peer; - req->post.send.iov = iov; - req->post.send.count = count; - req->post.send.tag = tag; - req->post.send.cbfunc.iov = cbfunc; - req->post.send.cbdata = cbdata; + req->send.dst = *peer; + req->send.iov = iov; + req->send.count = count; + req->send.tag = tag; + req->send.cbfunc.iov = cbfunc; + req->send.cbdata = cbdata; /* setup the event for the send callback */ opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); opal_event_set_priority(&req->ev, ORTE_MSG_PRI); @@ -257,7 +246,6 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer, return ORTE_SUCCESS; } - int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, opal_buffer_t* buffer, orte_rml_tag_t tag, @@ -276,8 +264,8 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - if( NULL == peer || - OPAL_EQUAL == 
orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { + if (NULL == peer || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) { /* cannot send to an invalid peer */ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; @@ -286,11 +274,11 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, * race conditions and threads */ req = OBJ_NEW(orte_rml_send_request_t); - req->post.send.dst = *peer; - req->post.send.buffer = buffer; - req->post.send.tag = tag; - req->post.send.cbfunc.buffer = cbfunc; - req->post.send.cbdata = cbdata; + req->send.dst = *peer; + req->send.buffer = buffer; + req->send.tag = tag; + req->send.cbfunc.buffer = cbfunc; + req->send.cbdata = cbdata; /* setup the event for the send callback */ opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); opal_event_set_priority(&req->ev, ORTE_MSG_PRI); @@ -298,145 +286,3 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, return ORTE_SUCCESS; } - -int orte_rml_oob_open_channel(orte_process_name_t * peer, - opal_list_t *qos_attributes, - orte_rml_channel_callback_fn_t cbfunc, - void *cbdata) -{ - orte_rml_send_request_t *req; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel to peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - if( NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { - /* cannot send to an invalid peer */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* process the request in an event to be safe */ - req = OBJ_NEW(orte_rml_send_request_t); - req->post.open_channel.dst = *peer; - req->post.open_channel.qos_attributes = qos_attributes; - req->post.open_channel.cbfunc = cbfunc; - req->post.open_channel.cbdata = cbdata; - /* setup the event for the open callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, 
orte_rml_base_open_channel, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel to peer %s - set event done", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - return ORTE_SUCCESS; -} - -int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel_num, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_send_channel_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_send_request_t *req; - orte_rml_channel_t *channel; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_send_buffer to channel %d at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - channel_num, tag)); - - if (ORTE_RML_TAG_INVALID == tag) { - /* cannot send to an invalid tag */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - channel = (orte_rml_channel_t*) orte_rml_base_get_channel (channel_num); - if (NULL == channel) { - /* cannot send to a non existing or closed channel */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* get ourselves into an event to protect against - * race conditions and threads - */ - req = OBJ_NEW(orte_rml_send_request_t); - req->post.send.dst = channel->peer; - req->post.send.iov = msg; - req->post.send.count = count; - req->post.send.tag = tag; - req->post.send.cbfunc.iov_chan = cbfunc; - req->post.send.cbdata = cbdata; - req->post.send.channel = channel; - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - return ORTE_SUCCESS; -} - -int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel_num, - opal_buffer_t *buffer, - orte_rml_tag_t tag, - orte_rml_send_buffer_channel_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_send_request_t 
*req; - orte_rml_channel_t *channel; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_send_buffer to channel %d at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - channel_num, tag)); - - if (ORTE_RML_TAG_INVALID == tag) { - /* cannot send to an invalid tag */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - channel = (orte_rml_channel_t*) orte_rml_base_get_channel (channel_num); - if (NULL == channel) { - /* cannot send to a non existing or closed channel */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* get ourselves into an event to protect against - * race conditions and threads - */ - req = OBJ_NEW(orte_rml_send_request_t); - req->post.send.dst = channel->peer; - req->post.send.buffer = buffer; - req->post.send.tag = tag; - req->post.send.cbfunc.buf_chan = cbfunc; - req->post.send.cbdata = cbdata; - req->post.send.channel = channel; - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - return ORTE_SUCCESS; -} - -int orte_rml_oob_close_channel (orte_rml_channel_num_t channel_num, - orte_rml_channel_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_channel_t *channel; - orte_rml_send_request_t *req; - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_close_channel channel num %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - channel_num)); - channel = orte_rml_base_get_channel (channel_num); - if (NULL == channel) - return ORTE_ERR_BAD_PARAM; - /* process the request in an event to be safe */ - req = OBJ_NEW(orte_rml_send_request_t); - req->post.close_channel.channel = channel; - req->post.close_channel.cbfunc = cbfunc; - req->post.close_channel.cbdata = cbdata; - /* setup the event for the open callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, 
orte_rml_base_close_channel, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - return ORTE_SUCCESS; -} diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index 7c9abe8df9..ca90599a04 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * @@ -57,7 +57,8 @@ BEGIN_C_DECLS struct opal_buffer_t; -struct orte_rml_module_t; +struct orte_rml_base_module_t; +struct orte_rml_API_module_t; typedef struct { opal_object_t super; orte_process_name_t name; @@ -109,8 +110,7 @@ ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status, * @retval NULL An error occurred and initialization did not occur * @retval non-NULL The module was successfully initialized */ -typedef struct orte_rml_module_t* (*orte_rml_component_init_fn_t)(int *priority); - +typedef struct orte_rml_base_module_t* (*orte_rml_component_init_fn_t)(int *priority); /** * RML component interface @@ -235,14 +235,8 @@ typedef int (*orte_rml_module_enable_comm_fn_t)(void); * all resources associated with the module. After the finalize * function is called, all interface functions (and the module * structure itself) are not available for use. - * - * @note Whether or not the finalize function returns successfully, - * the module should not be used once this function is called. 
- * - * @retval ORTE_SUCCESS Success - * @retval ORTE_ERROR An unspecified error occurred */ -typedef int (*orte_rml_module_finalize_fn_t)(void); +typedef void (*orte_rml_module_finalize_fn_t)(void); /** @@ -595,12 +589,8 @@ typedef int (*orte_rml_module_close_channel_fn_t)( orte_rml_channel_num_t channe /** * RML module interface - * - * Module interface to the RML communication system. A global - * instance of this module, orte_rml, provices an interface into the - * active RML interface. */ -struct orte_rml_module_t { +struct orte_rml_base_module_t { /** Enable communication once a process name has been assigned */ orte_rml_module_enable_comm_fn_t enable_comm; /** Shutdown the communication system and clean up resources */ @@ -638,24 +628,13 @@ struct orte_rml_module_t { /** Purge information */ orte_rml_module_purge_fn_t purge; - - /** Open a qos messaging channel to a peer*/ - orte_rml_module_open_channel_fn_t open_channel; - - /** send a non blocking iovec message over a channel */ - orte_rml_module_send_channel_nb_fn_t send_channel_nb; - - /** send a non blocking buffer message over a channel */ - orte_rml_module_send_buffer_channel_nb_fn_t send_buffer_channel_nb; - - /** close a qos messaging channel */ - orte_rml_module_close_channel_fn_t close_channel; }; -/** Convienence typedef */ -typedef struct orte_rml_module_t orte_rml_module_t; +/** Convenience typedef */ +typedef struct orte_rml_base_module_t orte_rml_base_module_t; + /** Interface for RML communication */ -ORTE_DECLSPEC extern orte_rml_module_t orte_rml; +ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml; /* ******************************************************************** */