1
1

A number of improvements / changes to the RML/OOB layers:

* General TCP cleanup for OPAL / ORTE
  * Simplifying the OOB by moving much of the logic into the RML
  * Allowing the OOB RML component to do routing of messages
  * Adding a component framework for handling routing tables
  * Moving the xcast functionality from the OOB base to its own framework

Includes merge from tmp/bwb-oob-rml-merge revisions:

    r15506, r15507, r15508, r15510, r15511, r15512, r15513

This commit was SVN r15528.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r15506
  r15507
  r15508
  r15510
  r15511
  r15512
  r15513
Этот коммит содержится в:
Brian Barrett 2007-07-20 01:34:02 +00:00
родитель 2d17dd9516
Коммит 39a6057fc6
133 изменённых файлов: 5055 добавлений и 4461 удалений

Просмотреть файл

@ -657,26 +657,21 @@ ompi_show_title "Type tests"
# Size of pid_t
AC_CHECK_SIZEOF(pid_t)
AC_CHECK_TYPES([socklen_t], [], [], [AC_INCLUDES_DEFAULT
#include <sys/socket.h>])
AC_CHECK_TYPES([struct sockaddr_in], [], [], [AC_INCLUDES_DEFAULT
AC_CHECK_TYPES([socklen_t, struct sockaddr_in, struct sockaddr_in6,
struct sockaddr_storage],
[], [], [AC_INCLUDES_DEFAULT
#if HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
# Do we have IPv6 support?
AC_CHECK_TYPES([struct sockaddr_in6], [], [], [AC_INCLUDES_DEFAULT
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
AC_CHECK_TYPES([struct sockaddr_storage], [], [], [AC_INCLUDES_DEFAULT
#include <sys/socket.h>
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC], [], [], [AC_INCLUDES_DEFAULT
AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC, AF_INET6, PF_INET6],
[], [], [AC_INCLUDES_DEFAULT
#if HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
@ -692,14 +687,11 @@ AC_DEFINE_UNQUOTED(OMPI_HAVE_SA_RESTART, $VALUE,
[Whether we have SA_RESTART in <signal.h> or not])
AC_MSG_RESULT([$MSG])
# sa_len in struct sockaddr
AC_MSG_CHECKING([for sa_len in struct sockaddr])
AC_TRY_COMPILE([#include <sys/types.h>
#include <sys/socket.h>], [struct sockaddr s; s.sa_len;],
[MSG=yes VALUE=1], [MSG=no VALUE=0])
AC_DEFINE_UNQUOTED(OMPI_HAVE_SA_LEN, $VALUE,
[Whether we have the sa_len struct in <sys/socket.h> or not])
AC_MSG_RESULT([$MSG])
AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [
#include <sys/types.h>
#if HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif])
AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [
#include <sys/types.h>

Просмотреть файл

@ -759,10 +759,10 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
if ( send_first ) {
rc = orte_rml.send_buffer(remote_leader, sbuf, 0, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, 0, 0);
}
else {
rc = orte_rml.recv_buffer(remote_leader, rbuf, 0);
rc = orte_rml.recv_buffer(remote_leader, rbuf, 0, 0);
rc = orte_rml.send_buffer(remote_leader, sbuf, 0, 0);
}

Просмотреть файл

@ -130,9 +130,9 @@ int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root,
/* Exchange the number and the list of processes in the groups */
if ( send_first ) {
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
rc = orte_rml.recv_buffer(rport, nrbuf, tag);
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
} else {
rc = orte_rml.recv_buffer(rport, nrbuf, tag);
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
}
@ -320,7 +320,7 @@ int ompi_comm_get_rport(orte_process_name_t *port, int send_first,
if (NULL == rbuf) {
return ORTE_ERROR;
}
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag))) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(rbuf);
return rc;

Просмотреть файл

@ -4033,7 +4033,7 @@ static int recv_bookmarks(int peer_idx)
goto cleanup;
}
if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG) ) ) {
if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) , 0) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_bookmarks: Failed to receive bookmark from peer [%lu,%lu,%lu]: Return %d\n",
ORTE_NAME_ARGS(&peer_name),
@ -4268,7 +4268,7 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
* Recv the ACK msg
*/
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer,
OMPI_CRCP_COORD_BOOKMARK_TAG) ) ) {
OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: do_send_msg_detail: [%lu,%lu,%lu] --> [%lu,%lu,%lu] Failed to receive ACK buffer from peer. Return %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -4419,7 +4419,7 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref,
/*
* Recv the msg
*/
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG) ) ) {
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: do_recv_msg_detail: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] Failed to receive buffer from peer. Return %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -5045,7 +5045,7 @@ static int coord_basic_barrier_recv(int peer_idx)
goto cleanup;
}
if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG+1) ) ) {
if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG+1, 0) ) ) {
opal_output(mca_crcp_coord_component.super.output_handle,
"crcp:coord: recv_bookmarks: Failed to receive bookmark from peer [%lu,%lu,%lu]: Return %d\n",
ORTE_NAME_ARGS(&peer_name),

Просмотреть файл

@ -37,6 +37,7 @@
#include "pml_ob1_rdmafrag.h"
#include "ompi/mca/bml/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "ompi/runtime/ompi_cr.h"
#include "ompi/runtime/ompi_module_exchange.h"
@ -541,7 +542,7 @@ int mca_pml_ob1_ft_event( int state )
return ret;
}
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
opal_output(0,
"pml:ob1: ft_event(Restart): Stage Gate 1 Failed %d",
ret);
@ -565,7 +566,7 @@ int mca_pml_ob1_ft_event( int state )
return ret;
}
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
opal_output(0,"pml:ob1: ft_event(Restart): Stage Gate 1 Failed %d",
ret);
return ret;

Просмотреть файл

@ -26,7 +26,6 @@
#include "opal/util/show_help.h"
#include "orte/util/sys_info.h"
#include "orte/dss/dss.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"

Просмотреть файл

@ -51,6 +51,7 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/runtime/runtime.h"
#include "mpi.h"
@ -157,7 +158,7 @@ int ompi_mpi_finalize(void)
/*
* Wait for everyone to get here
*/
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
ORTE_ERROR_LOG(ret);
return ret;
}
@ -311,7 +312,7 @@ int ompi_mpi_finalize(void)
* the RTE while the smr is trying to do the update - which causes
* an ugly race condition
*/
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
ORTE_ERROR_LOG(ret);
return ret;
}

Просмотреть файл

@ -43,8 +43,6 @@
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/runtime/runtime.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/gpr/gpr.h"
@ -52,6 +50,7 @@
#include "orte/mca/schema/schema.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/runtime/params.h"
#include "ompi/constants.h"
@ -561,7 +560,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
/* FIRST BARRIER - WAIT FOR XCAST STG1 MESSAGE TO ARRIVE */
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n";
goto error;
@ -669,7 +668,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Second barrier -- wait for XCAST STG2 MESSAGE to arrive */
if (ORTE_SUCCESS != (ret = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) {
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n";
goto error;

Просмотреть файл

@ -257,7 +257,7 @@ void ompi_info::open_components()
component_map["rmgr"] = &orte_rmgr_base.rmgr_components;
orte_rml_base_open();
component_map["rml"] = &orte_rml_base.rml_components;
component_map["rml"] = &orte_rml_base_components;
orte_pls_base_open();
component_map["pls"] = &orte_pls_base.available_components;

Просмотреть файл

@ -487,11 +487,27 @@ static inline uint16_t ntohs(uint16_t netvar) { return netvar; }
#define sockaddr_storage sockaddr
#define ss_family sa_family
#endif
/* Compatibility structure so that we don't have to have as many
#if checks in the code base */
#if !defined(HAVE_STRUCT_SOCKADDR_IN6) && defined(HAVE_STRUCT_SOCKADDR_IN)
#define sockaddr_in6 sockaddr_in
#define sin6_len sin_len
#define sin6_family sin_family
#define sin6_port sin_port
#endif
#if !HAVE_DECL_AF_UNSPEC
#define AF_UNSPEC AF_INET
#define AF_UNSPEC 0
#endif
#if !HAVE_DECL_PF_UNSPEC
#define PF_UNSPEC PF_INET
#define PF_UNSPEC 0
#endif
#if !HAVE_DECL_AF_INET6
#define AF_INET6 AF_UNSPEC
#endif
#if !HAVE_DECL_PF_INET6
#define PF_INET6 PF_UNSPEC
#endif
#if defined(__APPLE__) && defined(HAVE_INTTYPES_H)

Просмотреть файл

@ -260,7 +260,7 @@ static int opal_ifinit(void)
OBJ_CONSTRUCT(&intf, opal_list_item_t);
/* compute offset for entries */
#if OMPI_HAVE_SA_LEN
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
length = sizeof(struct sockaddr);
if (ifr->ifr_addr.sa_len > length) {

Просмотреть файл

@ -29,7 +29,7 @@
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/rml.h"
/*

Просмотреть файл

@ -158,7 +158,7 @@ int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_c
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_orted_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_orted_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -162,7 +162,7 @@ int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_c
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -238,7 +238,7 @@ int orte_errmgr_proxy_register_job(orte_jobid_t job)
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -277,7 +277,7 @@ int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *pe
/*
* Get the response
*/
if( 0 > (ret = orte_rml.recv_buffer(peer, loc_buffer, ORTE_RML_TAG_FILEM)) ) {
if( 0 > (ret = orte_rml.recv_buffer(peer, loc_buffer, ORTE_RML_TAG_FILEM, 0)) ) {
exit_status = ret;
goto cleanup;
}

Просмотреть файл

@ -86,7 +86,7 @@ int orte_gpr_proxy_arith(orte_gpr_addr_mode_t addr_mode,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -144,7 +144,7 @@ int orte_gpr_proxy_increment_value(orte_gpr_value_t *value)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -202,7 +202,7 @@ int orte_gpr_proxy_decrement_value(orte_gpr_value_t *value)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -79,7 +79,7 @@ int orte_gpr_proxy_cleanup_job(orte_jobid_t jobid)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -137,7 +137,7 @@ int orte_gpr_proxy_cleanup_proc(orte_process_name_t *proc)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -37,6 +37,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/oob/oob_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/errmgr/errmgr.h"
#include "gpr_proxy.h"
@ -234,7 +235,7 @@ orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_
}
/* setup the replica location */
if(ORTE_SUCCESS != (ret = orte_rml.parse_uris(orte_process_info.gpr_replica_uri, &name, NULL))) {
if(ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.gpr_replica_uri, &name, NULL))) {
ORTE_ERROR_LOG(ret);
return NULL;
}

Просмотреть файл

@ -116,7 +116,7 @@ int orte_gpr_proxy_exec_compound_cmd(orte_buffer_t *buffer)
goto CLEANUP;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
rc = ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -80,7 +80,7 @@ int orte_gpr_proxy_delete_segment(char *segment)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -156,7 +156,7 @@ int orte_gpr_proxy_delete_entries(orte_gpr_addr_mode_t mode,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -233,7 +233,7 @@ int orte_gpr_proxy_index(char *segment, orte_std_cntr_t *cnt, char ***index)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -79,7 +79,7 @@ int orte_gpr_proxy_dump_all(void)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -140,7 +140,7 @@ int orte_gpr_proxy_dump_segments(char *segment)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -201,7 +201,7 @@ int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -262,7 +262,7 @@ int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -324,7 +324,7 @@ int orte_gpr_proxy_dump_a_trigger(char *name,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -387,7 +387,7 @@ int orte_gpr_proxy_dump_a_subscription(char *name,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -449,7 +449,7 @@ int orte_gpr_proxy_dump_callbacks(void)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -586,7 +586,7 @@ int orte_gpr_proxy_dump_segment_size(char *segment)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}

Просмотреть файл

@ -79,7 +79,7 @@ int orte_gpr_proxy_put(orte_std_cntr_t cnt, orte_gpr_value_t **values)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -150,7 +150,7 @@ int orte_gpr_proxy_get(orte_gpr_addr_mode_t mode,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}
@ -216,7 +216,7 @@ int orte_gpr_proxy_get_conditional(orte_gpr_addr_mode_t mode,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
return ORTE_ERR_COMM_FAILURE;
}

Просмотреть файл

@ -135,7 +135,7 @@ orte_gpr_proxy_subscribe(orte_std_cntr_t num_subs,
goto subscribe_error;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
rc = ORTE_ERR_COMM_FAILURE;
@ -263,7 +263,7 @@ PROCESS:
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
@ -369,7 +369,7 @@ PROCESS:
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);

41
orte/mca/grpcomm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_grpcomm.la
libmca_grpcomm_la_SOURCES =
# header setup
nobase_orte_HEADERS =
# local files
headers = grpcomm.h
libmca_grpcomm_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_orte_HEADERS += $(headers)
ortedir = $(includedir)/openmpi/orte/mca/grpcomm
else
ortedir = $(includedir)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

25
orte/mca/grpcomm/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_grpcomm_la_SOURCES += \
base/grpcomm_base_close.c \
base/grpcomm_base_select.c \
base/grpcomm_base_open.c

67
orte/mca/grpcomm/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,67 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_GRPCOMM_BASE_H
#define MCA_GRPCOMM_BASE_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "orte/mca/grpcomm/grpcomm.h"
/*
* Global functions for MCA overall collective open and close
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* function definitions
*/
ORTE_DECLSPEC int orte_grpcomm_base_open(void);
ORTE_DECLSPEC int orte_grpcomm_base_select(void);
ORTE_DECLSPEC int orte_grpcomm_base_close(void);
/*
* globals that might be needed
*/
ORTE_DECLSPEC extern int orte_grpcomm_base_output;
ORTE_DECLSPEC extern bool mca_grpcomm_base_selected;
ORTE_DECLSPEC extern opal_list_t mca_grpcomm_base_components_available;
ORTE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_base_selected_component;
/*
* external API functions will be documented in the mca/grpcomm/grpcomm.h file
*/
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -20,20 +20,26 @@
#include <stdio.h>
#include "orte/orte_constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/grpcomm/base/base.h"
int orte_rml_base_close(void)
int orte_grpcomm_base_close(void)
{
/* shutdown any remaining opened components */
if (! opal_list_is_empty(&orte_rml_base.rml_components)) {
mca_base_components_close(orte_rml_base.rml_output,
&orte_rml_base.rml_components, NULL);
}
OBJ_DESTRUCT(&orte_rml_base.rml_components);
return ORTE_SUCCESS;
}
/* If we have a selected component and module, then finalize it */
if (mca_grpcomm_base_selected) {
mca_grpcomm_base_selected_component.grpcomm_finalize();
}
/* Close all remaining available components (may be one if this is a
OpenRTE program, or [possibly] multiple if this is ompi_info) */
mca_base_components_close(orte_grpcomm_base_output,
&mca_grpcomm_base_components_available, NULL);
/* All done */
return ORTE_SUCCESS;
}

80
orte/mca/grpcomm/base/grpcomm_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,80 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "orte/mca/grpcomm/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/grpcomm/base/static-components.h"
/*
* Global variables
*/
int orte_grpcomm_base_output = -1;
bool mca_grpcomm_base_selected;
orte_grpcomm_base_module_t orte_grpcomm;
opal_list_t mca_grpcomm_base_components_available;
orte_grpcomm_base_component_t mca_grpcomm_base_selected_component;
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int orte_grpcomm_base_open(void)
{
int value;
/* Debugging / verbose output */
mca_base_param_reg_int_name("grpcomm_base", "verbose",
"Verbosity level for the grpcomm framework",
false, false, 0, &value);
if (value != 0) {
orte_grpcomm_base_output = opal_output_open(NULL);
} else {
orte_grpcomm_base_output = -1;
}
/* Open up all available components */
if (ORTE_SUCCESS !=
mca_base_components_open("grpcomm", orte_grpcomm_base_output,
mca_grpcomm_base_static_components,
&mca_grpcomm_base_components_available, true)) {
return ORTE_ERROR;
}
/* All done */
return ORTE_SUCCESS;
}

101
orte/mca/grpcomm/base/grpcomm_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,101 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/grpcomm/base/base.h"
/**
* Function for selecting one component from all those that are
* available.
*/
int orte_grpcomm_base_select(void)
{
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
orte_grpcomm_base_component_t *component, *best_component = NULL;
orte_grpcomm_base_module_t *module, *best_module = NULL;
int priority, best_priority = -1;
/* Iterate through all the available components */
for (item = opal_list_get_first(&mca_grpcomm_base_components_available);
item != opal_list_get_end(&mca_grpcomm_base_components_available);
item = opal_list_get_next(item)) {
cli = (mca_base_component_list_item_t *) item;
component = (orte_grpcomm_base_component_t *) cli->cli_component;
/* Call the component's init function and see if it wants to be
selected */
module = component->grpcomm_init(&priority);
/* If we got a non-NULL module back, then the component wants to
be selected. So save its multi/hidden values and save the
module with the highest priority */
if (NULL != module) {
/* If this is the best one, save it */
if (priority > best_priority) {
/* If there was a previous best one, finalize */
if (NULL != best_component) {
best_component->grpcomm_finalize();
}
/* Save the new best one */
best_module = module;
best_component = component;
/* update the best priority */
best_priority = priority;
}
/* If it's not the best one, finalize it */
else {
component->grpcomm_finalize();
}
}
}
/* If we didn't find one to select, barf */
if (NULL == best_component) {
return ORTE_ERROR;
}
/* We have happiness -- save the component and module for later
usage */
orte_grpcomm = *best_module;
mca_grpcomm_base_selected_component = *best_component;
mca_grpcomm_base_selected = true;
/* all done */
return ORTE_SUCCESS;
}

46
orte/mca/grpcomm/basic/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
grpcomm_basic.h \
grpcomm_basic_module.c \
grpcomm_basic_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_grpcomm_basic_DSO
component_noinst =
component_install = mca_grpcomm_basic.la
else
component_noinst = libmca_grpcomm_basic.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_grpcomm_basic_la_SOURCES = $(sources)
mca_grpcomm_basic_la_LDFLAGS = -module -avoid-version
mca_grpcomm_basic_la_LIBADD = \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_grpcomm_basic_la_SOURCES =$(sources)
libmca_grpcomm_basic_la_LDFLAGS = -module -avoid-version

24
orte/mca/grpcomm/basic/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
#                         reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Specific to this module

# Makefiles that configure must generate for this component.
PARAM_CONFIG_FILES="Makefile"

82
orte/mca/grpcomm/basic/grpcomm_basic.h Обычный файл
Просмотреть файл

@ -0,0 +1,82 @@
/* -*- C -*-
*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
/* Main header for the "basic" grpcomm component: declares the
 * component-wide global state and the open/close/init/finalize entry
 * points used by the grpcomm framework's selection machinery. */
#ifndef GRPCOMM_BASIC_H
#define GRPCOMM_BASIC_H

#include "orte_config.h"
#include "orte/orte_types.h"
#include "orte/orte_constants.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "opal/class/opal_object.h"
#include "orte/mca/grpcomm/grpcomm.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/*
 * globals
 */

/*
 * globals needed within component
 */
typedef struct {
    int output;                  /* opal_output stream id for verbose logging (-1 = disabled) */
    int xcast_linear_xover;      /* daemon count at which linear xcast mode begins */
    int xcast_binomial_xover;    /* daemon count at which binomial xcast mode begins */
    orte_std_cntr_t num_active;  /* number of non-blocking xcast sends still in flight */
    opal_mutex_t mutex;          /* protects num_active */
    opal_condition_t cond;       /* signaled when num_active drops to zero */
} orte_grpcomm_basic_globals_t;

extern orte_grpcomm_basic_globals_t orte_grpcomm_basic;

/*
 * Module open / close
 */
int orte_grpcomm_basic_open(void);
int orte_grpcomm_basic_close(void);
orte_grpcomm_base_module_t* orte_grpcomm_basic_init(int *priority);

/*
 * Startup / Shutdown
 */
int orte_grpcomm_basic_module_init(void);
int orte_grpcomm_basic_finalize(void);

/*
 * xcast interfaces
 */
/* NOTE(review): this declares a name-server replica receive callback
 * inside the grpcomm header -- looks like a copy/paste leftover from
 * the NS component; confirm it belongs here. */
void orte_ns_replica_recv(int status, orte_process_name_t* sender,
                          orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata);

/* NOTE(review): the .c file defines the component symbol as
 * mca_grpcomm_basic_component; this extern uses the orte_ prefix --
 * verify the two names are meant to match. */
ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t orte_grpcomm_basic_component;
extern orte_grpcomm_base_module_t orte_grpcomm_basic_module;

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -0,0 +1,153 @@
/* -*- C -*-
*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
 *
 * The "basic" grpcomm component
 *
 * Component-level open/close/init/finalize glue for the basic group
 * communication (grpcomm) component, including registration of the
 * MCA parameters that control the xcast crossover points.
 * (The previous text here described the Open MPI Name Server and was
 * a copy/paste leftover from the NS component.)
 */
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/threads/mutex.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "grpcomm_basic.h"
#define XCAST_LINEAR_XOVER_DEFAULT 10
#define XCAST_BINOMIAL_XOVER_DEFAULT INT_MAX
/*
 * Struct of function pointers that need to be initialized
 */
/* NOTE(review): the header declares this symbol (with
 * ORTE_MODULE_DECLSPEC) as orte_grpcomm_basic_component; the mca_
 * prefix here will not match that extern -- confirm which name the
 * component machinery expects. */
orte_grpcomm_base_component_t mca_grpcomm_basic_component = {
    {
        ORTE_GRPCOMM_BASE_VERSION_2_0_0,

        "basic", /* MCA module name */
        ORTE_MAJOR_VERSION,  /* MCA module major version */
        ORTE_MINOR_VERSION,  /* MCA module minor version */
        ORTE_RELEASE_VERSION,  /* MCA module release version */
        orte_grpcomm_basic_open,  /* module open */
        orte_grpcomm_basic_close /* module close */
    },
    {
        /* The component is checkpoint ready */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    },
    orte_grpcomm_basic_init,    /* component init */
    orte_grpcomm_basic_finalize /* component shutdown */
};

/*
 * instantiate globals needed within basic component
 */
orte_grpcomm_basic_globals_t orte_grpcomm_basic;
/* Open the component: construct the component-wide globals and
 * register the MCA parameters that control verbosity and the xcast
 * mode crossover points.
 *
 * Returns ORTE_SUCCESS (parameter registration here cannot fail in a
 * way this function reports).
 *
 * Fix: the string returned by mca_base_param_reg_string() is an
 * allocated copy; the original leaked it and used it without a NULL
 * guard. */
int orte_grpcomm_basic_open(void)
{
    int value;
    char *mode;
    mca_base_component_t *c = &mca_grpcomm_basic_component.grpcomm_version;

    /* initialize globals */
    OBJ_CONSTRUCT(&orte_grpcomm_basic.mutex, opal_mutex_t);
    OBJ_CONSTRUCT(&orte_grpcomm_basic.cond, opal_condition_t);
    orte_grpcomm_basic.num_active = 0;

    /* register parameters */
    mca_base_param_reg_int(c, "verbose",
                           "Verbosity level for the grpcomm basic component",
                           false, false, 0, &value);
    if (value != 0) {
        orte_grpcomm_basic.output = opal_output_open(NULL);
    } else {
        orte_grpcomm_basic.output = -1;
    }

    mca_base_param_reg_int(c, "xcast_linear_xover",
                           "Number of daemons where use of linear xcast mode is to begin",
                           false, false, XCAST_LINEAR_XOVER_DEFAULT, &orte_grpcomm_basic.xcast_linear_xover);
    mca_base_param_reg_int(c, "xcast_binomial_xover",
                           "Number of daemons where use of binomial xcast mode is to begin",
                           false, false, XCAST_BINOMIAL_XOVER_DEFAULT, &orte_grpcomm_basic.xcast_binomial_xover);

    mca_base_param_reg_string(c, "xcast_mode",
                              "Select xcast mode (\"linear\" | \"binomial\" | \"direct\")",
                              false, false, "none", &mode);
    if (NULL != mode) {
        /* an explicit mode overrides the crossover parameters by
         * forcing the selection logic toward the requested algorithm */
        if (0 == strcmp(mode, "binomial")) {
            orte_grpcomm_basic.xcast_binomial_xover = 0;
            orte_grpcomm_basic.xcast_linear_xover = 0;
        } else if (0 == strcmp(mode, "linear")) {
            orte_grpcomm_basic.xcast_linear_xover = 0;
            orte_grpcomm_basic.xcast_binomial_xover = INT_MAX;
        } else if (0 == strcmp(mode, "direct")) {
            orte_grpcomm_basic.xcast_binomial_xover = INT_MAX;
            orte_grpcomm_basic.xcast_linear_xover = INT_MAX;
        } else if (0 != strcmp(mode, "none")) {
            opal_output(0, "grpcomm_basic_xcast_mode: unknown option %s - using defaults", mode);
        }
        /* mca_base_param_reg_string hands back an allocated copy of
         * the value -- free it to avoid leaking on every open */
        free(mode);
    }

    return ORTE_SUCCESS;
}
/* Close the component: tear down the synchronization objects that
 * orte_grpcomm_basic_open() constructed. */
int orte_grpcomm_basic_close(void)
{
    /* destroy in reverse order of construction; the two objects are
     * independent, so this is purely stylistic */
    OBJ_DESTRUCT(&orte_grpcomm_basic.cond);
    OBJ_DESTRUCT(&orte_grpcomm_basic.mutex);

    return ORTE_SUCCESS;
}
/* Component init: called during framework selection.  Reports this
 * component's priority through *priority and returns the module
 * function table. */
orte_grpcomm_base_module_t* orte_grpcomm_basic_init(int *priority)
{
    /* we are the default, so set a low priority so we can be overridden */
    *priority = 1;
    return &orte_grpcomm_basic_module;
}
/*
 * finalize routine
 */
/* Nothing to release here: all teardown of the component globals
 * happens in orte_grpcomm_basic_close(). */
int orte_grpcomm_basic_finalize(void)
{
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -40,40 +40,56 @@
#include "orte/mca/rml/rml.h"
#include "orte/runtime/params.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "grpcomm_basic.h"
/* API functions */
static int xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc);
orte_grpcomm_base_module_t orte_grpcomm_basic_module = {
xcast,
xcast_nb,
xcast_gate
};
/* Local functions */
static int mca_oob_xcast_binomial_tree(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int xcast_binomial_tree(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int mca_oob_xcast_linear(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int xcast_linear(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int mca_oob_xcast_direct(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
static int xcast_direct(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
/* define a callback function for use by the blocking version
* of xcast so we can "hold" the caller here until all non-blocking
* sends have completed
*/
static void mca_oob_xcast_send_cb(int status,
orte_process_name_t* peer,
orte_buffer_t* buffer,
int tag,
void* cbdata)
static void xcast_send_cb(int status,
orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
void* cbdata)
{
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_oob_xcast_num_active--;
if (orte_oob_xcast_num_active == 0) {
opal_condition_signal(&orte_oob_xcast_cond);
orte_grpcomm_basic.num_active--;
if (orte_grpcomm_basic.num_active <= 0) {
orte_grpcomm_basic.num_active = 0; /* just to be safe */
opal_condition_signal(&orte_grpcomm_basic.cond);
}
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
return;
}
@ -84,14 +100,16 @@ static void mca_oob_xcast_send_cb(int status,
*/
/* Non-blocking version */
int mca_oob_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
static int xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
int rc = ORTE_SUCCESS;
struct timeval start, stop;
orte_vpid_t num_daemons;
opal_output(orte_grpcomm_basic.output, "oob_xcast_nb: sent to job %ld tag %ld", (long)job, (long)tag);
/* if there is no message to send, then just return ok */
if (NULL == buffer) {
return ORTE_SUCCESS;
@ -109,8 +127,9 @@ int mca_oob_xcast_nb(orte_jobid_t job,
return rc;
}
opal_output(mca_oob_base_output, "oob_xcast_nb: num_daemons %ld linear xover: %ld binomial xover: %ld",
(long)num_daemons, (long)orte_oob_xcast_linear_xover, (long)orte_oob_xcast_binomial_xover);
opal_output(orte_grpcomm_basic.output, "oob_xcast_nb: num_daemons %ld linear xover: %ld binomial xover: %ld",
(long)num_daemons, (long)orte_grpcomm_basic.xcast_linear_xover,
(long)orte_grpcomm_basic.xcast_binomial_xover);
if (num_daemons < 2) {
/* if there is only one daemon in the system, then we must
@ -126,7 +145,7 @@ int mca_oob_xcast_nb(orte_jobid_t job,
* use-case behavior MUST always be retained or else
* singletons and HNP startup will fail!
*/
rc = mca_oob_xcast_direct(job, buffer, tag);
rc = xcast_direct(job, buffer, tag);
goto DONE;
}
@ -136,12 +155,12 @@ int mca_oob_xcast_nb(orte_jobid_t job,
* they wish via MCA params
*/
if (num_daemons < orte_oob_xcast_linear_xover) {
rc = mca_oob_xcast_direct(job, buffer, tag);
} else if (num_daemons < orte_oob_xcast_binomial_xover) {
rc = mca_oob_xcast_linear(job, buffer, tag);
if (num_daemons < orte_grpcomm_basic.xcast_linear_xover) {
rc = xcast_direct(job, buffer, tag);
} else if (num_daemons < orte_grpcomm_basic.xcast_binomial_xover) {
rc = xcast_linear(job, buffer, tag);
} else {
rc = mca_oob_xcast_binomial_tree(job, buffer, tag);
rc = xcast_binomial_tree(job, buffer, tag);
}
DONE:
@ -156,14 +175,16 @@ DONE:
}
/* Blocking version */
int mca_oob_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
static int xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
int rc = ORTE_SUCCESS;
struct timeval start, stop;
orte_vpid_t num_daemons;
opal_output(orte_grpcomm_basic.output, "oob_xcast: sent to job %ld tag %ld", (long)job, (long)tag);
/* if there is no message to send, then just return ok */
if (NULL == buffer) {
return ORTE_SUCCESS;
@ -181,8 +202,9 @@ int mca_oob_xcast(orte_jobid_t job,
return rc;
}
opal_output(mca_oob_base_output, "oob_xcast: num_daemons %ld linear xover: %ld binomial xover: %ld",
(long)num_daemons, (long)orte_oob_xcast_linear_xover, (long)orte_oob_xcast_binomial_xover);
opal_output(orte_grpcomm_basic.output, "oob_xcast: num_daemons %ld linear xover: %ld binomial xover: %ld",
(long)num_daemons, (long)orte_grpcomm_basic.xcast_linear_xover,
(long)orte_grpcomm_basic.xcast_binomial_xover);
if (num_daemons < 2) {
/* if there is only one daemon in the system, then we must
@ -198,7 +220,7 @@ int mca_oob_xcast(orte_jobid_t job,
* use-case behavior MUST always be retained or else
* singletons and HNP startup will fail!
*/
rc = mca_oob_xcast_direct(job, buffer, tag);
rc = xcast_direct(job, buffer, tag);
goto DONE;
}
@ -208,21 +230,21 @@ int mca_oob_xcast(orte_jobid_t job,
* they wish via MCA params
*/
if (num_daemons < orte_oob_xcast_linear_xover) {
rc = mca_oob_xcast_direct(job, buffer, tag);
} else if (num_daemons < orte_oob_xcast_binomial_xover) {
rc = mca_oob_xcast_linear(job, buffer, tag);
if (num_daemons < orte_grpcomm_basic.xcast_linear_xover) {
rc = xcast_direct(job, buffer, tag);
} else if (num_daemons < orte_grpcomm_basic.xcast_binomial_xover) {
rc = xcast_linear(job, buffer, tag);
} else {
rc = mca_oob_xcast_binomial_tree(job, buffer, tag);
rc = xcast_binomial_tree(job, buffer, tag);
}
DONE:
/* now go to sleep until woken up */
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
if (orte_oob_xcast_num_active > 0) {
opal_condition_wait(&orte_oob_xcast_cond, &orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
if (orte_grpcomm_basic.num_active > 0) {
opal_condition_wait(&orte_grpcomm_basic.cond, &orte_grpcomm_basic.mutex);
}
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
if (orte_timing) {
gettimeofday(&stop, NULL);
@ -233,9 +255,9 @@ DONE:
return rc;
}
static int mca_oob_xcast_binomial_tree(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
static int xcast_binomial_tree(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
orte_daemon_cmd_flag_t command, mode;
orte_std_cntr_t i;
@ -245,9 +267,8 @@ static int mca_oob_xcast_binomial_tree(orte_jobid_t job,
orte_buffer_t *buf;
orte_vpid_t num_daemons;
int bitmap;
orte_std_cntr_t binomial_xcast_num_active;
opal_output(mca_oob_base_output, "oob_xcast_mode: binomial");
opal_output(orte_grpcomm_basic.output, "oob_xcast_mode: binomial");
/* this is the HNP end, so it starts the procedure. Since the HNP is always the
* vpid=0 at this time, we take advantage of that fact to figure out who we
@ -335,29 +356,23 @@ static int mca_oob_xcast_binomial_tree(orte_jobid_t job,
* we would get the chance to increment the num_active. This causes us
* to not correctly wakeup and reset the xcast_in_progress flag
*/
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
/* compute the number of sends we are going to do - it would be nice
* to have a simple algo to do this, but for now just brute force
* is fine
*/
binomial_xcast_num_active = 0;
for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) {
peer = rank | mask;
if (peer < size) {
++binomial_xcast_num_active;
++orte_grpcomm_basic.num_active;
}
}
if (binomial_xcast_num_active == 0) {
/* if we aren't going to send anything at all, we
* need to reset the xcast_in_progress flag so
* we don't block the entire system and return
*/
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
if (orte_grpcomm_basic.num_active == 0) {
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
rc = ORTE_SUCCESS;
goto CLEANUP;
}
orte_oob_xcast_num_active += binomial_xcast_num_active;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
target.cellid = ORTE_PROC_MY_NAME->cellid;
target.jobid = 0;
@ -365,21 +380,21 @@ static int mca_oob_xcast_binomial_tree(orte_jobid_t job,
peer = rank | mask;
if (peer < size) {
target.vpid = (orte_vpid_t)peer;
opal_output(mca_oob_base_output, "[%ld,%ld,%ld] xcast to [%ld,%ld,%ld]", ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(&target));
if (0 > (rc = mca_oob_send_packed_nb(&target, buf, ORTE_RML_TAG_ORTED_ROUTED,
0, mca_oob_xcast_send_cb, NULL))) {
opal_output(orte_grpcomm_basic.output, "[%ld,%ld,%ld] xcast to [%ld,%ld,%ld]", ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(&target));
if (0 > (rc = orte_rml.send_buffer_nb(&target, buf, ORTE_RML_TAG_ORTED_ROUTED,
0, xcast_send_cb, NULL))) {
if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
rc = ORTE_ERR_COMM_FAILURE;
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active -= (num_daemons-i);
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active -= (num_daemons-i);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
goto CLEANUP;
}
/* decrement the number we are waiting to see */
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active--;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active--;
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
}
}
}
@ -390,18 +405,17 @@ CLEANUP:
return rc;
}
static int mca_oob_xcast_linear(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
static int xcast_linear(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
int rc;
orte_buffer_t *buf;
orte_daemon_cmd_flag_t command, mode=ORTE_DAEMON_ROUTE_NONE;
orte_vpid_t i, range;
orte_process_name_t dummy;
orte_std_cntr_t linear_xcast_num_active;
opal_output(mca_oob_base_output, "oob_xcast_mode: linear");
opal_output(orte_grpcomm_basic.output, "oob_xcast_mode: linear");
/* since we have to pack some additional info into the buffer to be
* sent to the daemons, we create a new buffer into which we will
@ -464,8 +478,8 @@ static int mca_oob_xcast_linear(orte_jobid_t job,
* we would get the chance to increment the num_active. This causes us
* to not correctly wakeup and reset the xcast_in_progress flag
*/
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
linear_xcast_num_active = range;
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active += range;
if (orte_process_info.daemon ||
orte_process_info.seed ||
orte_process_info.singleton) {
@ -473,19 +487,14 @@ static int mca_oob_xcast_linear(orte_jobid_t job,
* so we need to adjust the number of sends
* we are expecting to complete
*/
linear_xcast_num_active--;
if (linear_xcast_num_active <= 0) {
/* if we aren't going to send anything at all, we
* need to reset the xcast_in_progress flag so
* we don't block the entire system and return
*/
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
orte_grpcomm_basic.num_active--;
if (orte_grpcomm_basic.num_active <= 0) {
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
rc = ORTE_SUCCESS;
goto CLEANUP;
}
}
orte_oob_xcast_num_active += linear_xcast_num_active;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
/* send the message to each daemon as fast as we can */
dummy.cellid = ORTE_PROC_MY_NAME->cellid;
@ -493,20 +502,20 @@ static int mca_oob_xcast_linear(orte_jobid_t job,
for (i=0; i < range; i++) {
if (ORTE_PROC_MY_NAME->vpid != i) { /* don't send to myself */
dummy.vpid = i;
if (0 > (rc = mca_oob_send_packed_nb(&dummy, buf, ORTE_RML_TAG_ORTED_ROUTED,
0, mca_oob_xcast_send_cb, NULL))) {
if (0 > (rc = orte_rml.send_buffer_nb(&dummy, buf, ORTE_RML_TAG_ORTED_ROUTED,
0, xcast_send_cb, NULL))) {
if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
rc = ORTE_ERR_COMM_FAILURE;
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active -= (range-i);
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active -= (range-i);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
goto CLEANUP;
}
/* decrement the number we are waiting to see */
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active--;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active--;
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
}
}
}
@ -518,9 +527,9 @@ CLEANUP:
return rc;
}
static int mca_oob_xcast_direct(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
static int xcast_direct(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
orte_std_cntr_t i;
int rc;
@ -529,7 +538,7 @@ static int mca_oob_xcast_direct(orte_jobid_t job,
opal_list_t attrs;
opal_list_item_t *item;
opal_output(mca_oob_base_output, "oob_xcast_mode: direct");
opal_output(orte_grpcomm_basic.output, "oob_xcast_mode: direct");
/* need to get the job peers so we know who to send the message to */
OBJ_CONSTRUCT(&attrs, opal_list_t);
@ -553,25 +562,26 @@ static int mca_oob_xcast_direct(orte_jobid_t job,
* we would get the chance to increment the num_active. This causes us
* to not correctly wakeup and reset the xcast_in_progress flag
*/
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active += n;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active += n;
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
opal_output(orte_grpcomm_basic.output, "oob_xcast_direct: num_active now %ld", (long)orte_grpcomm_basic.num_active);
for(i=0; i<n; i++) {
opal_output(mca_oob_base_output, "oob_xcast: sending to [%ld,%ld,%ld]", ORTE_NAME_ARGS(peers+i));
if (0 > (rc = mca_oob_send_packed_nb(peers+i, buffer, tag, 0, mca_oob_xcast_send_cb, NULL))) {
if (0 > (rc = orte_rml.send_buffer_nb(peers+i, buffer, tag, 0, xcast_send_cb, NULL))) {
if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
rc = ORTE_ERR_COMM_FAILURE;
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active -= (n-i);
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active -= (n-i);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
goto CLEANUP;
}
/* decrement the number we are waiting to see */
OPAL_THREAD_LOCK(&orte_oob_xcast_mutex);
orte_oob_xcast_num_active--;
OPAL_THREAD_UNLOCK(&orte_oob_xcast_mutex);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active--;
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
}
}
rc = ORTE_SUCCESS;
@ -582,7 +592,7 @@ CLEANUP:
return rc;
}
int mca_oob_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc)
static int xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc)
{
int rc;
orte_std_cntr_t i;
@ -590,7 +600,7 @@ int mca_oob_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc)
orte_gpr_notify_message_t *mesg;
OBJ_CONSTRUCT(&rbuf, orte_buffer_t);
rc = mca_oob_recv_packed(ORTE_NAME_WILDCARD, &rbuf, ORTE_RML_TAG_XCAST_BARRIER);
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &rbuf, ORTE_RML_TAG_XCAST_BARRIER, 0);
if(rc < 0) {
OBJ_DESTRUCT(&rbuf);
return rc;

128
orte/mca/grpcomm/grpcomm.h Обычный файл
Просмотреть файл

@ -0,0 +1,128 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* The OpenRTE Group Communications
*
* The OpenRTE Group Comm framework provides communication services that
* span entire jobs or collections of processes. It is not intended to be
* used for point-to-point communications (the RML does that), nor should
* it be viewed as a high-performance communication channel for large-scale
* data transfers.
*/
#ifndef MCA_GRPCOMM_H
#define MCA_GRPCOMM_H

/*
 * includes
 */
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"

#include "opal/mca/mca.h"
#include "orte/dss/dss_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rml/rml_types.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/*
 * Component functions - all MUST be provided!
 */

/* Send a message to all members of a job - blocking */
typedef int (*orte_grpcomm_base_module_xcast_fn_t)(orte_jobid_t job,
                                                   orte_buffer_t *buffer,
                                                   orte_rml_tag_t tag);

/* Send a message to all members of a job - non-blocking */
typedef int (*orte_grpcomm_base_module_xcast_nb_fn_t)(orte_jobid_t job,
                                                      orte_buffer_t *buffer,
                                                      orte_rml_tag_t tag);

/* Wait for receipt of an xcast message */
typedef int (*orte_grpcomm_base_module_xcast_gate_fn_t)(orte_gpr_trigger_cb_fn_t cbfunc);

/*
 * Ver 2.0
 */
/* Module function table: every selected grpcomm module supplies all
 * three entry points. */
struct orte_grpcomm_base_module_2_0_0_t {
    orte_grpcomm_base_module_xcast_fn_t xcast;
    orte_grpcomm_base_module_xcast_nb_fn_t xcast_nb;
    orte_grpcomm_base_module_xcast_gate_fn_t xcast_gate;
};

typedef struct orte_grpcomm_base_module_2_0_0_t orte_grpcomm_base_module_2_0_0_t;
typedef orte_grpcomm_base_module_2_0_0_t orte_grpcomm_base_module_t;

/*
 * grpcomm Component
 * (was mislabeled "NS Component" -- copy/paste leftover from the
 * name-server framework header)
 */
/**
 * Initialize the selected component.
 */
typedef orte_grpcomm_base_module_t* (*orte_grpcomm_base_component_init_fn_t)(int *priority);

/**
 * Finalize the selected module
 */
typedef int (*orte_grpcomm_base_component_finalize_fn_t)(void);

/*
 * the standard component data structure
 */
struct orte_grpcomm_base_component_2_0_0_t {
    mca_base_component_t grpcomm_version;
    mca_base_component_data_1_0_0_t grpcomm_data;

    orte_grpcomm_base_component_init_fn_t grpcomm_init;
    orte_grpcomm_base_component_finalize_fn_t grpcomm_finalize;
};
typedef struct orte_grpcomm_base_component_2_0_0_t orte_grpcomm_base_component_2_0_0_t;
typedef orte_grpcomm_base_component_2_0_0_t orte_grpcomm_base_component_t;

/*
 * Macro for use in components that are of type grpcomm v2.0.0
 */
#define ORTE_GRPCOMM_BASE_VERSION_2_0_0 \
  /* grpcomm v2.0 is chained to MCA v1.0 */ \
  MCA_BASE_VERSION_1_0_0, \
  /* grpcomm v2.0 */ \
  "grpcomm", 2, 0, 0

/* Global structure for accessing the selected grpcomm module's
 * function pointers (filled in at component-selection time). */
ORTE_DECLSPEC extern orte_grpcomm_base_module_t orte_grpcomm;  /* holds selected module's function pointers */

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,6 +1,5 @@
#include "orte_config.h"
#include "opal/util/output.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/iof/base/iof_base_header.h"
#include "iof_svc.h"
#include "iof_svc_proxy.h"

Просмотреть файл

@ -78,7 +78,7 @@ typedef uint8_t orte_ns_cmd_flag_t;
#define ORTE_NS_ASSIGN_OOB_TAG_CMD (int8_t) 11
#define ORTE_NS_GET_PEERS_CMD (int8_t) 12
#define ORTE_NS_DEFINE_DATA_TYPE_CMD (int8_t) 13
#define ORTE_NS_CREATE_MY_NAME_CMD (int8_t) 14
#define ORTE_NS_DUMP_CELLS_CMD (int8_t) 15
#define ORTE_NS_DUMP_JOBIDS_CMD (int8_t) 16
#define ORTE_NS_DUMP_TAGS_CMD (int8_t) 17

Просмотреть файл

@ -39,7 +39,7 @@
#include "orte/dss/dss.h"
#include "opal/mca/mca.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/rml.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"

Просмотреть файл

@ -100,7 +100,7 @@ int orte_ns_proxy_create_cellid(orte_cellid_t *cellid, char *site, char *resourc
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -181,7 +181,7 @@ int orte_ns_proxy_get_cell_info(orte_cellid_t cellid,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -280,7 +280,7 @@ int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnode
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -386,7 +386,7 @@ int orte_ns_proxy_get_node_info(char ***nodenames, orte_cellid_t cellid,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);

Просмотреть файл

@ -38,6 +38,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "ns_proxy.h"
@ -222,7 +223,7 @@ mca_ns_base_module_t* orte_ns_proxy_init(int *priority)
*priority = 10;
/* define the proxy for us to use */
if(ORTE_SUCCESS != (ret = orte_rml.parse_uris(orte_process_info.ns_replica_uri, &name, NULL))) {
if(ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.ns_replica_uri, &name, NULL))) {
ORTE_ERROR_LOG(ret);
return NULL;
}

Просмотреть файл

@ -70,7 +70,7 @@ int orte_ns_proxy_dump_cells(void)
OBJ_DESTRUCT(&cmd);
OBJ_CONSTRUCT(&answer, orte_buffer_t);
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -130,7 +130,7 @@ int orte_ns_proxy_dump_jobs(void)
OBJ_DESTRUCT(&cmd);
OBJ_CONSTRUCT(&answer, orte_buffer_t);
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -193,7 +193,7 @@ int orte_ns_proxy_dump_tags(void)
OBJ_DESTRUCT(&cmd);
OBJ_CONSTRUCT(&answer, orte_buffer_t);
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -268,7 +268,7 @@ int orte_ns_proxy_dump_datatypes(void)
OBJ_DESTRUCT(&cmd);
OBJ_CONSTRUCT(&answer, orte_buffer_t);
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);

Просмотреть файл

@ -145,7 +145,7 @@ int orte_ns_proxy_get_peers(orte_process_name_t **procs,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -272,7 +272,7 @@ int orte_ns_proxy_assign_rml_tag(orte_rml_tag_t *tag,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -404,7 +404,7 @@ int orte_ns_proxy_define_data_type(const char *name,
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
@ -467,31 +467,13 @@ int orte_ns_proxy_define_data_type(const char *name,
*/
int orte_ns_proxy_create_my_name(void)
{
orte_buffer_t* cmd;
orte_ns_cmd_flag_t command;
int rc;
orte_process_name_t new_name;
int ret;
command = ORTE_NS_CREATE_MY_NAME_CMD;
cmd = OBJ_NEW(orte_buffer_t);
if (cmd == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
ret = orte_rml.get_new_name(&new_name);
if (ORTE_SUCCESS == ret) {
memcpy(ORTE_PROC_MY_NAME, &new_name, sizeof(orte_process_name_t));
}
if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_NS_CMD))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
}
if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE;
}
OBJ_RELEASE(cmd);
return ORTE_SUCCESS;
return ret;
}

Просмотреть файл

@ -80,7 +80,7 @@ int orte_ns_proxy_create_jobid(orte_jobid_t *job, opal_list_t *attrs)
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -157,7 +157,7 @@ int orte_ns_proxy_get_job_descendants(orte_jobid_t **descendants, orte_std_cntr_
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -253,7 +253,7 @@ int orte_ns_proxy_get_job_children(orte_jobid_t **descendants, orte_std_cntr_t *
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -346,7 +346,7 @@ int orte_ns_proxy_get_root_job(orte_jobid_t *root_job, orte_jobid_t job)
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -419,7 +419,7 @@ int orte_ns_proxy_get_parent_job(orte_jobid_t *parent, orte_jobid_t job)
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -495,7 +495,7 @@ int orte_ns_proxy_get_job_family(orte_jobid_t** family, orte_std_cntr_t *num_mem
OBJ_RELEASE(answer);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -596,7 +596,7 @@ int orte_ns_proxy_reserve_range(orte_jobid_t job, orte_vpid_t range, orte_vpid_t
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -664,7 +664,7 @@ int orte_ns_proxy_get_vpid_range(orte_jobid_t job, orte_vpid_t *range)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -413,11 +413,7 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender,
goto RETURN_ERROR;
}
break;
case ORTE_NS_CREATE_MY_NAME_CMD:
/* ignore this command */
break;
case ORTE_NS_GET_PEERS_CMD:
/* get the list of attributes */
OBJ_CONSTRUCT(&attrs, opal_list_t);

Просмотреть файл

@ -957,7 +957,7 @@ static int odls_default_fork_local_proc(
}
/* pass my contact info to the local proc so we can talk */
uri = orte_rml.get_uri();
uri = orte_rml.get_contact_info();
param = mca_base_param_environ_variable("orte","local_daemon","uri");
opal_setenv(param, uri, true, &environ_copy);
free(param);
@ -1008,7 +1008,7 @@ static int odls_default_fork_local_proc(
if(NULL != orte_process_info.ns_replica_uri) {
uri = strdup(orte_process_info.ns_replica_uri);
} else {
uri = orte_rml.get_uri();
uri = orte_rml.get_contact_info();
}
param = mca_base_param_environ_variable("ns","replica","uri");
opal_setenv(param, uri, true, &environ_copy);
@ -1019,7 +1019,7 @@ static int odls_default_fork_local_proc(
if(NULL != orte_process_info.gpr_replica_uri) {
uri = strdup(orte_process_info.gpr_replica_uri);
} else {
uri = orte_rml.get_uri();
uri = orte_rml.get_contact_info();
}
param = mca_base_param_environ_variable("gpr","replica","uri");
opal_setenv(param, uri, true, &environ_copy);
@ -1393,7 +1393,7 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data)
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
return ORTE_ERR_FILE_OPEN_FAILURE;
}
my_uri = orte_rml.get_uri();
my_uri = orte_rml.get_contact_info();
fprintf(fp, "%s\n", my_uri);
fclose(fp);
free(uri_file);

Просмотреть файл

@ -21,12 +21,5 @@ headers += \
libmca_oob_la_SOURCES += \
base/oob_base_close.c \
base/oob_base_except.c \
base/oob_base_init.c \
base/oob_base_open.c \
base/oob_base_ping.c \
base/oob_base_recv.c \
base/oob_base_recv_nb.c \
base/oob_base_send.c \
base/oob_base_xcast.c \
base/oob_base_send_nb.c
base/oob_base_open.c

Просмотреть файл

@ -35,9 +35,9 @@
#include <net/uio.h>
#endif
#include "opal/mca/mca.h"
#include "opal/threads/condition.h"
#include "orte/mca/oob/oob.h"
#include "opal/mca/mca.h"
#include "orte/dss/dss_types.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
@ -52,11 +52,6 @@ extern "C" {
* global flag for use in timing tests
*/
ORTE_DECLSPEC extern int mca_oob_base_output;
ORTE_DECLSPEC extern bool orte_oob_base_timing;
ORTE_DECLSPEC extern bool orte_oob_xcast_timing;
ORTE_DECLSPEC extern opal_mutex_t orte_oob_xcast_mutex;
ORTE_DECLSPEC extern opal_condition_t orte_oob_xcast_cond;
ORTE_DECLSPEC extern int orte_oob_xcast_linear_xover, orte_oob_xcast_binomial_xover;
ORTE_DECLSPEC extern orte_std_cntr_t orte_oob_xcast_num_active;
/*
@ -68,425 +63,47 @@ ORTE_DECLSPEC extern bool orte_oob_base_already_opened;
* OOB API
*/
/**
* General flags for send/recv
*
* An example of usage - to determine the size of the next available message w/out receiving it:
*
* int size = mca_oob_recv(name, 0, 0, MCA_OOB_TRUNC|MCA_OOB_PEEK);
*/
#define MCA_OOB_PEEK 0x01 /**< flag to oob_recv to allow caller to peek a portion of the next available
* message w/out removing the message from the queue. */
#define MCA_OOB_TRUNC 0x02 /**< flag to oob_recv to return the actual size of the message even if
* the receive buffer is smaller than the number of bytes available */
#define MCA_OOB_ALLOC 0x04 /**< flag to oob_recv to request the oob to allocate a buffer of the appropriate
* size for the receive and return the allocated buffer and size in the first
* element of the iovec array. */
#define MCA_OOB_PERSISTENT 0x08 /* post receive request persistently - don't remove on match */
/**
* Obtain a string representation of the OOB contact information for
* the selected OOB channels. This string may be passed to another
* application via an MCA parameter (OMPI_MCA_oob_base_seed) to bootstrap
* communications.
*
* @return A null terminated string that should be freed by the caller.
*
* Note that mca_oob_base_init() must be called to load and select
* an OOB module prior to calling this routine.
*/
ORTE_DECLSPEC char* mca_oob_get_my_contact_info(void);
/**
* Pre-populate the cache of contact information required by the OOB
* to reach a given destination. This is required to setup a pointer
* to initial registry/name server/etc.
*
* @param uri The contact information of the peer process obtained
* via a call to mca_oob_get_contact_info().
*
*/
ORTE_DECLSPEC int mca_oob_set_contact_info(const char*);
/**
* A routine to ping a given process name to determine if it is reachable.
*
* @param name The peer name.
* @param tv The length of time to wait on a connection/response.
*
* Note that this routine blocks up to the specified timeout waiting for a
* connection / response from the specified peer. If the peer is unavailable
* an error status is returned.
*/
ORTE_DECLSPEC int mca_oob_ping(const char*, struct timeval* tv);
/**
* Extract from the contact info the peer process identifier.
*
* @param cinfo (IN) The contact information of the peer process.
* @param name (OUT) The peer process identifier.
* @param uris (OUT) Will return an array of uri strings corresponding
* to the peers exported protocols.
*
* Note the caller may pass NULL for the uris if they only wish to extact
* the process name.
*/
ORTE_DECLSPEC int mca_oob_parse_contact_info(const char* uri, orte_process_name_t* peer, char*** uris);
/**
* Set the contact info for the seed daemon.
*
* Note that this can also be passed to the application as an
* MCA parameter (OMPI_MCA_oob_base_seed). The contact info (of the seed)
* must currently be set before calling mca_oob_base_init().
*/
ORTE_DECLSPEC int mca_oob_set_contact_info(const char*);
/**
* Update the contact info tables
*/
ORTE_DECLSPEC void mca_oob_update_contact_info(orte_gpr_notify_data_t* data, void* cbdata);
/**
* Similiar to unix writev(2).
*
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) Currently unused.
* @return OMPI error code (<0) on error number of bytes actually sent.
*
* This routine provides semantics similar to unix send/writev with the addition of
* a tag parameter that can be used by the application to match the send w/ a specific
* receive. In other words - a recv call by the specified peer will only succeed when
* the corresponding (or wildcard) tag is used.
*
* The <i>peer</i> parameter represents an opaque handle to the peer process that
* is resolved by the oob layer (using the registry) to an actual physical network
* address.
*/
ORTE_DECLSPEC int mca_oob_send(
orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/*
* Similiar to unix send(2) and mca_oob_send.
*
* @param peer (IN) Opaque name of peer process.
* @param buffer (IN) Prepacked OMPI_BUFFER containing data to send
* @param flags (IN) Currently unused.
* @return OMPI error code (<0) on error or number of bytes actually sent.
*/
ORTE_DECLSPEC int mca_oob_send_packed(
orte_process_name_t* peer,
orte_buffer_t* buffer,
int tag,
int flags);
/**
* Similiar to unix readv(2)
*
* @param peer (IN/OUT) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive. In the
* case of a wildcard receive, will be modified to return the matched peer name.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN/OUT) User defined tag for matching send/recv. In the case of a wildcard receive, will
* be modified to return the matched tag. May be optionally by NULL to specify a
* wildcard receive with no return value.
* @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
* iovec array without removing the message from the queue.
* @return OMPI error code (<0) on error or number of bytes actually received.
*
* The OOB recv call is similar to unix recv/readv in that it requires the caller to manage
* memory associated w/ the message. The routine accepts an array of iovecs (<i>msg</i>); however,
* the caller must determine the appropriate number of elements (<i>count</i>) and allocate the
* buffer space for each entry.
*
* The <i>tag</i> parameter is provided to facilitate this. The user may define tags based on message
* type to determine the message layout and size, as the mca_oob_recv call will block until a message
* with the matching tag is received.
*
* Alternately, the <i>flags</i> parameter may be used to peek (MCA_OOB_PEEK) a portion of the message
* (e.g. a standard message header) or determine the overall message size (MCA_OOB_TRUNC|MCA_OOB_PEEK)
* without removing the message from the queue.
*
*/
ORTE_DECLSPEC int mca_oob_recv(
orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/**
* Similiar to unix read(2)
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param buf (OUT) Array of iovecs describing user buffers and lengths.
* @param tag (IN/OUT) User defined tag for matching send/recv.
* @return OMPI error code (<0) on error or number of bytes actually received.
*
*
* This version of oob_recv is as above except it does NOT take a iovec list
* but instead hands back a orte_buffer_t* buffer with the message in it.
* The user is responsible for releasing the buffer when finished w/ it.
*
*/
ORTE_DECLSPEC int mca_oob_recv_packed (
orte_process_name_t* peer,
orte_buffer_t *buf,
int tag);
/*
* Non-blocking versions of send/recv.
*/
/**
* Callback function on send/recv completion.
*
* @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param cbdata (IN) User data.
*/
typedef void (*mca_oob_callback_fn_t)(
int status,
orte_process_name_t* peer,
struct iovec* msg,
int count,
int tag,
void* cbdata);
/**
* Callback function on send/recv completion for buffer PACKED message only.
* i.e. only mca_oob_send_packed_nb and mca_oob_recv_packed_nb USE this.
*
* @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
* @param peer (IN) Opaque name of peer process.
* @param buffer (IN) For sends, this is a pointer to a prepacked buffer
For recvs, OOB creates and returns a buffer
* @param tag (IN) User defined tag for matching send/recv.
* @param cbdata (IN) User data.
*/
typedef void (*mca_oob_callback_packed_fn_t)(
int status,
orte_process_name_t* peer,
orte_buffer_t* buffer,
int tag,
void* cbdata);
/**
* Non-blocking version of mca_oob_send().
*
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) Currently unused.
* @param cbfunc (IN) Callback function on send completion.
* @param cbdata (IN) User data that is passed to callback function.
* @return OMPI error code (<0) on error number of bytes actually sent.
*
* The user supplied callback function is called when the send completes. Note that
* the callback may occur before the call to mca_oob_send returns to the caller,
* if the send completes during the call.
*
*/
ORTE_DECLSPEC int mca_oob_send_nb(
orte_process_name_t* peer,
struct iovec* msg,
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
void* cbdata);
/**
* Non-blocking version of mca_oob_send_packed().
*
* @param peer (IN) Opaque name of peer process.
* @param buffer (IN) Opaque buffer handle.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) Currently unused.
* @param cbfunc (IN) Callback function on send completion.
* @param cbdata (IN) User data that is passed to callback function.
* @return OMPI error code (<0) on error number of bytes actually sent.
*
* The user supplied callback function is called when the send completes. Note that
* the callback may occur before the call to mca_oob_send returns to the caller,
* if the send completes during the call.
*
*/
ORTE_DECLSPEC int mca_oob_send_packed_nb(
orte_process_name_t* peer,
orte_buffer_t* buffer,
int tag,
int flags,
mca_oob_callback_packed_fn_t cbfunc,
void* cbdata);
/**
* Non-blocking version of mca_oob_recv().
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) May be MCA_OOB_PEEK to return up to size bytes of msg w/out removing it from the queue,
* @param cbfunc (IN) Callback function on recv completion.
* @param cbdata (IN) User data that is passed to callback function.
* @return OMPI error code (<0) on error or number of bytes actually received.
*
* The user supplied callback function is called asynchronously when a message is received
* that matches the call parameters.
*/
ORTE_DECLSPEC int mca_oob_recv_nb(
orte_process_name_t* peer,
struct iovec* msg,
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
void* cbdata);
/**
* Routine to cancel pending non-blocking recvs.
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param tag (IN) User defined tag for matching send/recv.
* @return OMPI error code (<0) on error or number of bytes actually received.
*/
ORTE_DECLSPEC int mca_oob_recv_cancel(
orte_process_name_t* peer,
int tag);
/**
* Non-blocking version of mca_oob_recv_packed().
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param buffer (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) May be MCA_OOB_PEEK to return up to size bytes of msg w/out removing it from the queue,
* @param cbfunc (IN) Callback function on recv completion.
* @param cbdata (IN) User data that is passed to callback function.
* @return OMPI error code (<0) on error or number of bytes actually received.
*
* The user supplied callback function is called asynchronously when a message is received
* that matches the call parameters.
*/
ORTE_DECLSPEC int mca_oob_recv_packed_nb(
orte_process_name_t* peer,
int tag,
int flags,
mca_oob_callback_packed_fn_t cbfunc,
void* cbdata);
/**
* A "broadcast-like" function over the specified set of peers.
* @param job The job whose processes are to receive the message.
* @param msg The message to be sent
* @param cbfunc Callback function on receipt of data
*
* Note that the callback function is provided so that the data can be
* received and interpreted by the application
* associate a component and a module that belongs to it
*/
ORTE_DECLSPEC int mca_oob_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
ORTE_DECLSPEC int mca_oob_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
ORTE_DECLSPEC int mca_oob_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc);
/*
* Register my contact info with the General Purpose Registry
* This function causes the component to "put" its contact info
* on the registry.
struct mca_oob_base_info_t {
opal_list_item_t super;
mca_oob_base_component_t *oob_component;
mca_oob_t *oob_module;
};
/**
* Convenience Typedef
*/
ORTE_DECLSPEC int mca_oob_register_contact_info(void);
typedef struct mca_oob_base_info_t mca_oob_base_info_t;
/*
* Register a subscription to receive contact info on other processes
* This function will typically be called from within a GPR compound command
* to register a subscription against a stage gate trigger. When fired, this
* will return the OOB contact info for all processes in the specified job
/**
* declare the association structure as a class
*/
ORTE_DECLSPEC int mca_oob_register_subscription(orte_jobid_t job, char *trigger);
/*
* Get contact info for a process or job
* Returns contact info for the specified process. If the vpid in the process name
* is WILDCARD, then it returns the contact info for all processes in the specified
* job. If the jobid is WILDCARD, then it returns the contact info for processes
* of the specified vpid across all jobs. Obviously, combining the two WILDCARD
* values will return contact info for everyone!
*/
ORTE_DECLSPEC int mca_oob_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data);
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(mca_oob_base_info_t);
/*
* Callback on exception condition.
* Global functions for MCA overall collective open and close
*/
ORTE_DECLSPEC int mca_oob_base_open(void);
ORTE_DECLSPEC int mca_oob_base_init(void);
ORTE_DECLSPEC int mca_oob_base_module_init(void);
ORTE_DECLSPEC int mca_oob_base_close(void);
typedef enum {
MCA_OOB_PEER_UNREACH,
MCA_OOB_PEER_DISCONNECTED
} mca_oob_base_exception_t;
typedef int (*mca_oob_base_exception_fn_t)(const orte_process_name_t* peer, int exception);
/**
* Register a callback function on loss of a connection.
/*
* Global struct holding the selected module's function pointers
*/
ORTE_DECLSPEC int mca_oob_add_exception_handler(
mca_oob_base_exception_fn_t cbfunc);
/**
* Remove a callback
*/
ORTE_DECLSPEC int mca_oob_del_exception_handler(
mca_oob_base_exception_fn_t cbfunc);
/**
* Invoke exception handlers
*/
ORTE_DECLSPEC void mca_oob_call_exception_handlers(
orte_process_name_t* peer, int exception);
ORTE_DECLSPEC extern int mca_oob_base_output;
extern char* mca_oob_base_include;
extern char* mca_oob_base_exclude;
ORTE_DECLSPEC extern opal_list_t mca_oob_base_components;
ORTE_DECLSPEC extern opal_list_t mca_oob_base_modules;
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -41,10 +41,6 @@ int mca_oob_base_close(void)
}
/* destruct the condition variables for xcast */
OBJ_DESTRUCT(&orte_oob_xcast_mutex);
OBJ_DESTRUCT(&orte_oob_xcast_cond);
/* Finalize all the oob modules and free their list items */
for (item = opal_list_remove_first(&mca_oob_base_modules);
item != NULL;
@ -61,7 +57,6 @@ int mca_oob_base_close(void)
OBJ_DESTRUCT(&mca_oob_base_modules);
OBJ_DESTRUCT(&mca_oob_base_components);
OBJ_DESTRUCT(&mca_oob_base_exception_handlers);
/* All done */
orte_oob_base_already_opened = false;

Просмотреть файл

@ -1,86 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/oob/oob.h"
OBJ_CLASS_INSTANCE(
mca_oob_base_exception_handler_t,
opal_list_item_t,
NULL,
NULL
);
/**
* Register a callback function on loss of a connection.
*/
int mca_oob_add_exception_handler(
mca_oob_base_exception_fn_t cbfunc)
{
mca_oob_base_exception_handler_t *eh = OBJ_NEW(mca_oob_base_exception_handler_t);
eh->cbfunc = cbfunc;
opal_list_append(&mca_oob_base_exception_handlers, &eh->super);
return ORTE_SUCCESS;
}
/**
* Remove a callback
*/
int mca_oob_del_exception_handler(
mca_oob_base_exception_fn_t cbfunc)
{
opal_list_item_t* item;
item = opal_list_get_first(&mca_oob_base_exception_handlers);
while(item != opal_list_get_end(&mca_oob_base_exception_handlers)) {
opal_list_item_t* next = opal_list_get_next(item);
mca_oob_base_exception_handler_t* eh = (mca_oob_base_exception_handler_t*)item;
if(eh->cbfunc == cbfunc) {
opal_list_remove_item(&mca_oob_base_exception_handlers, &eh->super);
OBJ_RELEASE(eh);
}
item = next;
}
return ORTE_SUCCESS;
}
/**
* Invoke exception handlers
*/
void mca_oob_call_exception_handlers(
orte_process_name_t* peer, int exception)
{
opal_list_item_t* item;
item = opal_list_get_first(&mca_oob_base_exception_handlers);
while(item != opal_list_get_end(&mca_oob_base_exception_handlers)) {
opal_list_item_t* next = opal_list_get_next(item);
mca_oob_base_exception_handler_t* eh = (mca_oob_base_exception_handler_t*)item;
eh->cbfunc(peer,exception);
item = next;
}
}

Просмотреть файл

@ -49,44 +49,6 @@ OBJ_CLASS_INSTANCE(
NULL
);
/**
* Parse contact info string into process name and list of uri strings.
*/
int mca_oob_parse_contact_info(
const char* contact_info,
orte_process_name_t* name,
char*** uri)
{
orte_process_name_t* proc_name;
int rc;
/* parse the process name */
char* cinfo = strdup(contact_info);
char* ptr = strchr(cinfo, ';');
if(NULL == ptr) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
free(cinfo);
return ORTE_ERR_BAD_PARAM;
}
*ptr = '\0';
ptr++;
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&proc_name, cinfo))) {
ORTE_ERROR_LOG(rc);
free(cinfo);
return rc;
}
*name = *proc_name;
free(proc_name);
if (NULL != uri) {
/* parse the remainder of the string into an array of uris */
*uri = opal_argv_split(ptr, ';');
}
free(cinfo);
return ORTE_SUCCESS;
}
/**
* Function for selecting one module from all those that are
@ -142,71 +104,6 @@ int mca_oob_base_init(void)
}
/**
* Obtains the contact info (oob implementation specific) URI strings through
* which this process can be contacted on an OOB channel.
*
* @return A null terminated string.
*
* The caller is responsible for freeing the returned string.
*/
char* mca_oob_get_my_contact_info()
{
char *proc_name=NULL;
char *proc_addr = mca_oob.oob_get_addr();
char *contact_info=NULL;
int rc;
if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string(&proc_name,
orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (0 > asprintf(&contact_info, "%s;%s", proc_name, proc_addr)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
}
free(proc_name);
free(proc_addr);
return contact_info;
}
/**
* Setup the contact information for the seed daemon - which
* is passed as an MCA parameter.
*
* @param seed
*/
int mca_oob_set_contact_info(const char* contact_info)
{
orte_process_name_t name;
char** uri;
char** ptr;
int rc = mca_oob_parse_contact_info(contact_info, &name, &uri);
if(rc != ORTE_SUCCESS)
return rc;
for(ptr = uri; ptr != NULL && *ptr != NULL; ptr++) {
opal_list_item_t* item;
for (item = opal_list_get_first(&mca_oob_base_modules);
item != opal_list_get_end(&mca_oob_base_modules);
item = opal_list_get_next(item)) {
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
if (strncmp(base->oob_component->oob_base.mca_component_name, *ptr,
strlen(base->oob_component->oob_base.mca_component_name)) == 0)
base->oob_module->oob_set_addr(&name, *ptr);
}
}
if(uri != NULL) {
opal_argv_free(uri);
}
return ORTE_SUCCESS;
}
/**
* Called to request the selected oob components to
* initialize their connections to the HNP (if not an HNP), or
@ -225,90 +122,3 @@ int mca_oob_base_module_init(void)
}
return ORTE_SUCCESS;
}
/**
* Called to have all selected oob components register their
* contact info on the GPR
*/
int mca_oob_register_contact_info(void)
{
opal_list_item_t* item;
int rc;
for (item = opal_list_get_first(&mca_oob_base_modules);
item != opal_list_get_end(&mca_oob_base_modules);
item = opal_list_get_next(item)) {
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
if (NULL != base->oob_module->oob_register_contact_info) {
if (ORTE_SUCCESS != (rc = base->oob_module->oob_register_contact_info())) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_SUCCESS;
}
/**
* Called to have all selected oob components register a subscription
* to receive their required contact info from all processes in the
* specified job when the provided trigger fires
*/
int mca_oob_register_subscription(orte_jobid_t job, char *trigger)
{
opal_list_item_t* item;
int rc;
for (item = opal_list_get_first(&mca_oob_base_modules);
item != opal_list_get_end(&mca_oob_base_modules);
item = opal_list_get_next(item)) {
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
if (NULL != base->oob_module->oob_register_subscription) {
if (ORTE_SUCCESS != (rc = base->oob_module->oob_register_subscription(job, trigger))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_SUCCESS;
}
/*
* Called to get contact info for a process or job from all selected
* oob components
*/
int mca_oob_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data)
{
opal_list_item_t* item;
int rc;
for (item = opal_list_get_first(&mca_oob_base_modules);
item != opal_list_get_end(&mca_oob_base_modules);
item = opal_list_get_next(item)) {
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
if (NULL != base->oob_module->oob_get_contact_info) {
if (ORTE_SUCCESS != (rc = base->oob_module->oob_get_contact_info(name, data))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_SUCCESS;
}
/*
* Called to update contact info tables in all selected oob components
*/
void mca_oob_update_contact_info(orte_gpr_notify_data_t* data, void* cbdata)
{
opal_list_item_t* item;
for (item = opal_list_get_first(&mca_oob_base_modules);
item != opal_list_get_end(&mca_oob_base_modules);
item = opal_list_get_next(item)) {
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
if (NULL != base->oob_module->oob_update_contact_info) {
base->oob_module->oob_update_contact_info(data, cbdata);
}
}
}

Просмотреть файл

@ -25,10 +25,8 @@
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/threads/condition.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
/*
* The following file was created by configure. It contains extern
@ -46,18 +44,10 @@ mca_oob_t mca_oob;
int mca_oob_base_output = -1;
opal_list_t mca_oob_base_components;
opal_list_t mca_oob_base_modules;
opal_list_t mca_oob_base_exception_handlers;
opal_mutex_t orte_oob_xcast_mutex;
opal_condition_t orte_oob_xcast_cond;
int orte_oob_xcast_linear_xover;
int orte_oob_xcast_binomial_xover;
orte_std_cntr_t orte_oob_xcast_num_active;
bool orte_oob_base_already_opened = false;
#define ORTE_OOB_XCAST_LINEAR_XOVER_DEFAULT 10
#define ORTE_OOB_XCAST_BINOMIAL_XOVER_DEFAULT INT_MAX
/**
* Function for finding and opening either all MCA components, or the one
@ -66,7 +56,6 @@ bool orte_oob_base_already_opened = false;
int mca_oob_base_open(void)
{
int param, value;
char *mode;
/* Sanity check. This may be able to be removed when the rml/oob
interface is re-worked (the current infrastructure may invoke
@ -75,12 +64,7 @@ int mca_oob_base_open(void)
if (orte_oob_base_already_opened) {
return ORTE_SUCCESS;
}
/* initialize the condition variables for xcast */
OBJ_CONSTRUCT(&orte_oob_xcast_mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_oob_xcast_cond, opal_condition_t);
orte_oob_xcast_num_active = 0;
/* register parameters */
param = mca_base_param_reg_int_name("oob", "base_verbose",
"Verbosity level for the oob framework",
@ -91,35 +75,9 @@ int mca_oob_base_open(void)
mca_oob_base_output = -1;
}
param = mca_base_param_reg_int_name("oob", "xcast_linear_xover",
"Number of daemons where use of linear xcast mode is to begin",
false, false, ORTE_OOB_XCAST_LINEAR_XOVER_DEFAULT, &orte_oob_xcast_linear_xover);
param = mca_base_param_reg_int_name("oob", "xcast_binomial_xover",
"Number of daemons where use of binomial xcast mode is to begin",
false, false, ORTE_OOB_XCAST_BINOMIAL_XOVER_DEFAULT, &orte_oob_xcast_binomial_xover);
param = mca_base_param_reg_string_name("oob", "xcast_mode",
"Select xcast mode (\"linear\" | \"binomial\" | \"direct\")",
false, false, "none", &mode);
if (0 == strcmp(mode, "binomial")) {
orte_oob_xcast_binomial_xover = 0;
orte_oob_xcast_linear_xover = 0;
} else if (0 == strcmp(mode, "linear")) {
orte_oob_xcast_linear_xover = 0;
orte_oob_xcast_binomial_xover = INT_MAX;
} else if (0 == strcmp(mode, "direct")) {
orte_oob_xcast_binomial_xover = INT_MAX;
orte_oob_xcast_linear_xover = INT_MAX;
} else if (0 != strcmp(mode, "none")) {
opal_output(0, "oob_xcast_mode: unknown option %s - using defaults", mode);
}
/* Open up all available components */
OBJ_CONSTRUCT(&mca_oob_base_components, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_base_modules, opal_list_t);
OBJ_CONSTRUCT(&mca_oob_base_exception_handlers, opal_list_t);
if (ORTE_SUCCESS !=
mca_base_components_open("oob", mca_oob_base_output,

Просмотреть файл

@ -1,58 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "orte/orte_constants.h"
#include "opal/util/argv.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
/*
 * Ping a process described by a contact-info string.
 *
 * @param contact_info (IN) URI string containing the peer name and one
 *                          or more transport addresses.
 * @param tv (IN)           Timeout to wait for a connection response.
 * @return ORTE_SUCCESS if any of the peer's addresses responds, otherwise
 *         the last error code encountered.
 */
int mca_oob_ping(const char* contact_info, struct timeval* tv)
{
    orte_process_name_t name;
    char** uris;
    int rc;

    rc = mca_oob_parse_contact_info(contact_info, &name, &uris);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    /* try each advertised URI in turn until one answers */
    if (NULL != uris) {
        char** u;
        for (u = uris; NULL != *u; u++) {
            rc = mca_oob.oob_ping(&name, *u, tv);
            if (ORTE_SUCCESS == rc) {
                break;
            }
        }
    }

    opal_argv_free(uris);
    return rc;
}

Просмотреть файл

@ -1,76 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/dss/dss.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include <string.h>
/*
 * Blocking receive, similar to unix recv(2).
 *
 * @param peer (IN)  Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
 * @param msg (IN)   Array of iovecs describing user buffers and lengths.
 * @param count (IN) Number of elements in the iovec array.
 * @param tag (IN)   User defined tag for matching send/recv.
 * @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
 *                   iovec array without removing the message from the queue.
 * @return OMPI error code (<0) on error or number of bytes actually received.
 */
int mca_oob_recv(orte_process_name_t* peer, struct iovec *msg, int count, int tag, int flags)
{
    /* simply forward to the selected OOB module */
    return mca_oob.oob_recv(peer, msg, count, tag, flags);
}
/*
 * Blocking receive of a packed buffer, similar to unix recv(2).
 *
 * @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
 * @param buf (OUT) Buffer that is loaded with the received message.
 * @param tag (IN)  User defined tag for matching send/recv.
 * @return OMPI error code (<0) on error or number of bytes actually received.
 */
int mca_oob_recv_packed(orte_process_name_t* peer, orte_buffer_t *buf, int tag)
{
    struct iovec iov;
    int rc;

    /* a zeroed iovec together with MCA_OOB_ALLOC asks the OOB layer to
     * allocate the receive storage on our behalf */
    iov.iov_base = NULL;
    iov.iov_len = 0;

    rc = mca_oob.oob_recv(peer, &iov, 1, tag, MCA_OOB_ALLOC);
    if (rc < 0) {
        return rc;
    }

    /* hand the received bytes over to the buffer object */
    return orte_dss.load(buf, iov.iov_base, iov.iov_len);
}

Просмотреть файл

@ -1,167 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/dss/dss.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include <string.h>
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
/*
 * Internal bookkeeping type used to implement the non-blocking packed
 * receive (mca_oob_recv_packed_nb) on top of the raw iovec interface.
 */
struct mca_oob_recv_cbdata {
    struct iovec cbiov;                   /* iovec posted to the underlying recv; filled by the OOB layer (MCA_OOB_ALLOC) */
    mca_oob_callback_packed_fn_t cbfunc;  /* user's packed callback, invoked on completion */
    void* cbdata;                         /* opaque user data passed through to cbfunc */
    bool persistent;                      /* true if MCA_OOB_PERSISTENT was requested; struct is then reused across deliveries */
};
typedef struct mca_oob_recv_cbdata mca_oob_recv_cbdata_t;

/* internal completion handler: wraps the received iovec in an
 * orte_buffer_t and relays it to the user's packed callback */
static void mca_oob_recv_callback(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    int tag,
    void* cbdata);
/*
 * Non-blocking version of mca_oob_recv().
 *
 * @param peer (IN)   Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
 * @param msg (IN)    Array of iovecs describing user buffers and lengths.
 * @param count (IN)  Number of elements in the iovec array.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param flags (IN)  May be MCA_OOB_PEEK to return up to size bytes of msg w/out removing it from the queue.
 * @param cbfunc (IN) Callback function invoked on recv completion.
 * @param cbdata (IN) User data that is passed to the callback function.
 * @return OMPI error code (<0) on error or number of bytes actually received.
 */
int mca_oob_recv_nb(orte_process_name_t* peer, struct iovec* msg, int count, int tag, int flags,
                    mca_oob_callback_fn_t cbfunc, void* cbdata)
{
    /* simply forward to the selected OOB module */
    return mca_oob.oob_recv_nb(peer, msg, count, tag, flags, cbfunc, cbdata);
}
/*
 * Cancel a previously posted non-blocking recv.
 *
 * @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
 * @param tag (IN)  User defined tag that was used when the recv was posted.
 * @return ORTE_SUCCESS or error code (<0) on error.
 */
int mca_oob_recv_cancel(orte_process_name_t* peer, int tag)
{
    /* simply forward to the selected OOB module */
    return mca_oob.oob_recv_cancel(peer, tag);
}
/**
 * Non-blocking version of mca_oob_recv_packed().
 *
 * @param peer (IN)   Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param flags (IN)  May include MCA_OOB_PEEK and/or MCA_OOB_PERSISTENT.
 * @param cbfunc (IN) Callback function invoked when a matching message arrives.
 * @param cbdata (IN) Opaque user data passed through to the callback.
 * @return OMPI error code (<0) on error or number of bytes actually received.
 *
 * The user supplied callback function is called asynchronously when a message
 * is received that matches the call parameters.
 */
int mca_oob_recv_packed_nb(
    orte_process_name_t* peer,
    int tag,
    int flags,
    mca_oob_callback_packed_fn_t cbfunc,
    void* cbdata)
{
    mca_oob_recv_cbdata_t *oob_cbdata;
    int rc;

    /* calloc (rather than malloc+memset) zeroes the embedded iovec,
     * which together with MCA_OOB_ALLOC tells the OOB layer to allocate
     * the receive storage itself */
    oob_cbdata = (mca_oob_recv_cbdata_t*)calloc(1, sizeof(mca_oob_recv_cbdata_t));
    if (NULL == oob_cbdata) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    oob_cbdata->cbfunc = cbfunc;
    oob_cbdata->cbdata = cbdata;
    oob_cbdata->persistent = (flags & MCA_OOB_PERSISTENT) ? true : false;

    /* post the receive; mca_oob_recv_callback unpacks the result into an
     * orte_buffer_t and invokes the user's packed callback */
    rc = mca_oob.oob_recv_nb(peer, &oob_cbdata->cbiov, 1, tag, flags|MCA_OOB_ALLOC,
                             mca_oob_recv_callback, oob_cbdata);
    if (rc < 0) {
        free(oob_cbdata);
    }
    return rc;
}
/**
 * Internal callback invoked on non-blocking recv completion; used only by
 * mca_oob_recv_packed_nb().  Wraps the received iovec in a temporary
 * orte_buffer_t and relays it to the user's packed callback.
 *
 * @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
 * @param peer (IN)   Opaque name of peer process.
 * @param msg (IN)    Array of iovecs describing the received data.
 * @param count (IN)  Number of elements in the iovec array.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param cbdata (IN) Our mca_oob_recv_cbdata_t bookkeeping struct.
 */
static void mca_oob_recv_callback(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    int tag,
    void* cbdata)
{
    mca_oob_recv_cbdata_t *oob_cbdata = (mca_oob_recv_cbdata_t*)cbdata;
    orte_buffer_t buffer;

    /* on error, report to the user with a NULL buffer.
     * NOTE(review): the struct is freed here even for persistent recvs —
     * presumably the OOB layer drops the posted recv on failure; confirm. */
    if(status < 0) {
        oob_cbdata->cbfunc(status, peer, NULL, tag, oob_cbdata->cbdata);
        free(oob_cbdata);
        return;
    }

    /* init a buffer with the received message (load takes ownership of
     * the iovec storage allocated by the OOB layer) */
    OBJ_CONSTRUCT(&buffer, orte_buffer_t);
    orte_dss.load(&buffer,msg[0].iov_base,msg[0].iov_len);

    /* call users callback function */
    oob_cbdata->cbfunc(status, peer, &buffer, tag, oob_cbdata->cbdata);

    /* cleanup: a persistent recv reuses the bookkeeping struct, so only
     * free it for one-shot receives */
    OBJ_DESTRUCT(&buffer);
    if(oob_cbdata->persistent == false) {
        free(oob_cbdata);
    }
}

Просмотреть файл

@ -1,80 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <string.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include "orte/orte_constants.h"
#include "orte/dss/dss.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "opal/util/output.h"
/*
 * Blocking send, similar to unix send(2).
 *
 * @param peer (IN)  Opaque name of peer process.
 * @param msg (IN)   Array of iovecs describing user buffers and lengths.
 * @param count (IN) Number of elements in the iovec array.
 * @param tag (IN)   User defined tag for matching send/recv.
 * @param flags (IN) Currently unused.
 * @return OMPI error code (<0) on error or number of bytes actually sent.
 */
int mca_oob_send(orte_process_name_t* peer, struct iovec *msg, int count, int tag, int flags)
{
    /* simply forward to the selected OOB module */
    return mca_oob.oob_send(peer, msg, count, tag, flags);
}
/*
 * Blocking send of a packed buffer, similar to unix send(2) and mca_oob_send().
 *
 * @param peer (IN)   Opaque name of peer process.
 * @param buffer (IN) Prepacked OMPI_BUFFER containing the data to send.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param flags (IN)  Currently unused.
 * @return OMPI error code (<0) on error or number of bytes actually sent.
 */
int mca_oob_send_packed (orte_process_name_t* peer, orte_buffer_t* buffer, int tag, int flags)
{
    struct iovec iov;
    void *payload;
    orte_std_cntr_t payload_len;
    int rc;

    /* peek at the buffer's payload: unload yields the pointer and length,
     * and the immediate re-load puts the data back so the caller's buffer
     * remains intact */
    rc = orte_dss.unload(buffer, &payload, &payload_len);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
    orte_dss.load(buffer, payload, payload_len);

    iov.iov_base = (IOVBASE_TYPE*)payload;
    iov.iov_len = payload_len;
    return mca_oob.oob_send(peer, &iov, 1, tag, flags);
}

Просмотреть файл

@ -1,160 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/dss/dss.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include <string.h>
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
/*
 * Internal bookkeeping struct used to implement the non-blocking packed
 * send (mca_oob_send_packed_nb) on top of the raw iovec interface.
 */
struct mca_oob_send_cbdata {
    orte_buffer_t* cbbuf;                 /* the user's packed buffer, handed back to the callback */
    struct iovec cbiov;                   /* iovec describing the buffer's payload for the wire */
    mca_oob_callback_packed_fn_t cbfunc;  /* user's packed callback, invoked on completion */
    void* cbdata;                         /* opaque user data passed through to cbfunc */
};
typedef struct mca_oob_send_cbdata mca_oob_send_cbdata_t;

/* internal completion handler: relays send completion (with the original
 * packed buffer) to the user's packed callback */
static void mca_oob_send_callback(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    int tag,
    void* cbdata);
/*
 * Non-blocking version of mca_oob_send().
 *
 * @param peer (IN)   Opaque name of peer process.
 * @param msg (IN)    Array of iovecs describing user buffers and lengths.
 * @param count (IN)  Number of elements in the iovec array.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param flags (IN)  Currently unused.
 * @param cbfunc (IN) Callback function invoked on send completion.
 * @param cbdata (IN) User data that is passed to the callback function.
 * @return OMPI error code (<0) on error or number of bytes actually sent.
 */
int mca_oob_send_nb(orte_process_name_t* peer, struct iovec* msg, int count, int tag,
                    int flags, mca_oob_callback_fn_t cbfunc, void* cbdata)
{
    /* simply forward to the selected OOB module */
    return mca_oob.oob_send_nb(peer, msg, count, tag, flags, cbfunc, cbdata);
}
/**
 * Non-blocking version of mca_oob_send_packed().
 *
 * @param peer (IN)   Opaque name of peer process.
 * @param buffer (IN) Opaque buffer handle.
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param flags (IN)  Currently unused.
 * @param cbfunc (IN) Callback function invoked on send completion.
 * @param cbdata (IN) User data that is passed to the callback function.
 * @return OMPI error code (<0) on error or number of bytes actually sent.
 *
 * The user supplied callback function is called when the send completes.
 * Note that the callback may occur before this call returns to the caller,
 * if the send completes during the call.
 */
int mca_oob_send_packed_nb(
    orte_process_name_t* peer,
    orte_buffer_t* buffer,
    int tag,
    int flags,
    mca_oob_callback_packed_fn_t cbfunc,
    void* cbdata)
{
    mca_oob_send_cbdata_t *snd;
    void *payload;
    orte_std_cntr_t payload_len;
    int rc;

    /* peek at the buffer's payload: unload then immediately re-load so
     * the caller's buffer stays intact */
    rc = orte_dss.unload(buffer, &payload, &payload_len);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
    orte_dss.load(buffer, payload, payload_len);

    /* bookkeeping struct handed to our internal completion callback */
    snd = (mca_oob_send_cbdata_t*)malloc(sizeof(mca_oob_send_cbdata_t));
    if (NULL == snd) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    snd->cbbuf = buffer;
    snd->cbfunc = cbfunc;
    snd->cbdata = cbdata;
    snd->cbiov.iov_base = (IOVBASE_TYPE*)payload;
    snd->cbiov.iov_len = payload_len;

    /* queue up the request; on immediate failure reclaim the bookkeeping */
    rc = mca_oob.oob_send_nb(peer, &snd->cbiov, 1, tag, flags,
                             mca_oob_send_callback, snd);
    if (rc < 0) {
        free(snd);
    }
    return rc;
}
/**
 * Internal callback invoked on send completion for packed messages only,
 * i.e. only mca_oob_send_packed_nb() uses this.  Relays completion to the
 * user's packed callback, passing back the original packed buffer (or NULL
 * on error), then frees the bookkeeping struct.
 *
 * @param status (IN) Completion status - equivalent to the return value from blocking send/recv.
 * @param peer (IN)   Opaque name of peer process.
 * @param msg (IN)    Array of iovecs that were sent (unused here).
 * @param count (IN)  Number of elements in the iovec array (unused here).
 * @param tag (IN)    User defined tag for matching send/recv.
 * @param cbdata (IN) Our mca_oob_send_cbdata_t bookkeeping struct.
 */
static void mca_oob_send_callback(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    int tag,
    void* cbdata)
{
    mca_oob_send_cbdata_t *snd = (mca_oob_send_cbdata_t*)cbdata;
    /* on error the user gets no buffer back, otherwise the original one */
    orte_buffer_t *buf = (status < 0) ? NULL : snd->cbbuf;

    snd->cbfunc(status, peer, buf, tag, snd->cbdata);
    free(snd);
}

Просмотреть файл

@ -33,59 +33,41 @@
#include "opal/mca/mca.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/oob/oob_types.h"
#include "orte/mca/oob/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_oob_1_0_0_t;
/**
* Convenience Typedef
*/
BEGIN_C_DECLS
struct mca_oob_1_0_0_t;
typedef struct mca_oob_1_0_0_t mca_oob_1_0_0_t;
/**
* Convenience typedef
*/
typedef struct mca_oob_1_0_0_t mca_oob_t;
/********
* NOTE: these are functions and prototypes for the use of the modules
* and components.
* THESE ARE NOT USER INTERFACE FUNCTIONS.
* the user interface is in mca/oob/base/base.h
*/
typedef mca_oob_t* (*mca_oob_base_component_init_fn_t)(int *priority);
struct mca_oob_base_component_1_0_0_t {
mca_base_component_t oob_base;
mca_base_component_data_1_0_0_t oob_data;
mca_oob_base_component_init_fn_t oob_init;
};
typedef struct mca_oob_base_component_1_0_0_t mca_oob_base_component_1_0_0_t;
typedef mca_oob_base_component_1_0_0_t mca_oob_base_component_t;
/*
* OOB Component/Module function prototypes.
*/
/**
* Implementation of mca_oob_base_module_get_addr().
*/
typedef char* (*mca_oob_base_module_get_addr_fn_t)(void);
/**
* Implementation of mca_oob_base_module_set_addr().
*
* @param addr Address of seed in component specific uri format.
*/
typedef int (*mca_oob_base_module_set_addr_fn_t)(const orte_process_name_t* peer,
const char* uri);
typedef int (*mca_oob_base_module_set_addr_fn_t)(const orte_process_name_t*, const char* uri);
/**
* Implementation of mca_oob_base_module_update_contact_info()
*/
typedef void (*mca_oob_module_update_contact_info_fn_t)(orte_gpr_notify_data_t* data,
void* cbdata);
typedef int (*mca_oob_base_module_get_new_name_fn_t)(orte_process_name_t*);
/**
* Implementation of mca_oob_ping().
@ -94,46 +76,10 @@ typedef void (*mca_oob_module_update_contact_info_fn_t)(orte_gpr_notify_data_t*
* @param tv (IN) Timeout to wait in connection response.
* @return OMPI error code (<0) or ORTE_SUCCESS
*/
typedef int (*mca_oob_base_module_ping_fn_t)(const orte_process_name_t*,
const char* uri,
const struct timeval* tv);
typedef int (*mca_oob_base_module_ping_fn_t)(const orte_process_name_t*, const char* uri, const struct timeval* tv);
/**
* Implementation of mca_oob_send().
*
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) Currently unused.
* @return OMPI error code (<0) on error number of bytes actually sent.
*/
typedef int (*mca_oob_base_module_send_fn_t)(
orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/**
* Implementation of mca_oob_recv().
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param types (IN) Parallel array to iovecs describing data type of each iovec element.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
* iovec array without removing the message from the queue.
* @return OMPI error code (<0) on error or number of bytes actually received.
*/
typedef int (*mca_oob_base_module_recv_fn_t)(
orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/**
* Implementation of mca_oob_send_nb().
@ -155,7 +101,7 @@ typedef int (*mca_oob_base_module_send_nb_fn_t)(
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
@ -177,7 +123,7 @@ typedef int (*mca_oob_base_module_recv_nb_fn_t)(
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
@ -201,97 +147,33 @@ typedef int (*mca_oob_base_module_init_fn_t)(void);
*/
typedef int (*mca_oob_base_module_fini_fn_t)(void);
/**
* xcast function for sending common messages to all processes
*/
typedef int (*mca_oob_base_module_xcast_fn_t)(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
typedef int (*mca_oob_base_module_xcast_nb_fn_t)(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
typedef int (*mca_oob_base_module_xcast_gate_fn_t)(orte_gpr_trigger_cb_fn_t cbfunc);
/* ft event */
typedef int (*mca_oob_base_module_ft_event_fn_t)( int state );
/*
* Register my contact info with the General Purpose Registry
* This function causes the component to "put" its contact info
* on the registry.
*/
typedef int (*mca_oob_module_register_contact_info_fn_t)(void);
/*
* Register a subscription to receive contact info on other processes
* This function will typically be called from within a GPR compound command
* to register a subscription against a stage gate trigger. When fired, this
* will return the OOB contact info for all processes in the specified job
*/
typedef int (*mca_oob_module_register_subscription_fn_t)(orte_jobid_t job, char *trigger);
/*
* Get contact info for a process or job
* Returns contact info for the specified process. If the vpid in the process name
* is WILDCARD, then it returns the contact info for all processes in the specified
* job. If the jobid is WILDCARD, then it returns the contact info for processes
* of the specified vpid across all jobs. Obviously, combining the two WILDCARD
* values will return contact info for everyone!
*/
typedef int (*mca_oob_module_get_contact_info_fn_t)(orte_process_name_t *name, orte_gpr_notify_data_t **data);
/**
* OOB Module
*/
struct mca_oob_1_0_0_t {
mca_oob_base_module_get_addr_fn_t oob_get_addr;
mca_oob_base_module_set_addr_fn_t oob_set_addr;
mca_oob_base_module_ping_fn_t oob_ping;
mca_oob_base_module_send_fn_t oob_send;
mca_oob_base_module_recv_fn_t oob_recv;
mca_oob_base_module_send_nb_fn_t oob_send_nb;
mca_oob_base_module_recv_nb_fn_t oob_recv_nb;
mca_oob_base_module_recv_cancel_fn_t oob_recv_cancel;
mca_oob_base_module_init_fn_t oob_init;
mca_oob_base_module_fini_fn_t oob_fini;
mca_oob_base_module_xcast_fn_t oob_xcast;
mca_oob_base_module_xcast_nb_fn_t oob_xcast_nb;
mca_oob_base_module_xcast_gate_fn_t oob_xcast_gate;
mca_oob_base_module_get_addr_fn_t oob_get_addr;
mca_oob_base_module_set_addr_fn_t oob_set_addr;
mca_oob_base_module_get_new_name_fn_t oob_get_new_name;
mca_oob_base_module_ping_fn_t oob_ping;
mca_oob_base_module_send_nb_fn_t oob_send_nb;
mca_oob_base_module_recv_nb_fn_t oob_recv_nb;
mca_oob_base_module_recv_cancel_fn_t oob_recv_cancel;
mca_oob_base_module_ft_event_fn_t oob_ft_event;
mca_oob_module_register_contact_info_fn_t oob_register_contact_info;
mca_oob_module_register_subscription_fn_t oob_register_subscription;
mca_oob_module_get_contact_info_fn_t oob_get_contact_info;
mca_oob_module_update_contact_info_fn_t oob_update_contact_info;
orte_rml_exception_callback_t oob_exception_callback;
};
/**
* OOB Component
*/
typedef mca_oob_t* (*mca_oob_base_component_init_fn_t)(int *priority);
/**
* the standard component data structure
*/
struct mca_oob_base_component_1_0_0_t {
mca_base_component_t oob_base;
mca_base_component_data_1_0_0_t oob_data;
mca_oob_base_component_init_fn_t oob_init;
};
/**
* Convenience Typedef
*/
typedef struct mca_oob_base_component_1_0_0_t mca_oob_base_component_1_0_0_t;
/**
* Convenience Typedef
*/
typedef mca_oob_base_component_1_0_0_t mca_oob_base_component_t;
/**
* Macro for use in components that are of type oob v1.0.0
*/
@ -302,69 +184,12 @@ typedef mca_oob_base_component_1_0_0_t mca_oob_base_component_t;
"oob", 1, 0, 0
/*
* This is the first module on the list. This is here temporarily
* to make things work
* BWB - FIX ME - This is the first module on the list. This is here
* temporarily to make things work
*/
ORTE_DECLSPEC extern mca_oob_t mca_oob;
/**
* associate a component and a module that belongs to it
*/
struct mca_oob_base_info_t {
opal_list_item_t super;
mca_oob_base_component_t *oob_component;
mca_oob_t *oob_module;
};
/**
* Convenience Typedef
*/
typedef struct mca_oob_base_info_t mca_oob_base_info_t;
END_C_DECLS
/**
* declare the association structure as a class
*/
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(mca_oob_base_info_t);
/**
* List of registrations of exception callbacks
*/
struct mca_oob_base_exception_handler_t {
opal_list_item_t super;
mca_oob_base_exception_fn_t cbfunc;
};
/**
* Convenience Typedef
*/
typedef struct mca_oob_base_exception_handler_t mca_oob_base_exception_handler_t;
/**
* declare the association structure as a class
*/
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(mca_oob_base_exception_handler_t);
/*
* Global functions for MCA overall collective open and close
*/
ORTE_DECLSPEC int mca_oob_base_open(void);
ORTE_DECLSPEC int mca_oob_base_init(void);
ORTE_DECLSPEC int mca_oob_base_module_init(void);
ORTE_DECLSPEC int mca_oob_base_close(void);
/*
* Global struct holding the selected module's function pointers
*/
ORTE_DECLSPEC extern int mca_oob_base_output;
extern char* mca_oob_base_include;
extern char* mca_oob_base_exclude;
ORTE_DECLSPEC extern opal_list_t mca_oob_base_components;
ORTE_DECLSPEC extern opal_list_t mca_oob_base_modules;
ORTE_DECLSPEC extern opal_list_t mca_oob_base_exception_handlers;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -41,9 +41,8 @@
#include "opal/mca/timer/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
#define ORTE_OOB_TCP_KEY "oob-tcp"
@ -52,6 +51,7 @@ extern "C" {
#define OOB_TCP_DEBUG_INFO 3 /* information about startup, connection establish, etc. */
#define OOB_TCP_DEBUG_ALL 4 /* everything else */
extern mca_oob_t mca_oob_tcp;
/*
* standard component functions
@ -72,31 +72,6 @@ int mca_oob_tcp_init(void);
*/
int mca_oob_tcp_fini(void);
/*
* Register my contact info with the General Purpose Registry
* This function causes the component to "put" its contact info
* on the registry.
*/
int mca_oob_tcp_register_contact_info(void);
/*
* Register a subscription to receive contact info on other processes
* This function will typically be called from within a GPR compound command
* to register a subscription against a stage gate trigger. When fired, this
* will return the OOB contact info for all processes in the specified job
*/
int mca_oob_tcp_register_subscription(orte_jobid_t job, char *trigger);
/*
* Get contact info for a process or job
* Returns contact info for the specified process. If the vpid in the process name
* is WILDCARD, then it returns the contact info for all processes in the specified
* job. If the jobid is WILDCARD, then it returns the contact info for processes
* of the specified vpid across all jobs. Obviously, combining the two WILDCARD
* values will return contact info for everyone!
*/
ORTE_DECLSPEC int mca_oob_tcp_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data);
/**
* Compare two process names for equality.
*
@ -122,6 +97,8 @@ char* mca_oob_tcp_get_addr(void);
int mca_oob_tcp_set_addr(const orte_process_name_t*, const char*);
int mca_oob_tcp_get_new_name(orte_process_name_t* name);
/**
* A routine to ping a given process name to determine if it is reachable.
*
@ -135,43 +112,6 @@ int mca_oob_tcp_set_addr(const orte_process_name_t*, const char*);
int mca_oob_tcp_ping(const orte_process_name_t*, const char* uri, const struct timeval* tv);
/**
* Similiar to unix writev(2).
*
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) Currently unused.
* @return OMPI error code (<0) on error number of bytes actually sent.
*/
int mca_oob_tcp_send(
orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
/**
* Similiar to unix readv(2)
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User defined tag for matching send/recv.
* @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
* iovec array without removing the message from the queue.
* @return OMPI error code (<0) on error or number of bytes actually received.
*/
int mca_oob_tcp_recv(
orte_process_name_t* peer,
struct iovec * msg,
int count,
int tag,
int flags);
/*
* Non-blocking versions of send/recv.
@ -197,7 +137,7 @@ int mca_oob_tcp_send_nb(
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
@ -219,7 +159,7 @@ int mca_oob_tcp_recv_nb(
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
@ -245,11 +185,7 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t*);
*/
int mca_oob_tcp_parse_uri(
const char* uri,
#if OPAL_WANT_IPV6
struct sockaddr_in6* inaddr
#else
struct sockaddr_in* inaddr
#endif
struct sockaddr* inaddr
);
/**
@ -278,11 +214,8 @@ struct mca_oob_tcp_component_t {
char* tcp_exclude; /**< list of ip interfaces to exclude */
int tcp_listen_sd; /**< listen socket for incoming IPv4 connection requests */
unsigned short tcp_listen_port; /**< IPv4 listen port */
#if OPAL_WANT_IPV6
int tcp6_listen_sd; /**< listen socket for incoming IPv6 connection requests */
unsigned short tcp6_listen_port; /**< IPv6 listen port */
#endif
opal_list_t tcp_subscriptions; /**< list of registry subscriptions */
opal_list_t tcp_peer_list; /**< list of peers sorted in mru order */
opal_hash_table_t tcp_peers; /**< peers sorted by name */
opal_hash_table_t tcp_peer_names; /**< cache of peer contact info sorted by name */
@ -292,12 +225,8 @@ struct mca_oob_tcp_component_t {
int tcp_sndbuf; /**< socket send buffer size */
int tcp_rcvbuf; /**< socket recv buffer size */
opal_free_list_t tcp_msgs; /**< free list of messages */
opal_event_t tcp_send_event; /**< event structure for IPv4 sends */
opal_event_t tcp_recv_event; /**< event structure for IPv4 recvs */
#if OPAL_WANT_IPV6
opal_event_t tcp6_send_event; /**< event structure for IPv6 sends */
opal_event_t tcp6_recv_event; /**< event structure for IPv6 recvs */
#endif
opal_mutex_t tcp_lock; /**< lock for accessing module state */
opal_list_t tcp_events; /**< list of pending events (accepts) */
opal_list_t tcp_msg_post; /**< list of recieves user has posted */
@ -311,6 +240,8 @@ struct mca_oob_tcp_component_t {
bool tcp_shutdown;
mca_oob_tcp_listen_type_t tcp_listen_type;
opal_list_t tcp_available_devices;
opal_thread_t tcp_listen_thread;
opal_free_list_t tcp_pending_connections_fl;
opal_list_t tcp_pending_connections;
@ -325,8 +256,6 @@ struct mca_oob_tcp_component_t {
int tcp_copy_max_size;
int tcp_copy_spin_count;
int connect_sleep;
bool tcp_ignore_localhost; /**< should use localhost as an address or not */
};
/**
@ -346,18 +275,27 @@ extern int mca_oob_tcp_output_handle;
struct mca_oob_tcp_pending_connection_t {
opal_free_list_item_t super;
int fd;
/* Bug, FIXME: Port to IPv6 */
struct sockaddr_in addr;
};
typedef struct mca_oob_tcp_pending_connection_t mca_oob_tcp_pending_connection_t;
OBJ_CLASS_DECLARATION(mca_oob_tcp_pending_connection_t);
opal_free_list_item_t super;
int fd;
/* Bug, FIXME: Port to IPv6 */
struct sockaddr_in addr;
};
typedef struct mca_oob_tcp_pending_connection_t mca_oob_tcp_pending_connection_t;
OBJ_CLASS_DECLARATION(mca_oob_tcp_pending_connection_t);
struct mca_oob_tcp_device_t {
opal_list_item_t super;
int if_index;
bool if_local;
struct sockaddr_storage if_addr;
};
typedef struct mca_oob_tcp_device_t mca_oob_tcp_device_t;
OBJ_CLASS_DECLARATION(mca_oob_tcp_device_t);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#endif /* MCA_OOB_TCP_H_ */

Просмотреть файл

@ -65,168 +65,7 @@ OBJ_CLASS_INSTANCE(
mca_oob_tcp_addr_destruct);
/*
 * Pack this process's name and the TCP contact address of every published
 * local interface into the supplied buffer.  Wire layout, as written below:
 * process name (ORTE_NAME), address count (ORTE_STD_CNTR), then per address
 * a one-byte family tag, the listen port, and the raw IP address bytes.
 *
 * @param buffer (OUT) Buffer the contact info is packed into.
 * @return ORTE_SUCCESS, or the first pack error encountered for the
 *         leading name/count fields.
 */
int mca_oob_tcp_addr_pack(orte_buffer_t* buffer)
{
    orte_std_cntr_t count = 0;
    int i;
    int rc;

    /* our own name goes first so receivers know whose addresses follow */
    rc = orte_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME);
    if(rc != ORTE_SUCCESS)
        return rc;

    /* first pass: count the interfaces we will publish (localhost may be
     * skipped via the tcp_ignore_localhost parameter) */
    for(i=opal_ifbegin(); i>0; i=opal_ifnext(i)) {
        struct sockaddr_storage inaddr;

        opal_ifindextoaddr(i, (struct sockaddr*) &inaddr, sizeof(inaddr));
        if(mca_oob_tcp_component.tcp_ignore_localhost &&
           opal_net_islocalhost((struct sockaddr*) &inaddr)) {
            continue;
        }
        count++;
    }
    rc = orte_dss.pack(buffer, &count, 1, ORTE_STD_CNTR);
    if(rc != ORTE_SUCCESS)
        return rc;

    /* second pass: pack each published address, using the same skip logic
     * as above so the count stays consistent */
    for(i=opal_ifbegin(); i>0; i=opal_ifnext(i)) {
        struct sockaddr_storage inaddr;
        uint8_t type;
        uint32_t ipaddr;
        uint16_t port;

        opal_ifindextoaddr(i, (struct sockaddr*) &inaddr, sizeof(inaddr));
        if(mca_oob_tcp_component.tcp_ignore_localhost &&
           opal_net_islocalhost((struct sockaddr*) &inaddr))
            continue;

        switch (inaddr.ss_family) {
        case AF_INET:
            type = MCA_OOB_TCP_ADDR_TYPE_AFINET;
            port = mca_oob_tcp_component.tcp_listen_port;
            orte_dss.pack(buffer, &type, 1, ORTE_INT8);
            /* port and address are packed as raw bytes (ORTE_BYTE), i.e.
             * exactly as stored here — no DSS integer conversion */
            orte_dss.pack(buffer, &port, sizeof (port), ORTE_BYTE);
            ipaddr = (uint32_t) ((struct sockaddr_in*)&inaddr)->sin_addr.s_addr;
            orte_dss.pack(buffer, &ipaddr, sizeof (ipaddr), ORTE_BYTE);
            break;
#if OPAL_WANT_IPV6
        case AF_INET6:
            type = MCA_OOB_TCP_ADDR_TYPE_AFINET6;
            port = mca_oob_tcp_component.tcp6_listen_port;
            orte_dss.pack(buffer, &type, 1, ORTE_INT8);
            orte_dss.pack(buffer, &port, sizeof (port), ORTE_BYTE);
            /* a raw IPv6 address is always 16 bytes */
            orte_dss.pack(buffer, &((struct sockaddr_in6*)&inaddr)->sin6_addr,
                          16, ORTE_BYTE);
            break;
#endif
        default:
            /* shouldn't get here, as opal_if shouldn't allow anything
               but AFINET and AF_INET6. */
            continue;
        }
    }

    return ORTE_SUCCESS;
}
mca_oob_tcp_addr_t* mca_oob_tcp_addr_unpack(orte_buffer_t* buffer)
{
mca_oob_tcp_addr_t* addr = OBJ_NEW(mca_oob_tcp_addr_t);
int rc;
orte_std_cntr_t count;
if(NULL == addr)
return NULL;
count = 1;
rc = orte_dss.unpack(buffer, &addr->addr_name, &count, ORTE_NAME);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
count = 1;
rc = orte_dss.unpack(buffer, &addr->addr_count, &count, ORTE_STD_CNTR);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
if(addr->addr_count != 0) {
orte_std_cntr_t i;
#if OPAL_WANT_IPV6
addr->addr_inet = (struct sockaddr_in6 *)malloc(sizeof(struct sockaddr_in6) * addr->addr_count);
#else
addr->addr_inet = (struct sockaddr_in *)malloc(sizeof(struct sockaddr_in) * addr->addr_count);
#endif
if(NULL == addr->addr_inet) {
OBJ_RELEASE(addr);
return NULL;
}
addr->addr_alloc = addr->addr_count;
for(i=0; i<addr->addr_count; i++) {
uint8_t type;
uint16_t port;
/* unpack and expand family */
count = 1;
rc = orte_dss.unpack(buffer, &type, &count, ORTE_INT8);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
/* and the listen port */
count = sizeof (port);
rc = orte_dss.unpack(buffer, &port, &count, ORTE_BYTE);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
switch (type) {
case MCA_OOB_TCP_ADDR_TYPE_AFINET:
{
struct sockaddr_in* target;
uint32_t ipaddr;
count = sizeof (ipaddr);
rc = orte_dss.unpack(buffer, &ipaddr, &count, ORTE_BYTE);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
target = (struct sockaddr_in*)&(addr->addr_inet[i]);
target->sin_family = AF_INET;
target->sin_port = port;
target->sin_addr.s_addr = ipaddr;
}
break;
#if OPAL_WANT_IPV6
case MCA_OOB_TCP_ADDR_TYPE_AFINET6:
{
uint8_t address[16];
struct sockaddr_in6* target;
count = 16;
rc = orte_dss.unpack(buffer, &address, &count, ORTE_BYTE);
if(rc != ORTE_SUCCESS) {
OBJ_RELEASE(addr);
return NULL;
}
target = (struct sockaddr_in6*)&(addr->addr_inet[i]);
target->sin6_family = AF_INET6;
target->sin6_port = port;
memcpy(&target->sin6_addr, &address, sizeof (address));
}
break;
#endif
default:
OBJ_RELEASE(addr);
return NULL;
}
}
}
return addr;
}
int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage* retval)
int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr* retval)
{
static uint32_t i_have = MCA_OOB_TCP_ADDR_UNCLASSIFIED; /* my own capabilities */
@ -237,26 +76,14 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
if(MCA_OOB_TCP_ADDR_UNCLASSIFIED == addr->addr_matched) {
orte_std_cntr_t i=0;
for(i=0; i<addr->addr_count; i++) {
int ifindex;
for(ifindex=opal_ifbegin(); ifindex>0; ifindex=opal_ifnext(ifindex)) {
struct sockaddr_storage inaddr;
opal_list_item_t *item;
for (item = opal_list_get_first(&mca_oob_tcp_component.tcp_available_devices) ;
item != opal_list_get_end(&mca_oob_tcp_component.tcp_available_devices) ;
item = opal_list_get_next(item)) {
mca_oob_tcp_device_t *dev = (mca_oob_tcp_device_t*) item;
uint32_t inmask;
char name[32];
opal_ifindextoname(i, name, sizeof(name));
if (mca_oob_tcp_component.tcp_include != NULL &&
strstr(mca_oob_tcp_component.tcp_include,name) == NULL) {
continue;
}
if (mca_oob_tcp_component.tcp_exclude != NULL &&
strstr(mca_oob_tcp_component.tcp_exclude,name) != NULL) {
continue;
}
opal_ifindextoaddr(ifindex, (struct sockaddr*) &inaddr, sizeof(inaddr));
if(mca_oob_tcp_component.tcp_ignore_localhost &&
opal_net_islocalhost((struct sockaddr*) &inaddr)) {
continue;
}
opal_ifindextomask(ifindex, &inmask, sizeof(inmask));
opal_ifindextomask(dev->if_index, &inmask, sizeof(inmask));
/* Decide which address to try first; note that we're
called multiple times and each time we need to
@ -268,7 +95,7 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
- when IPv4private + IPv6, use IPv6 (this should
be changed when there is something like a CellID)
*/
if (true == opal_net_addr_isipv4public ((struct sockaddr*) &inaddr)) {
if (true == opal_net_addr_isipv4public ((struct sockaddr*) &dev->if_addr)) {
i_have |= MCA_OOB_TCP_ADDR_IPV4public;
}
@ -284,8 +111,7 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
goto done;
}
#if OPAL_WANT_IPV6
if (AF_INET6 == inaddr.ss_family) {
if (AF_INET6 == dev->if_addr.ss_family) {
i_have |= MCA_OOB_TCP_ADDR_IPV6;
}
@ -296,7 +122,6 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
goto done;
}
#endif
/* if match on network prefix - start here */
/* Bug, FIXME: This code is dangerous, it will prefer
local addresses even if they point to wrong hosts
@ -306,7 +131,7 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
adi@2006-09-30
*/
if(opal_net_samenetwork((struct sockaddr*) &inaddr,
if(opal_net_samenetwork((struct sockaddr*) &dev->if_addr,
(struct sockaddr*)&addr->addr_inet[i],
inmask)) {
addr->addr_matched |= MCA_OOB_TCP_ADDR_MATCHED;
@ -318,42 +143,38 @@ int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t* addr, struct sockaddr_storage*
done:
; /* NOP */
}
memcpy (retval, &addr->addr_inet[addr->addr_next],
sizeof (addr->addr_inet[addr->addr_next]));
if (addr->addr_inet[addr->addr_next].ss_family == AF_INET) {
memcpy(retval, &addr->addr_inet[addr->addr_next],
sizeof(struct sockaddr_in));
} else {
memcpy(retval, &addr->addr_inet[addr->addr_next],
sizeof(struct sockaddr_in6));
}
if(++addr->addr_next >= addr->addr_count)
addr->addr_next = 0;
return ORTE_SUCCESS;
}
#if OPAL_WANT_IPV6
int mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t* addr, const struct sockaddr_in6* inaddr)
#else
int mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t* addr, const struct sockaddr_in* inaddr)
#endif
int
mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t* addr, const struct sockaddr* inaddr)
{
if(addr->addr_alloc == 0) {
addr->addr_alloc = 2;
#if OPAL_WANT_IPV6
addr->addr_inet = (struct sockaddr_in6 *)malloc(addr->addr_alloc * sizeof(struct sockaddr_in6));
#else
addr->addr_inet = (struct sockaddr_in *)malloc(addr->addr_alloc * sizeof(struct sockaddr_in));
#endif
addr->addr_inet = (struct sockaddr_storage*) malloc(addr->addr_alloc * sizeof(struct sockaddr_storage));
} else if(addr->addr_count == addr->addr_alloc) {
addr->addr_alloc <<= 1;
#if OPAL_WANT_IPV6
addr->addr_inet = (struct sockaddr_in6 *)realloc(addr->addr_inet, addr->addr_alloc * sizeof(struct sockaddr_in6));
#else
addr->addr_inet = (struct sockaddr_in *)realloc(addr->addr_inet, addr->addr_alloc * sizeof(struct sockaddr_in));
#endif
addr->addr_inet = (struct sockaddr_storage*) realloc(addr->addr_inet, addr->addr_alloc * sizeof(struct sockaddr_storage));
}
if(NULL == addr->addr_inet) return ORTE_ERR_OUT_OF_RESOURCE;
if (inaddr->sa_family == AF_INET) {
memcpy(addr->addr_inet+addr->addr_count, inaddr, sizeof(struct sockaddr_in));
} else {
memcpy(addr->addr_inet+addr->addr_count, inaddr, sizeof(struct sockaddr_in6));
}
if(NULL == addr->addr_inet)
return ORTE_ERR_OUT_OF_RESOURCE;
#if OPAL_WANT_IPV6
memcpy(addr->addr_inet+addr->addr_count, inaddr, sizeof(struct sockaddr_in6));
#else
memcpy(addr->addr_inet+addr->addr_count, inaddr, sizeof(struct sockaddr_in));
#endif
addr->addr_count++;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -42,9 +42,7 @@ BEGIN_C_DECLS
#define MCA_OOB_TCP_ADDR_IPV4public 4 /* peer has public IPv4 address */
#define MCA_OOB_TCP_ADDR_TYPE_AFINET 0x01
#if OPAL_WANT_IPV6
# define MCA_OOB_TCP_ADDR_TYPE_AFINET6 0x02
#endif
#define MCA_OOB_TCP_ADDR_TYPE_AFINET6 0x02
/**
* Address info published to registry
@ -56,44 +54,23 @@ struct mca_oob_tcp_addr_t {
orte_std_cntr_t addr_next;
orte_std_cntr_t addr_alloc;
orte_std_cntr_t addr_matched;/* status of already tried address classes */
#if OPAL_WANT_IPV6
struct sockaddr_in6* addr_inet;
#else
struct sockaddr_in* addr_inet;
#endif
struct sockaddr_storage *addr_inet; /* yes, we want storage here, so the indexes work out... */
};
typedef struct mca_oob_tcp_addr_t mca_oob_tcp_addr_t;
OBJ_CLASS_DECLARATION(mca_oob_tcp_addr_t);
/**
* Unpack the contact information posted by the peer.
*/
mca_oob_tcp_addr_t* mca_oob_tcp_addr_unpack(orte_buffer_t*);
/**
* Pack this hosts addressing info into a buffer for posting
* into the registry.
*/
int mca_oob_tcp_addr_pack(orte_buffer_t*);
/**
*
*/
#if OPAL_WANT_IPV6
int mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t*, const struct sockaddr_in6*);
#else
int mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t*, const struct sockaddr_in*);
#endif
int mca_oob_tcp_addr_insert(mca_oob_tcp_addr_t*, const struct sockaddr*);
/**
*
*/
int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t*, struct sockaddr_storage*);
int mca_oob_tcp_addr_get_next(mca_oob_tcp_addr_t*, struct sockaddr*);
END_C_DECLS

Просмотреть файл

@ -183,9 +183,6 @@ int mca_oob_tcp_msg_complete(mca_oob_tcp_msg_t* msg, orte_process_name_t * peer)
opal_list_item_t* item;
OPAL_THREAD_UNLOCK(&msg->msg_lock);
/* post to a global list of completed messages */
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
opal_list_append(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
#if defined(__WINDOWS__)
/**
* In order to be able to generate TCP events recursively, Windows need
@ -196,37 +193,50 @@ int mca_oob_tcp_msg_complete(mca_oob_tcp_msg_t* msg, orte_process_name_t * peer)
* engine will call our progress function later once all socket related
* events have been processed.
*/
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
return ORTE_SUCCESS;
#else
if(opal_list_get_size(&mca_oob_tcp_component.tcp_msg_completed) > 1) {
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
return ORTE_SUCCESS;
}
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
opal_list_append(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
return ORTE_SUCCESS;
#else
/* post to a global list of completed messages */
if ((msg->msg_flags & ORTE_RML_FLAG_RECURSIVE_CALLBACK) == 0) {
int size;
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
opal_list_append(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
size = opal_list_get_size(&mca_oob_tcp_component.tcp_msg_completed);
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
if(size > 1) {
return ORTE_SUCCESS;
}
}
/* invoke message callback */
msg->msg_cbfunc(msg->msg_rc, peer, msg->msg_uiov, msg->msg_ucnt, msg->msg_hdr.msg_tag, msg->msg_cbdata);
/* dispatch any completed events */
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
MCA_OOB_TCP_MSG_RETURN(msg);
while(NULL !=
(item = opal_list_remove_first(&mca_oob_tcp_component.tcp_msg_completed))) {
msg = (mca_oob_tcp_msg_t*)item;
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
msg->msg_cbfunc(
msg->msg_rc,
&msg->msg_peer,
msg->msg_uiov,
msg->msg_ucnt,
msg->msg_hdr.msg_tag,
msg->msg_cbdata);
/* dispatch any completed events */
if ((msg->msg_flags & ORTE_RML_FLAG_RECURSIVE_CALLBACK) == 0) {
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
MCA_OOB_TCP_MSG_RETURN(msg);
while(NULL !=
(item = opal_list_remove_first(&mca_oob_tcp_component.tcp_msg_completed))) {
msg = (mca_oob_tcp_msg_t*)item;
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
msg->msg_cbfunc(
msg->msg_rc,
&msg->msg_peer,
msg->msg_uiov,
msg->msg_ucnt,
msg->msg_hdr.msg_tag,
msg->msg_cbdata);
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
MCA_OOB_TCP_MSG_RETURN(msg);
}
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
} else {
MCA_OOB_TCP_MSG_RETURN(msg);
}
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
#endif /* defined(__WINDOWS__) */
} else {
opal_condition_broadcast(&msg->msg_condition);
@ -368,7 +378,9 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_close(peer);
mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED);
if (NULL != mca_oob_tcp.oob_exception_callback) {
mca_oob_tcp.oob_exception_callback(&peer->peer_name, ORTE_RML_PEER_DISCONNECTED);
}
return false;
} else if (rc == 0) {
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) {
@ -377,7 +389,9 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
ORTE_NAME_ARGS(&(peer->peer_name)));
}
mca_oob_tcp_peer_close(peer);
mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED);
if (NULL != mca_oob_tcp.oob_exception_callback) {
mca_oob_tcp.oob_exception_callback(&peer->peer_name, ORTE_RML_PEER_DISCONNECTED);
}
return false;
}
@ -473,26 +487,14 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
post = mca_oob_tcp_msg_match_post(&peer->peer_name, msg->msg_hdr.msg_tag);
if(NULL != post) {
if(post->msg_flags & MCA_OOB_ALLOC) {
/* set the users iovec struct to point to pre-allocated buffer */
if(NULL == post->msg_uiov || 0 == post->msg_ucnt) {
post->msg_rc = ORTE_ERR_BAD_PARAM;
} else {
/* first iovec of recv message contains the header -
* subsequent contain user data
*/
post->msg_uiov[0].iov_base = (ompi_iov_base_ptr_t)msg->msg_rwbuf;
post->msg_uiov[0].iov_len = msg->msg_hdr.msg_size;
post->msg_rc = msg->msg_hdr.msg_size;
msg->msg_rwbuf = NULL;
}
if(NULL == post->msg_uiov || 0 == post->msg_ucnt) {
opal_output(0, "msg_data returning bad param");
post->msg_rc = ORTE_ERR_BAD_PARAM;
} else {
/* copy msg data into posted recv */
if (post->msg_flags & ORTE_RML_ALLOC) msg->msg_flags |= ORTE_RML_ALLOC;
post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
if(post->msg_flags & MCA_OOB_TRUNC) {
if(post->msg_flags & ORTE_RML_TRUNC) {
int i, size = 0;
for(i=1; i<msg->msg_rwcnt+1; i++)
size += msg->msg_rwiov[i].iov_len;
@ -500,7 +502,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
}
}
if(post->msg_flags & MCA_OOB_PEEK) {
if(post->msg_flags & ORTE_RML_PEEK) {
/* will need message for actual receive */
opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
} else {
@ -509,7 +511,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
mca_oob_tcp_component.tcp_match_count++;
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
if(post->msg_flags & MCA_OOB_PERSISTENT) {
if(post->msg_flags & ORTE_RML_PERSISTENT) {
post->msg_cbfunc(
post->msg_rc,
&peer->peer_name,
@ -542,30 +544,38 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
int mca_oob_tcp_msg_copy(mca_oob_tcp_msg_t* msg, struct iovec* iov, int count)
{
int i;
unsigned char* src_ptr = (unsigned char*)msg->msg_rwbuf;
int i, ret = 0;
unsigned char* src_ptr = (unsigned char*) msg->msg_rwbuf;
size_t src_len = msg->msg_hdr.msg_size;
struct iovec *dst = iov;
int rc = 0;
for(i=0; i<count; i++) {
unsigned char* dst_ptr = (unsigned char*)dst->iov_base;
size_t dst_len = dst->iov_len;
while(dst_len > 0) {
size_t len = (dst_len <= src_len) ? dst_len : src_len;
memcpy(dst_ptr, src_ptr, len);
rc += len;
dst_ptr += len;
dst_len -= len;
src_ptr += len;
src_len -= len;
if(src_len == 0) {
return rc;
for (i = 0 ; i < count ; i++) {
if ((msg->msg_flags & ORTE_RML_ALLOC) && (i == count - 1)) {
if (i == 0) {
iov[i].iov_base = src_ptr;
iov[i].iov_len = src_len;
msg->msg_rwbuf = NULL;
} else {
iov[i].iov_base = malloc(src_len);
iov[i].iov_len = src_len;
memcpy(iov[i].iov_base, src_ptr, src_len);
}
} else {
if (iov[i].iov_len > src_len) {
memcpy(iov[i].iov_base, src_ptr, src_len);
iov[i].iov_len = src_len;
} else {
memcpy(iov[i].iov_base, src_ptr, iov[i].iov_len);
}
}
dst++;
ret += iov[i].iov_len;
src_len -= iov[i].iov_len;
src_ptr += iov[i].iov_len;
if (0 == src_len) break;
}
return rc;
return ret;
}
/*
@ -611,7 +621,7 @@ mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_post(orte_process_name_t* name, int tag
if(ORTE_EQUAL == orte_dss.compare(name, &msg->msg_peer, ORTE_NAME)) {
if (msg->msg_hdr.msg_tag == tag) {
if((msg->msg_flags & MCA_OOB_PERSISTENT) == 0) {
if((msg->msg_flags & ORTE_RML_PERSISTENT) == 0) {
opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super.super);
}
return msg;

Просмотреть файл

@ -57,7 +57,7 @@ struct mca_oob_tcp_msg_t {
int msg_rwnum; /**< number of iovecs left for read/write */
int msg_rwcnt; /**< total number of iovecs for read/write */
void* msg_rwbuf; /**< optional buffer for send/recv */
mca_oob_callback_fn_t msg_cbfunc; /**< the callback function for the send/receive */
orte_rml_callback_fn_t msg_cbfunc; /**< the callback function for the send/receive */
void * msg_cbdata; /**< the data for the callback fnuction */
bool msg_complete; /**< whether the message is done sending or not */
orte_process_name_t msg_peer; /**< the name of the peer */

Просмотреть файл

@ -99,11 +99,7 @@ static void mca_oob_tcp_peer_construct(mca_oob_tcp_peer_t* peer)
memset(&peer->peer_send_event, 0, sizeof(peer->peer_send_event));
memset(&peer->peer_recv_event, 0, sizeof(peer->peer_recv_event));
peer->peer_sd = -1;
#if OPAL_WANT_IPV6
memset(&peer->peer6_recv_event, 0, sizeof(peer->peer6_recv_event));
memset(&peer->peer6_send_event, 0, sizeof(peer->peer6_send_event));
peer->peer6_sd = -1;
#endif
peer->peer_current_af = AF_UNSPEC;
memset(&peer->peer_timer_event, 0, sizeof(peer->peer_timer_event));
opal_evtimer_set(&peer->peer_timer_event, mca_oob_tcp_peer_timer_handler, peer);
}
@ -130,10 +126,7 @@ static int mca_oob_tcp_peer_event_init(mca_oob_tcp_peer_t* peer)
{
memset(&peer->peer_recv_event, 0, sizeof(peer->peer_recv_event));
memset(&peer->peer_send_event, 0, sizeof(peer->peer_send_event));
#if OPAL_WANT_IPV6
memset(&peer->peer6_recv_event, 0, sizeof(peer->peer6_recv_event));
memset(&peer->peer6_send_event, 0, sizeof(peer->peer6_send_event));
#endif
if (peer->peer_sd >= 0) {
opal_event_set(
&peer->peer_recv_event,
@ -149,23 +142,6 @@ static int mca_oob_tcp_peer_event_init(mca_oob_tcp_peer_t* peer)
peer);
}
#if OPAL_WANT_IPV6
if (peer->peer6_sd >= 0) {
opal_event_set(
&peer->peer6_recv_event,
peer->peer6_sd,
OPAL_EV_READ|OPAL_EV_PERSIST,
mca_oob_tcp_peer_recv_handler,
peer);
opal_event_set(
&peer->peer6_send_event,
peer->peer6_sd,
OPAL_EV_WRITE|OPAL_EV_PERSIST,
mca_oob_tcp_peer_send_handler,
peer);
}
#endif
return ORTE_SUCCESS;
}
@ -249,9 +225,7 @@ mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name)
peer->peer_name = *name;
peer->peer_addr = NULL;
peer->peer_sd = -1;
#if OPAL_WANT_IPV6
peer->peer6_sd = -1;
#endif
peer->peer_current_af = AF_UNSPEC;
peer->peer_state = MCA_OOB_TCP_CLOSED;
peer->peer_recv_msg = NULL;
peer->peer_send_msg = NULL;
@ -294,24 +268,73 @@ mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name)
}
static int
mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer,
uint16_t af_family)
{
int flags;
if (peer->peer_current_af == af_family && peer->peer_sd > 0) {
return ORTE_SUCCESS;
} else if (peer->peer_sd > 0) {
mca_oob_tcp_peer_shutdown(peer);
}
peer->peer_sd = socket(af_family, SOCK_STREAM, 0);
peer->peer_current_af = af_family;
if (peer->peer_sd < 0) {
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_create_socket: socket() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_shutdown(peer);
return ORTE_ERR_UNREACH;
}
/* setup socket options */
mca_oob_tcp_set_socket_options(peer->peer_sd);
/* setup event callbacks */
mca_oob_tcp_peer_event_init(peer);
/* setup the socket as non-blocking */
if (peer->peer_sd >= 0) {
if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(peer->peer_sd, F_SETFL, flags) < 0)
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
}
}
return ORTE_SUCCESS;
}
/*
* Try connecting to a peer using all the addresses that peer exported.
*/
static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
{
#if OPAL_WANT_IPV6
struct sockaddr_in6 inaddr;
#else
struct sockaddr_in inaddr;
#endif
struct sockaddr_storage inaddr;
int rc;
int connect_sd = -1;
opal_socklen_t addrlen = 0;
do {
/* pick an address in round-robin fashion from the list exported by the peer */
if(ORTE_SUCCESS != (rc = mca_oob_tcp_addr_get_next(peer->peer_addr, (struct sockaddr_storage*) &inaddr))) {
if(ORTE_SUCCESS != (rc = mca_oob_tcp_addr_get_next(peer->peer_addr, (struct sockaddr*) &inaddr))) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: "
"mca_oob_tcp_addr_get_next failed with error=%d",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -331,52 +354,24 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
opal_net_get_hostname((struct sockaddr*) &inaddr),
opal_net_get_port((struct sockaddr*) &inaddr));
}
/* start the connect - will likely fail with EINPROGRESS */
#if OPAL_WANT_IPV6
/* select the right outgoing socket according to the
af_family information in inaddr
*/
if (AF_INET6 == inaddr.sin6_family) {
/* if we couldn't initialize an IPv6 device, skip this
address */
if (peer->peer6_sd < 0) {
continue;
}
connect_sd = peer->peer6_sd;
addrlen = sizeof (struct sockaddr_in6);
rc = mca_oob_tcp_peer_create_socket(peer, inaddr.ss_family);
if (ORTE_SUCCESS != rc) {
struct timeval tv = { 1,0 };
opal_evtimer_add(&peer->peer_timer_event, &tv);
return rc;
}
if (AF_INET == inaddr.sin6_family) {
connect_sd = peer->peer_sd;
addrlen = sizeof (struct sockaddr_in);
if (AF_INET == inaddr.ss_family) {
addrlen = sizeof(struct sockaddr_in);
} else if (AF_INET6 == inaddr.ss_family) {
addrlen = sizeof(struct sockaddr_in6);
}
#else
connect_sd = peer->peer_sd;
addrlen = sizeof (struct sockaddr_in);
#endif
if(connect(connect_sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
if (connect(peer->peer_sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
/* non-blocking so wait for completion */
if(opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
#if OPAL_WANT_IPV6
/* I don't know what I'm doing here. Let's hope it results
in the right callback. Bug, FIXME
*/
if (AF_INET == inaddr.sin6_family) {
opal_event_add (&peer->peer_send_event, 0);
peer->current_af = AF_INET;
}
if (AF_INET6 == inaddr.sin6_family) {
opal_event_add (&peer->peer6_send_event, 0);
peer->current_af = AF_INET6;
}
#else
opal_event_add(&peer->peer_send_event, 0);
#endif
return ORTE_SUCCESS;
}
@ -392,19 +387,9 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
}
/* send our globally unique process identifier to the peer */
if((rc = mca_oob_tcp_peer_send_connect_ack(peer, connect_sd)) == ORTE_SUCCESS) {
if((rc = mca_oob_tcp_peer_send_connect_ack(peer, peer->peer_sd)) == ORTE_SUCCESS) {
peer->peer_state = MCA_OOB_TCP_CONNECT_ACK;
#if OPAL_WANT_IPV6
if (AF_INET == inaddr.sin6_family) {
opal_event_add (&peer->peer_recv_event, 0);
}
if (AF_INET6 == inaddr.sin6_family) {
opal_event_add (&peer->peer6_recv_event, 0);
}
#else
opal_event_add(&peer->peer_recv_event, 0);
#endif
return ORTE_SUCCESS;
} else {
opal_output(0,
@ -441,83 +426,14 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
static int mca_oob_tcp_peer_start_connect(mca_oob_tcp_peer_t* peer)
{
int flags;
/* create socket */
peer->peer_state = MCA_OOB_TCP_CONNECTING;
/* adi@2006-10-25: Former Bug. */
peer->peer_sd = socket(AF_INET, SOCK_STREAM, 0);
#if OPAL_WANT_IPV6
peer->peer6_sd = socket(AF_INET6, SOCK_STREAM, 0);
if ((peer->peer_sd < 0) && (peer->peer6_sd < 0)) {
#else
if (peer->peer_sd < 0) {
#endif
struct timeval tv = { 1,0 };
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_start_connect: socket() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_shutdown(peer);
opal_evtimer_add(&peer->peer_timer_event, &tv);
return ORTE_ERR_UNREACH;
}
/* setup socket options */
if (peer->peer_sd >= 0) {
mca_oob_tcp_set_socket_options(peer->peer_sd);
}
#if OPAL_WANT_IPV6
if (peer->peer6_sd >= 0) {
mca_oob_tcp_set_socket_options(peer->peer6_sd);
}
#endif
/* setup event callbacks */
mca_oob_tcp_peer_event_init(peer);
/* setup the socket as non-blocking */
if (peer->peer_sd >= 0) {
if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(peer->peer_sd, F_SETFL, flags) < 0)
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
}
}
#if OPAL_WANT_IPV6
if (peer->peer6_sd >= 0) {
if((flags = fcntl(peer->peer6_sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed with errno=%d\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(peer->peer6_sd, F_SETFL, flags) < 0)
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed with errno=%d\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
opal_socket_errno);
}
}
#endif
/*
* We should parse all the IP addresses exported by the peer and try to connect to each of them.
* We should parse all the IP addresses exported by the peer and
* try to connect to each of them.
*/
return mca_oob_tcp_peer_try_connect(peer);
}
@ -533,18 +449,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
opal_socklen_t so_length = sizeof(so_error);
/* unregister from receiving event notifications */
#if OPAL_WANT_IPV6
/* Bug, FIXME: I don't know what I'm doing */
if (sd == peer->peer_sd) {
opal_event_del (&peer->peer_send_event);
}
if (sd == peer->peer6_sd) {
opal_event_del (&peer->peer6_send_event);
}
#else
opal_event_del(&peer->peer_send_event);
#endif
/* check connect completion status */
if(getsockopt(sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
@ -558,18 +463,7 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
}
if(so_error == EINPROGRESS) {
#if OPAL_WANT_IPV6
/* Bug, FIXME: I don't know what I'm doing here */
if (sd == peer->peer_sd) {
opal_event_add (&peer->peer_send_event, 0);
}
if (sd == peer->peer6_sd) {
opal_event_add (&peer->peer6_send_event, 0);
}
#else
opal_event_add(&peer->peer_send_event, 0);
#endif
return;
} else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) {
struct timeval tv = { 1,0 };
@ -579,12 +473,12 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(so_error),
so_error);
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: "
"sending ack, %d",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)), so_error);
}
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: "
"sending ack, %d",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)), so_error);
}
mca_oob_tcp_peer_shutdown(peer);
opal_evtimer_add(&peer->peer_timer_event, &tv);
@ -597,20 +491,9 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
return;
}
if(mca_oob_tcp_peer_send_connect_ack(peer, sd) == ORTE_SUCCESS) {
if (mca_oob_tcp_peer_send_connect_ack(peer, sd) == ORTE_SUCCESS) {
peer->peer_state = MCA_OOB_TCP_CONNECT_ACK;
#if OPAL_WANT_IPV6
/* Bug, FIXME: I don't know what I'm doing... */
if (sd == peer->peer_sd) {
opal_event_add (&peer->peer_recv_event, 0);
}
if (sd == peer->peer6_sd) {
opal_event_add (&peer->peer6_recv_event, 0);
}
#else
opal_event_add(&peer->peer_recv_event, 0);
#endif
} else {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: unable to send connect ack.",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -633,34 +516,6 @@ static void mca_oob_tcp_peer_connected(mca_oob_tcp_peer_t* peer, int sd)
peer->peer_send_msg = (mca_oob_tcp_msg_t*)
opal_list_remove_first(&peer->peer_send_queue);
}
#if OPAL_WANT_IPV6
/* so we're connected. Let's close the other socket.
(some kind of magic --> might be a bug
*/
if (sd == peer->peer_sd) {
/* we've got an IPv4 connect; gently borrowed from
mca_oob_tcp_peer_shutdown. Refactor? Bug, FIXME
*/
if (peer->peer6_sd >= 0) {
opal_event_del(&peer->peer6_recv_event);
opal_event_del(&peer->peer6_send_event);
close(peer->peer6_sd);
peer->peer6_sd = -1;
}
} else {
if (sd == peer->peer6_sd) {
/* IPv6 connect successful, so let's close
the IPv4 socket. Update the whole event structure.
*/
opal_event_del(&peer->peer_recv_event);
opal_event_del(&peer->peer_send_event);
peer->peer_recv_event = peer->peer6_recv_event;
peer->peer_send_event = peer->peer6_send_event;
close(peer->peer_sd);
peer->peer_sd = sd;
}
}
#endif
opal_event_add(&peer->peer_send_event, 0);
}
}
@ -681,17 +536,6 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer)
peer->peer_state);
}
#if OPAL_WANT_IPV6
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_close(%p) sd6 %d state %d\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
(void*)peer,
peer->peer6_sd,
peer->peer_state);
}
#endif
/* if we lose the connection to the seed - abort */
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, ORTE_PROC_MY_HNP)) {
/* If we are not already inside orte_finalize, then call abort */
@ -744,16 +588,8 @@ void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
opal_event_del(&peer->peer_send_event);
CLOSE_THE_SOCKET(peer->peer_sd);
peer->peer_sd = -1;
peer->peer_current_af = AF_UNSPEC;
}
#if OPAL_WANT_IPV6
if (peer->peer6_sd >= 0) {
opal_event_del(&peer->peer6_recv_event);
opal_event_del(&peer->peer6_send_event);
CLOSE_THE_SOCKET(peer->peer6_sd);
peer->peer6_sd = -1;
}
#endif
opal_event_del(&peer->peer_timer_event);
peer->peer_state = MCA_OOB_TCP_CLOSED;
@ -809,9 +645,6 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd)
strerror(opal_socket_errno));
}
opal_event_del(&peer->peer_recv_event);
#if OPAL_WANT_IPV6
opal_event_del(&peer->peer6_recv_event);
#endif
mca_oob_tcp_peer_shutdown(peer);
opal_evtimer_add(&peer->peer_timer_event, &tv);
return ORTE_SUCCESS;
@ -963,16 +796,7 @@ int mca_oob_tcp_peer_send_ident(mca_oob_tcp_peer_t* peer)
hdr.msg_tag = 0;
MCA_OOB_TCP_HDR_HTON(&hdr);
if(mca_oob_tcp_peer_send_blocking(peer, peer->peer_sd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
#if OPAL_WANT_IPV6
if (-1 < peer->peer6_sd) {
if(mca_oob_tcp_peer_send_blocking(peer, peer->peer6_sd,
&hdr, sizeof(hdr)) != sizeof(hdr)) {
return ORTE_ERR_UNREACH;
}
}
#else
return ORTE_ERR_UNREACH;
#endif
}
return ORTE_SUCCESS;
}
@ -1070,7 +894,8 @@ static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user)
/* complete the current send */
mca_oob_tcp_msg_t* msg = peer->peer_send_msg;
if(mca_oob_tcp_msg_send_handler(msg, peer)) {
if(ntohl(msg->msg_hdr.msg_type) == MCA_OOB_TCP_PING ||
mca_oob_tcp_msg_send_handler(msg, peer)) {
mca_oob_tcp_msg_complete(msg, &peer->peer_name);
} else {
break;
@ -1108,13 +933,8 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg)
char dst[64];
char buff[255];
int sndbuf,rcvbuf,nodelay,flags;
#if OPAL_WANT_IPV6
struct sockaddr_in6 inaddr;
opal_socklen_t addrlen = sizeof(struct sockaddr_in6);
#else
struct sockaddr_in inaddr;
opal_socklen_t addrlen = sizeof(struct sockaddr_in);
#endif
struct sockaddr_storage inaddr;
opal_socklen_t addrlen = sizeof(struct sockaddr_storage);
opal_socklen_t optlen;
getsockname(peer->peer_sd, (struct sockaddr*)&inaddr, &addrlen);
@ -1207,11 +1027,6 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd)
if (sd == peer->peer_sd) {
opal_event_add(&peer->peer_recv_event, 0);
}
#if OPAL_WANT_IPV6
if (sd == peer->peer6_sd) {
opal_event_add(&peer->peer6_recv_event, 0);
}
#endif
if(mca_oob_tcp_component.tcp_debug > 0) {
mca_oob_tcp_peer_dump(peer, "accepted");
}

Просмотреть файл

@ -59,14 +59,9 @@ struct mca_oob_tcp_peer_t {
int peer_retries; /**< number of times connection attempt has failed */
mca_oob_tcp_addr_t* peer_addr; /**< the addresses of the peer process */
int peer_sd; /**< socket descriptor of the connection */
uint16_t peer_current_af; /**< currently connecting af */
opal_event_t peer_send_event; /**< registration with event thread for send events */
opal_event_t peer_recv_event; /**< registration with event thread for recv events */
#if OPAL_WANT_IPV6
int peer6_sd; /**< socket descriptor of the connection */
uint16_t current_af; /**< EXPERIMENTAL: select currently connecting af */
opal_event_t peer6_send_event; /**< registration with event thread for send events */
opal_event_t peer6_recv_event; /**< registration with event thread for recv events */
#endif
opal_event_t peer_timer_event; /**< timer for retrying connection failures */
opal_mutex_t peer_lock; /**< protect critical data structures */
opal_list_t peer_send_queue; /**< list of messages to send */

Просмотреть файл

@ -69,17 +69,13 @@ static void noop(int fd, short event, void *arg);
* @return OMPI error code (<0) on error number of bytes actually sent.
*/
int mca_oob_tcp_ping(
const orte_process_name_t* name,
const char* uri,
const struct timeval *timeout)
int
mca_oob_tcp_ping(const orte_process_name_t* name,
const char* uri,
const struct timeval *timeout)
{
int sd, flags, rc;
#if OPAL_WANT_IPV6
struct sockaddr_in6 inaddr;
#else
struct sockaddr_in inaddr;
#endif
struct sockaddr_storage inaddr;
fd_set fdset;
mca_oob_tcp_hdr_t hdr;
struct timeval tv;
@ -87,9 +83,10 @@ int mca_oob_tcp_ping(
#ifndef __WINDOWS__
struct opal_event sigpipe_handler;
#endif
socklen_t addrlen;
/* parse uri string */
if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, &inaddr))) {
if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, (struct sockaddr*) &inaddr))) {
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: invalid uri: %s\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -99,11 +96,7 @@ int mca_oob_tcp_ping(
}
/* create socket */
#if OPAL_WANT_IPV6
sd = socket(inaddr.sin6_family, SOCK_STREAM, 0);
#else
sd = socket(AF_INET, SOCK_STREAM, 0);
#endif
sd = socket(inaddr.ss_family, SOCK_STREAM, 0);
if (sd < 0) {
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed: %s (%d)\n",
@ -132,11 +125,27 @@ int mca_oob_tcp_ping(
}
}
switch (inaddr.ss_family) {
case AF_INET:
addrlen = sizeof(struct sockaddr_in);
break;
case AF_INET6:
addrlen = sizeof(struct sockaddr_in6);
break;
default:
addrlen = 0;
}
/* start the connect - will likely fail with EINPROGRESS */
FD_ZERO(&fdset);
if(connect(sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
if(connect(sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
/* connect failed? */
if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: connect failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(name),
strerror(opal_socket_errno),
opal_socket_errno);
CLOSE_THE_SOCKET(sd);
return ORTE_ERR_UNREACH;
}

Просмотреть файл

@ -23,136 +23,6 @@
#include "orte/mca/oob/tcp/oob_tcp.h"
/*
* Similiar to unix readv(2)
*
* @param peer (IN) Opaque name of peer process or ORTE_NAME_WILDCARD for wildcard receive.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param types (IN) Parallel array to iovecs describing data type of each iovec element.
* @param count (IN) Number of elements in iovec array.
* @param tag (IN) User supplied tag for matching send/recv.
* @param flags (IN) May be MCA_OOB_PEEK to return up to the number of bytes provided in the
* iovec array without removing the message from the queue.
* @return OMPI error code (<0) on error or number of bytes actually received.
*/
/*
 * Blocking receive from a peer (similar to unix readv(2)).
 *
 * @param peer  (IN) Name of the sending peer (or wildcard).
 * @param iov   (IN) Array of iovecs describing user buffers and lengths.
 * @param count (IN) Number of elements in the iovec array.
 * @param tag   (IN) User supplied tag for matching send/recv.
 * @param flags (IN) MCA_OOB_ALLOC / MCA_OOB_PEEK / MCA_OOB_TRUNC.
 * @return ORTE error code (<0) on error, or number of bytes received.
 *
 * Matches against the unexpected-message queue first; if no message has
 * arrived yet, posts a receive and blocks in mca_oob_tcp_msg_wait until
 * the progress engine completes it.
 */
int mca_oob_tcp_recv(
    orte_process_name_t* peer,
    struct iovec *iov,
    int count,
    int tag,
    int flags)
{
    mca_oob_tcp_msg_t *msg;
    int i, rc = 0, size = 0;

    if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
        opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv: tag %d\n",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(peer),
                    tag);
    }

    /* lock the tcp struct */
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* check to see if a matching receive is on the list */
    msg = mca_oob_tcp_msg_match_recv(peer, tag);
    if(NULL != msg) {
        if(msg->msg_rc < 0) {
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
            return msg->msg_rc;
        }

        if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
            opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv*unexpected*: tag %d size %lu\n",
                        ORTE_NAME_ARGS(orte_process_info.my_name),
                        ORTE_NAME_ARGS(peer),
                        tag, (unsigned long)(msg->msg_hdr.msg_size) );
        }

        /* if we are returning an allocated buffer - just take it from the message */
        if(flags & MCA_OOB_ALLOC) {
            if(NULL == iov || 0 == count) {
                /* BUG FIX: the original returned here while still holding
                 * tcp_match_lock (acquired above); every other exit path
                 * releases it.  Unlock before returning the error. */
                OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
                return ORTE_ERR_BAD_PARAM;
            }
            /* hand the message's receive buffer to the caller; clearing
             * msg_rwbuf transfers ownership so MSG_RETURN won't free it */
            iov[0].iov_base = (ompi_iov_base_ptr_t)msg->msg_rwbuf;
            iov[0].iov_len = msg->msg_hdr.msg_size;
            msg->msg_rwbuf = NULL;
            rc = msg->msg_hdr.msg_size;
        } else {
            /* if we are just doing peek, return bytes without dequeing message */
            rc = mca_oob_tcp_msg_copy(msg, iov, count);
            if(rc >= 0 && MCA_OOB_TRUNC & flags) {
                rc = 0;
                /* skip first iovec element which is the header */
                for(i=1; i<msg->msg_rwcnt+1; i++)
                    rc += msg->msg_rwiov[i].iov_len;
            }
            if(MCA_OOB_PEEK & flags) {
                OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
                return rc;
            }
        }

        /* otherwise dequeue the message and return to free list */
        opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t *) msg);
        MCA_OOB_TCP_MSG_RETURN(msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
        return rc;
    }

    /* the message has not already been received. So we add it to the receive queue */
    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if(NULL == msg) {
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
        return rc;
    }

    /* determine overall size of user supplied buffer */
    for(i = 0; i < count; i++) {
        size += iov[i].iov_len;
    }

    if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
        opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv*expected*: tag %d size %lu\n",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(peer),
                    tag, (unsigned long)(size) );
    }

    /* fill in the struct describing the posted receive */
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
    msg->msg_hdr.msg_src = *peer;
    if (NULL == orte_process_info.my_name) {
        msg->msg_hdr.msg_dst = *ORTE_NAME_INVALID;
    } else {
        msg->msg_hdr.msg_dst = *orte_process_info.my_name;
    }
    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    msg->msg_cbfunc = NULL;
    msg->msg_cbdata = NULL;
    msg->msg_complete = false;
    msg->msg_peer = *peer;
    msg->msg_rwbuf = NULL;
    msg->msg_rwiov = NULL;
    opal_list_append(&mca_oob_tcp_component.tcp_msg_post, (opal_list_item_t *) msg);
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* wait for the receive to complete */
    mca_oob_tcp_msg_wait(msg, &rc);
    MCA_OOB_TCP_MSG_RETURN(msg);
    return rc;
}
/**
* Process a matched posted receive
@ -165,26 +35,18 @@ static void mca_oob_tcp_msg_matched(mca_oob_tcp_msg_t* msg, mca_oob_tcp_msg_t* m
int i,rc;
if(match->msg_rc < 0) {
rc = match->msg_rc;
}
/* if we are returning an allocated buffer - just take it from the message */
else if(msg->msg_flags & MCA_OOB_ALLOC) {
msg->msg_uiov[0].iov_base = (ompi_iov_base_ptr_t)match->msg_rwbuf;
msg->msg_uiov[0].iov_len = match->msg_hdr.msg_size;
match->msg_rwbuf = NULL;
rc = match->msg_hdr.msg_size;
} else {
if (msg->msg_flags & ORTE_RML_ALLOC) match->msg_flags |= ORTE_RML_ALLOC;
/* if we are just doing peek, return bytes without dequeing message */
rc = mca_oob_tcp_msg_copy(match, msg->msg_uiov, msg->msg_ucnt);
if(rc >= 0 && MCA_OOB_TRUNC & msg->msg_flags) {
if(rc >= 0 && ORTE_RML_TRUNC & msg->msg_flags) {
rc = 0;
for(i=1; i<match->msg_rwcnt+1; i++)
rc += match->msg_rwiov[i].iov_len;
}
if(MCA_OOB_PEEK & msg->msg_flags) {
if(ORTE_RML_PEEK & msg->msg_flags) {
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
msg->msg_cbfunc(rc,
&match->msg_peer,
@ -232,7 +94,7 @@ int mca_oob_tcp_recv_nb(
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata)
{
mca_oob_tcp_msg_t *msg;
@ -279,7 +141,7 @@ int mca_oob_tcp_recv_nb(
/* acquire the match lock */
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
if(flags & MCA_OOB_PERSISTENT) {
if(flags & ORTE_RML_PERSISTENT) {
opal_list_append(&mca_oob_tcp_component.tcp_msg_post, (opal_list_item_t *) msg);
while(NULL != (match = mca_oob_tcp_msg_match_recv(peer,tag))) {

Просмотреть файл

@ -75,102 +75,6 @@ static int mca_oob_tcp_send_self(
return size;
}
/*
* Similiar to unix writev(2).
*
* @param peer (IN) Opaque name of peer process.
* @param msg (IN) Array of iovecs describing user buffers and lengths.
* @param count (IN) Number of elements in iovec array.
* @param flags (IN) Currently unused.
* @return OMPI error code (<0) on error number of bytes actually sent.
*/
/*
 * Blocking send to a peer (similar to unix writev(2)).
 *
 * @param name  (IN) Name of the destination peer process.
 * @param iov   (IN) Array of iovecs describing user buffers and lengths.
 * @param count (IN) Number of elements in the iovec array.
 * @param tag   (IN) User supplied tag for matching send/recv.
 * @param flags (IN) Stored on the message; not otherwise inspected here.
 * @return ORTE error code (<0) on error, or number of payload bytes sent.
 *
 * Ownership note: on successful handoff the message is completed and
 * returned to the free list here; on ORTE_ERR_ADDRESSEE_UNKNOWN the msg
 * is deliberately NOT returned (presumably the peer layer retains it --
 * NOTE(review): confirm against mca_oob_tcp_peer_send).
 */
int mca_oob_tcp_send(
orte_process_name_t* name,
struct iovec *iov,
int count,
int tag,
int flags)
{
mca_oob_tcp_peer_t* peer = mca_oob_tcp_peer_lookup(name);
mca_oob_tcp_msg_t* msg;
int size;
int rc;
if(NULL == peer)
return ORTE_ERR_UNREACH;
/* calculate the size of the message */
/* (rc is reused as the loop index here; it is overwritten below) */
size = 0;
for(rc = 0; rc < count; rc++) {
size += iov[rc].iov_len;
}
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send: tag %d size %lu\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
tag, (unsigned long)size );
}
MCA_OOB_TCP_MSG_ALLOC(msg, rc);
if(NULL == msg) {
return rc;
}
/* turn the size to network byte order so there will be no problems */
msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
msg->msg_hdr.msg_size = size;
msg->msg_hdr.msg_tag = tag;
if (NULL == orte_process_info.my_name) {
msg->msg_hdr.msg_src = *ORTE_NAME_INVALID;
} else {
msg->msg_hdr.msg_src = *orte_process_info.my_name;
}
msg->msg_hdr.msg_dst = *name;
/* create one additional iovect that will hold the header */
msg->msg_type = MCA_OOB_TCP_POSTED;
msg->msg_rc = 0;
msg->msg_flags = flags;
msg->msg_uiov = iov;
msg->msg_ucnt = count;
/* rwiov[0] carries the wire header; user iovecs are copied after it */
msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, count+1);
msg->msg_rwiov[0].iov_base = (ompi_iov_base_ptr_t)(&msg->msg_hdr);
msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr);
msg->msg_rwptr = msg->msg_rwiov;
msg->msg_rwcnt = msg->msg_rwnum = count + 1;
memcpy(msg->msg_rwiov+1, msg->msg_uiov, sizeof(struct iovec)*msg->msg_ucnt);
msg->msg_rwbuf = NULL;
msg->msg_cbfunc = NULL;
msg->msg_cbdata = NULL;
msg->msg_complete = false;
msg->msg_peer = peer->peer_name;
/* self-send short-circuits the socket path entirely; note the header
 * is intentionally still in host byte order on this path */
if (NULL != name && NULL != orte_process_info.my_name &&
ORTE_EQUAL == mca_oob_tcp_process_name_compare(name, orte_process_info.my_name)) { /* local delivery */
rc = mca_oob_tcp_send_self(peer,msg,iov,count);
return rc;
}
/* convert header to network byte order only once we know it goes on the wire */
MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
rc = mca_oob_tcp_peer_send(peer, msg);
if(rc != ORTE_SUCCESS) {
if (rc != ORTE_ERR_ADDRESSEE_UNKNOWN) {
MCA_OOB_TCP_MSG_RETURN(msg);
}
return rc;
}
/* block until the progress engine completes the send */
rc = mca_oob_tcp_msg_wait(msg, &size);
MCA_OOB_TCP_MSG_RETURN(msg);
if(rc != ORTE_SUCCESS) {
return rc;
}
/* report payload bytes only: subtract the wire header we prepended */
size -= sizeof(mca_oob_tcp_hdr_t);
return size;
}
/*
* Non-blocking version of mca_oob_send().
@ -184,14 +88,13 @@ int mca_oob_tcp_send(
* @return OMPI error code (<0) on error number of bytes actually sent.
*
*/
int mca_oob_tcp_send_nb(
orte_process_name_t* name,
struct iovec* iov,
int count,
int tag,
int flags,
mca_oob_callback_fn_t cbfunc,
orte_rml_callback_fn_t cbfunc,
void* cbdata)
{
mca_oob_tcp_peer_t* peer = mca_oob_tcp_peer_lookup(name);

Просмотреть файл

@ -67,7 +67,7 @@ int orte_pls_base_orted_append_basic_args(int *argc, char ***argv,
int *node_name_index,
orte_std_cntr_t num_procs)
{
char *param = NULL, *uri = NULL;
char *param = NULL, *contact_info = NULL;
int loc_id;
char * amca_param_path = NULL;
char * amca_param_prefix = NULL;
@ -124,25 +124,25 @@ int orte_pls_base_orted_append_basic_args(int *argc, char ***argv,
/* setup ns contact info */
opal_argv_append(argc, argv, "--nsreplica");
if (NULL != orte_process_info.ns_replica_uri) {
uri = strdup(orte_process_info.ns_replica_uri);
contact_info = strdup(orte_process_info.ns_replica_uri);
} else {
uri = orte_rml.get_uri();
contact_info = orte_rml.get_contact_info();
}
asprintf(&param, "\"%s\"", uri);
asprintf(&param, "\"%s\"", contact_info);
opal_argv_append(argc, argv, param);
free(uri);
free(contact_info);
free(param);
/* setup gpr contact info */
opal_argv_append(argc, argv, "--gprreplica");
if (NULL != orte_process_info.gpr_replica_uri) {
uri = strdup(orte_process_info.gpr_replica_uri);
contact_info = strdup(orte_process_info.gpr_replica_uri);
} else {
uri = orte_rml.get_uri();
contact_info = orte_rml.get_contact_info();
}
asprintf(&param, "\"%s\"", uri);
asprintf(&param, "\"%s\"", contact_info);
opal_argv_append(argc, argv, param);
free(uri);
free(contact_info);
free(param);
/*

Просмотреть файл

@ -34,7 +34,7 @@
#include "orte/dss/dss.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
@ -105,7 +105,7 @@ int orte_pls_base_orted_exit(struct timeval *timeout, opal_list_t *attrs)
}
/* send it! */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
ORTE_ERROR_LOG(rc);
}
OBJ_DESTRUCT(&cmd);
@ -159,7 +159,7 @@ int orte_pls_base_orted_kill_local_procs(orte_jobid_t job, struct timeval *timeo
if (allocated) free(jobs); /* not needed any more */
/* send it! */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
ORTE_ERROR_LOG(rc);
}
OBJ_DESTRUCT(&cmd);
@ -222,7 +222,7 @@ int orte_pls_base_orted_signal_local_procs(orte_jobid_t job, int32_t signal, opa
}
/* send it! */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(0, &cmd, ORTE_RML_TAG_DAEMON))) {
ORTE_ERROR_LOG(rc);
}
OBJ_DESTRUCT(&cmd);

Просмотреть файл

@ -27,6 +27,7 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/pls/base/pls_private.h"
@ -62,7 +63,7 @@ int orte_pls_base_launch_apps(orte_job_map_t *map)
}
/* send the command to the daemons */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(0, buffer, ORTE_RML_TAG_DAEMON))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(0, buffer, ORTE_RML_TAG_DAEMON))) {
ORTE_ERROR_LOG(rc);
}
OBJ_RELEASE(buffer);
@ -80,7 +81,7 @@ int orte_pls_base_daemon_callback(orte_std_cntr_t num_daemons)
for(i = 0; i < num_daemons; i++) {
OBJ_CONSTRUCT(&ack, orte_buffer_t);
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &ack, ORTE_RML_TAG_ORTED_CALLBACK);
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &ack, ORTE_RML_TAG_ORTED_CALLBACK, 0);
if(0 > rc) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&ack);

Просмотреть файл

@ -91,7 +91,7 @@ int orte_pls_proxy_launch(orte_jobid_t job)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -178,7 +178,7 @@ int orte_pls_proxy_terminate_job(orte_jobid_t job, struct timeval *timeout, opal
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -259,7 +259,7 @@ int orte_pls_proxy_terminate_orteds(struct timeval *timeout, opal_list_t *attrs)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -337,7 +337,7 @@ int orte_pls_proxy_signal_job(orte_jobid_t job, int32_t signal, opal_list_t *att
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -404,7 +404,7 @@ int orte_pls_proxy_terminate_proc(const orte_process_name_t* name)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -476,7 +476,7 @@ int orte_pls_proxy_signal_proc(const orte_process_name_t* name, int32_t signal)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) {
if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -258,14 +258,14 @@ char **environ;
if (NULL != orte_process_info.ns_replica_uri) {
nsuri = strdup(orte_process_info.ns_replica_uri);
} else {
nsuri = orte_rml.get_uri();
nsuri = orte_rml.get_contact_info();
}
/* setup gpr contact info */
if (NULL != orte_process_info.gpr_replica_uri) {
gpruri = strdup(orte_process_info.gpr_replica_uri);
} else {
gpruri = orte_rml.get_uri();
gpruri = orte_rml.get_contact_info();
}
/* build up the array of task specifications */

Просмотреть файл

@ -77,7 +77,7 @@ int orte_ras_base_proxy_allocate(orte_jobid_t job, opal_list_t *attributes)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_ras_base_proxy_replica, answer, ORTE_RML_TAG_RAS)) {
if (0 > orte_rml.recv_buffer(orte_ras_base_proxy_replica, answer, ORTE_RML_TAG_RAS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@ -141,7 +141,7 @@ int orte_ras_base_proxy_deallocate(orte_jobid_t job)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_ras_base_proxy_replica, answer, ORTE_RML_TAG_RAS)) {
if (0 > orte_rml.recv_buffer(orte_ras_base_proxy_replica, answer, ORTE_RML_TAG_RAS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -82,7 +82,7 @@ int orte_rds_proxy_query(orte_jobid_t job)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (0 > orte_rml.recv_buffer(orte_rds_proxy_replica, answer, ORTE_RML_TAG_RDS)) {
if (0 > orte_rml.recv_buffer(orte_rds_proxy_replica, answer, ORTE_RML_TAG_RDS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -85,7 +85,7 @@ int orte_rmaps_base_proxy_map_job(orte_jobid_t job, opal_list_t *attributes)
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_RMAPS)) {
if (0 > orte_rml.recv_buffer(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_RMAPS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -38,7 +38,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/smr/smr.h"
#include "orte/runtime/runtime.h"
@ -165,7 +165,7 @@ int orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_message_t *msg)
* destination here since these messages are intended to release
* a process from an xcast gate
*/
if (ORTE_SUCCESS != (rc = orte_rml.xcast(job, buffer, ORTE_RML_TAG_XCAST_BARRIER))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(job, buffer, ORTE_RML_TAG_XCAST_BARRIER))) {
ORTE_ERROR_LOG(rc);
}
OBJ_RELEASE(buffer);
@ -174,4 +174,3 @@ CLEANUP:
return rc;
}

Просмотреть файл

@ -36,6 +36,8 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
@ -53,7 +55,7 @@ int orte_rmgr_base_xconnect(orte_jobid_t child, orte_jobid_t parent)
/* get the child's contact info */
name.jobid = child;
if (ORTE_SUCCESS != (rc = orte_rml.get_contact_info(&name, &data))) {
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(&name, &data))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -74,7 +76,7 @@ int orte_rmgr_base_xconnect(orte_jobid_t child, orte_jobid_t parent)
return rc;
}
/* now send it */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(parent, buf, ORTE_RML_TAG_RML))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(parent, buf, ORTE_RML_TAG_RML_INFO_UPDATE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(data);
@ -88,7 +90,7 @@ int orte_rmgr_base_xconnect(orte_jobid_t child, orte_jobid_t parent)
/* get the parent's contact info */
name.jobid = parent;
if (ORTE_SUCCESS != (rc = orte_rml.get_contact_info(&name, &data))) {
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(&name, &data))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -109,7 +111,7 @@ int orte_rmgr_base_xconnect(orte_jobid_t child, orte_jobid_t parent)
return rc;
}
/* now send it */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(child, buf, ORTE_RML_TAG_RML))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(child, buf, ORTE_RML_TAG_RML_INFO_UPDATE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(data);

Просмотреть файл

@ -142,7 +142,7 @@ static int orte_rmgr_proxy_setup_job(orte_app_context_t** app_context,
/* wait for response */
OBJ_CONSTRUCT(&rsp, orte_buffer_t);
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR))) {
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&rsp);
return rc;
@ -172,6 +172,7 @@ static int orte_rmgr_proxy_setup_job(orte_app_context_t** app_context,
return rc;
}
static int orte_rmgr_proxy_setup_stage_gates(orte_jobid_t jobid)
{
orte_buffer_t cmd;
@ -209,7 +210,7 @@ static int orte_rmgr_proxy_setup_stage_gates(orte_jobid_t jobid)
/* wait for response */
OBJ_CONSTRUCT(&rsp, orte_buffer_t);
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR))) {
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&rsp);
return rc;
@ -291,7 +292,7 @@ static void orte_rmgr_proxy_xconnect_callback(orte_gpr_notify_data_t *data, void
/* wait for response */
OBJ_CONSTRUCT(&rsp, orte_buffer_t);
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR))) {
if(0 > (rc = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, &rsp, ORTE_RML_TAG_RMGR, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&rsp);
return;

Просмотреть файл

@ -17,13 +17,13 @@
#
headers += \
base/base.h
base/base.h \
base/rml_contact.h
libmca_rml_la_SOURCES += \
base/rml_base_open.c \
base/rml_base_close.c \
base/rml_base_select.c \
base/rml_base_components.c \
base/rml_base_receive.c \
base/rml_base_contact.c \
base/data_type_support/rml_data_type_compare_fns.c \
base/data_type_support/rml_data_type_copy_fns.c \
base/data_type_support/rml_data_type_packing_fns.c \

Просмотреть файл

@ -23,10 +23,10 @@
#include "opal/mca/mca.h"
#include "orte/mca/rml/rml.h"
#include "orte/dss/dss_types.h"
BEGIN_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Global functions for the RML
*/
@ -35,18 +35,32 @@ ORTE_DECLSPEC int orte_rml_base_open(void);
ORTE_DECLSPEC int orte_rml_base_select(void);
ORTE_DECLSPEC int orte_rml_base_close(void);
/*
* Data type support
*/
ORTE_DECLSPEC int orte_rml_base_compare_tags(orte_rml_tag_t *value1, orte_rml_tag_t *value2, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_copy_tag(orte_rml_tag_t **dest, orte_rml_tag_t *src, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_pack_tag(orte_buffer_t *buffer, const void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_print_tag(char **output, char *prefix, orte_rml_tag_t *src, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_compare_tags(orte_rml_tag_t *value1,
orte_rml_tag_t *value2,
orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_copy_tag(orte_rml_tag_t **dest,
orte_rml_tag_t *src,
orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_pack_tag(orte_buffer_t *buffer,
const void *src,
orte_std_cntr_t num_vals,
orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_print_tag(char **output,
char *prefix,
orte_rml_tag_t *src,
orte_data_type_t type);
ORTE_DECLSPEC void orte_rml_base_std_obj_release(orte_data_value_t *value);
ORTE_DECLSPEC int orte_rml_base_size_tag(size_t *size, orte_rml_tag_t *src, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_unpack_tag(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_size_tag(size_t *size,
orte_rml_tag_t *src,
orte_data_type_t type);
ORTE_DECLSPEC int orte_rml_base_unpack_tag(orte_buffer_t *buffer,
void *dest,
orte_std_cntr_t *num_vals,
orte_data_type_t type);
/*
@ -57,22 +71,12 @@ int orte_rml_base_comm_stop(void);
void orte_rml_base_recv(int status, orte_process_name_t* sender,
orte_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/*
* Global struct holding the base parameters.
*/
struct orte_rml_base_t {
int rml_output;
int rml_debug;
opal_list_t rml_components;
};
typedef struct orte_rml_base_t orte_rml_base_t;
ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base;
ORTE_DECLSPEC extern orte_rml_module_t orte_rml;
ORTE_DECLSPEC extern orte_rml_component_t orte_rml_component;
ORTE_DECLSPEC extern orte_process_name_t orte_rml_name_any;
ORTE_DECLSPEC extern orte_process_name_t orte_rml_name_seed;
ORTE_DECLSPEC extern int orte_rml_base_output;
ORTE_DECLSPEC extern opal_list_t orte_rml_base_components;
/* For FT only, please don't use */
ORTE_DECLSPEC extern orte_rml_component_t *orte_rml_component;
/*
* This is the base priority for a RML wrapper component
@ -81,7 +85,6 @@ ORTE_DECLSPEC extern orte_process_name_t orte_rml_name_seed;
*/
#define RML_SELECT_WRAPPER_PRIORITY -128
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#endif /* MCA_RML_BASE_H */

217
orte/mca/rml/base/rml_base_components.c Обычный файл
Просмотреть файл

@ -0,0 +1,217 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
#include "orte/dss/dss.h"
#include "orte/dss/dss_types.h"
#include "orte/mca/errmgr/errmgr.h"
/* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct. */
#include "orte/mca/rml/base/static-components.h"
int orte_rml_base_output = -1;
opal_list_t orte_rml_base_subscriptions;
orte_rml_module_t orte_rml;
opal_list_t orte_rml_base_components;
orte_rml_component_t *orte_rml_component = NULL;
static bool component_open_called = false;
/*
 * Open the RML framework: initialize base globals, register the
 * ORTE_RML_TAG data type with the DSS, register the rml_wrapper MCA
 * parameter, and open all available RML components.
 *
 * @return ORTE_SUCCESS, or the error code from DSS type registration
 *         or mca_base_components_open.
 */
int
orte_rml_base_open(void)
{
int ret;
orte_data_type_t tmp;
/* Initialize globals */
OBJ_CONSTRUCT(&orte_rml_base_components, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base_subscriptions, opal_list_t);
/* register the base system types with the DPS */
tmp = ORTE_RML_TAG;
if (ORTE_SUCCESS != (ret = orte_dss.register_type(orte_rml_base_pack_tag,
orte_rml_base_unpack_tag,
(orte_dss_copy_fn_t)orte_rml_base_copy_tag,
(orte_dss_compare_fn_t)orte_rml_base_compare_tags,
(orte_dss_size_fn_t)orte_rml_base_size_tag,
(orte_dss_print_fn_t)orte_rml_base_print_tag,
(orte_dss_release_fn_t)orte_rml_base_std_obj_release,
ORTE_DSS_UNSTRUCTURED,
"ORTE_RML_TAG", &tmp))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Which RML Wrapper component to use, if any
* - NULL or "" = No wrapper
* - ow. select that specific wrapper component
* (the value itself is read later during selection; only the
* registration happens here)
*/
mca_base_param_reg_string_name("rml", "wrapper",
"Use a Wrapper component around the selected RML component",
false, false,
NULL, NULL);
/* Open up all available components */
ret = mca_base_components_open("rml",
orte_rml_base_output,
mca_rml_base_static_components,
&orte_rml_base_components,
true);
/* NOTE(review): the flag is set even when components_open fails --
* presumably so close() still releases the components list; confirm */
component_open_called = true;
return ret;
}
/*
 * Select the highest-priority RML component, optionally interposing a
 * wrapper component named by the "rml_wrapper" MCA parameter.
 *
 * Fix vs. previous version: every module returned by rml_init() that is
 * NOT kept (lost the priority comparison, or sits in the wrapper priority
 * range but is not the requested wrapper) is now finalized instead of
 * being leaked.  The code already finalized a *replaced* selected module;
 * this makes the discard paths consistent with that.
 *
 * Returns ORTE_SUCCESS when a component was selected, ORTE_ERROR otherwise.
 */
int
orte_rml_base_select(void)
{
    opal_list_item_t *item;
    int selected_priority = -1;
    orte_rml_component_t *selected_component = NULL;
    orte_rml_module_t *selected_module = NULL;
    int wrapper_priority = -1;
    orte_rml_component_t *wrapper_component = NULL;
    orte_rml_module_t *wrapper_module = NULL;
    char *rml_wrapper = NULL;

    /* Which wrapper to interpose, if any (NULL or "" means none) */
    mca_base_param_reg_string_name("rml", "wrapper",
                                   "Use a Wrapper component around the selected RML component",
                                   false, false,
                                   NULL, &rml_wrapper);

    for (item = opal_list_get_first(&orte_rml_base_components);
         item != opal_list_get_end(&orte_rml_base_components) ;
         item = opal_list_get_next(item)) {
        mca_base_component_list_item_t *cli;
        orte_rml_component_t* component;

        cli = (mca_base_component_list_item_t *) item;
        component = (orte_rml_component_t *) cli->cli_component;

        opal_output_verbose(10, orte_rml_base_output,
                            "orte_rml_base_select: initializing %s component %s",
                            component->rml_version.mca_type_name,
                            component->rml_version.mca_component_name);

        if (NULL == component->rml_init) {
            opal_output_verbose(10, orte_rml_base_output,
                                "orte_rml_base_select: no init function; ignoring component");
        } else {
            int priority = 0;
            orte_rml_module_t* module = component->rml_init(&priority);
            if (NULL == module) {
                opal_output_verbose(10, orte_rml_base_output,
                                    "orte_rml_base_select: init returned failure");
                continue;
            }

            if (NULL != rml_wrapper &&
                RML_SELECT_WRAPPER_PRIORITY >= priority) {
                /* Wrapper-priority range: keep the module only if it is the
                 * wrapper the user asked for */
                if (0 == strncmp(component->rml_version.mca_component_name,
                                 rml_wrapper,
                                 strlen(rml_wrapper))) {
                    wrapper_priority = priority;
                    wrapper_component = component;
                    wrapper_module = module;
                } else if (NULL != module->finalize) {
                    /* fix: previously leaked -- give the discarded module a
                     * chance to release its resources */
                    module->finalize();
                }
            } else if (priority > selected_priority) {
                /* Normal module: it wins the selection, so finalize the one
                 * it replaces (as before) */
                if (NULL != selected_module && NULL != selected_module->finalize) {
                    selected_module->finalize();
                }
                selected_priority = priority;
                selected_component = component;
                selected_module = module;
            } else if (NULL != module->finalize) {
                /* fix: previously leaked -- module lost the priority
                 * comparison, finalize it */
                module->finalize();
            }
        }
    }

    /*
     * Unload all components that were not selected (the wrapper component,
     * when present, is also kept)
     */
    item = opal_list_get_first(&orte_rml_base_components);
    while (item != opal_list_get_end(&orte_rml_base_components)) {
        opal_list_item_t* next = opal_list_get_next(item);
        orte_rml_component_t* component;
        mca_base_component_list_item_t *cli;

        cli = (mca_base_component_list_item_t *) item;
        component = (orte_rml_component_t *) cli->cli_component;

        /* Keep it if it is the wrapper component */
        if (NULL != wrapper_component &&
            component == wrapper_component) {
            item = next;
            continue;
        }
        /* Not the selected component */
        if (component != selected_component) {
            opal_output_verbose(10, orte_rml_base_output,
                                "orte_rml_base_select: module %s unloaded",
                                component->rml_version.mca_component_name);
            mca_base_component_repository_release((mca_base_component_t *) component);
            opal_list_remove_item(&orte_rml_base_components, item);
            OBJ_RELEASE(item);
        }
        item = next;
    }

    /* setup reference to selected module */
    if (NULL != selected_module) {
        orte_rml = *selected_module;
        orte_rml_component = selected_component;
    }

    /* If a wrapper component was requested, re-init it so it can switch
     * out the selected module (the wrapper's rml_init swaps itself into
     * orte_rml -- see the ftrm component's enable_comm) */
    if (NULL != wrapper_component) {
        wrapper_component->rml_init(NULL);
    }

    if (NULL != rml_wrapper) {
        free(rml_wrapper);
    }

    if (NULL == selected_component) return ORTE_ERROR;

    return ORTE_SUCCESS;
}
/*
 * Shut the RML framework down: close whatever components were opened by
 * orte_rml_base_open() and tear down the framework-global lists.
 *
 * Always returns ORTE_SUCCESS.
 */
int
orte_rml_base_close(void)
{
    /* Only close components if the open actually ran. */
    if (component_open_called) {
        mca_base_components_close(orte_rml_base_output,
                                  &orte_rml_base_components,
                                  NULL);
    }

    /* Destroy the global containers unconditionally. */
    OBJ_DESTRUCT(&orte_rml_base_components);
    OBJ_DESTRUCT(&orte_rml_base_subscriptions);

    return ORTE_SUCCESS;
}

318
orte/mca/rml/base/rml_base_contact.c Обычный файл
Просмотреть файл

@ -0,0 +1,318 @@
#include "orte_config.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "opal/util/argv.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "orte/orte_types.h"
#include "opal/class/opal_hash_table.h"
#include "orte/class/orte_proc_table.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/oob/tcp/oob_tcp.h" /* BWB - FIX ME */
/* List of active contact-info subscriptions; defined elsewhere in the
 * RML base (see orte_rml_base_close, which destructs it). */
extern opal_list_t orte_rml_base_subscriptions;

/* Records one GPR subscription made for a job, so the subscription id
 * can be used later (e.g. to cancel it). Stored on
 * orte_rml_base_subscriptions by orte_rml_base_register_subscription(). */
struct orte_rml_base_subscription_t {
    opal_list_item_t item;              /* superclass: makes this list-storable */
    orte_jobid_t jobid;                 /* job this subscription watches */
    orte_gpr_subscription_id_t subid;   /* id returned by orte_gpr.subscribe_1 */
};
typedef struct orte_rml_base_subscription_t orte_rml_base_subscription_t;

/* No custom construct/destruct needed beyond the opal_list_item_t base. */
OBJ_CLASS_INSTANCE(orte_rml_base_subscription_t, opal_list_item_t,
                   NULL, NULL);
/*
 * Fetch the OOB-TCP contact info and RML IP address keys for the given
 * job from the GPR and append the returned values to *data (allocating
 * *data on first use).
 *
 * @param job    jobid whose segment is queried
 * @param tokens GPR tokens restricting the query (NULL = all containers)
 * @param data   [IN/OUT] notify-data accumulator; created if *data is NULL
 *
 * Fix vs. previous version: "segment" was only freed on the orte_gpr.get
 * error path and leaked on success; it is now freed on both.
 */
static int get_contact_info(orte_jobid_t job, char **tokens, orte_gpr_notify_data_t **data)
{
    char *segment;
    char *keys[] = {
        ORTE_OOB_TCP_KEY,
        ORTE_PROC_RML_IP_ADDRESS_KEY,
        NULL
    };
    orte_gpr_value_t **values;
    orte_std_cntr_t cnt, i, idx;
    int rc;

    /* define the segment */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* get the data */
    if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_AND | ORTE_GPR_KEYS_OR,
                                           segment, tokens, keys, &cnt, &values))) {
        ORTE_ERROR_LOG(rc);
        free(segment);
        return rc;
    }
    /* fix: segment is no longer needed once the query has run */
    free(segment);

    /* see if we got data back */
    if (0 < cnt) {
        /* build the data into the notify_data object. If the data
         * pointer is NULL, then we are the first values, so initialize
         * it. Otherwise, just add the data to it
         */
        if (NULL == *data) {
            *data = OBJ_NEW(orte_gpr_notify_data_t);
        }
        for (i=0; i < cnt; i++) {
            /* ownership of values[i] transfers into (*data)->values;
             * NOTE(review): on failure here the remaining values[] entries
             * are not released -- confirm gpr value ownership rules */
            if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&idx, (*data)->values, (void*)values[i]))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            ++(*data)->cnt;
        }
    }
    return ORTE_SUCCESS;
}
/*
 * Get contact info for a process or job.  With a WILDCARD vpid, the info
 * for all procs in the job is returned; with a WILDCARD jobid the all-jobs
 * lookup is not implemented and the call succeeds without touching *data.
 *
 * @param name  process name selecting the proc(s) of interest
 * @param data  [IN/OUT] accumulator filled by get_contact_info()
 *
 * Fix vs. previous version: the tokens array obtained from
 * orte_schema.get_proc_tokens was never freed on any return path.
 */
int
orte_rml_base_get_contact_info(orte_process_name_t *name,
                               orte_gpr_notify_data_t **data)
{
    char **tokens = NULL;
    orte_std_cntr_t i, num_tokens = 0;
    int rc = ORTE_SUCCESS;

    /* if the vpid is WILDCARD, then we want the info from all procs in the
     * specified job. This is the default condition, so do nothing for this
     * case. If the vpid is not WILDCARD, then go get the process tokens
     */
    if (ORTE_VPID_WILDCARD != name->vpid) {
        if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens, name))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* If the jobid is not WILDCARD, then we only want the info from the
     * specified job - this is the most common case, so treat it first
     */
    if (ORTE_JOBID_WILDCARD != name->jobid) {
        if (ORTE_SUCCESS != (rc = get_contact_info(name->jobid, tokens, data))) {
            ORTE_ERROR_LOG(rc);
        }
    }
    /* jobid WILDCARD would mean "all jobs"; that path was never implemented
     * and falls through returning success, as before */

    /* fix: release the tokens on every return path */
    if (NULL != tokens) {
        for (i = 0; i < num_tokens; i++) {
            free(tokens[i]);
        }
        free(tokens);
    }

    return rc;
}
/*
 * Register a GPR subscription that fires on the given trigger for the
 * given job and delivers OOB-TCP contact info to
 * orte_rml_base_contact_info_notify.  The subscription id is recorded on
 * orte_rml_base_subscriptions so it can be cancelled later.
 *
 * @param jobid   job whose segment is watched
 * @param trigger base trigger name to attach to
 *
 * Fix vs. previous version: "segment" was only freed on the subscribe_1
 * error path and leaked on success; it is now freed on both.
 */
int
orte_rml_base_register_subscription(orte_jobid_t jobid, char *trigger)
{
    char *sub_name, *segment, *trig_name;
    orte_rml_base_subscription_t *subscription;
    orte_gpr_subscription_id_t sub_id;
    int rc;

    /* register subscribe callback to receive notification when all processes have registered */
    subscription = OBJ_NEW(orte_rml_base_subscription_t);
    subscription->jobid = jobid;
    opal_list_append(&orte_rml_base_subscriptions, &subscription->item);

    if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
                                                                    ORTE_OOB_SUBSCRIPTION, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* attach to the specified trigger */
    if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
                                                               trigger, jobid))) {
        ORTE_ERROR_LOG(rc);
        free(sub_name);
        return rc;
    }

    /* define the segment */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        free(sub_name);
        free(trig_name);
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
                                                   ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
                                                   ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR | ORTE_GPR_STRIPPED,
                                                   segment,
                                                   NULL,  /* look at all containers on this segment */
                                                   ORTE_OOB_TCP_KEY,
                                                   orte_rml_base_contact_info_notify, NULL))) {
        ORTE_ERROR_LOG(rc);
        free(sub_name);
        free(trig_name);
        free(segment);
        return rc;
    }

    /* the id of each subscription is recorded
     * here so we can (if desired) cancel that subscription later
     */
    subscription->subid = sub_id;

    /* done with these, so release any memory */
    free(trig_name);
    free(sub_name);
    /* fix: segment was leaked on the success path */
    free(segment);

    return ORTE_SUCCESS;
}
/*
 * Publish this process's RML contact information (full contact string and
 * bare IP address) to the GPR under its job segment.
 *
 * The contact string is expected to look like "...://.../<addr>:<port>";
 * the address is extracted between the last '/' and the last ':'.
 *
 * Fixes vs. previous version:
 *  - "tmp2 = strrchr(tmp, '/') + 1" meant the NULL check could never fire
 *    when '/' was absent (NULL + 1 != NULL); the check now runs before the
 *    pointer is advanced.
 *  - values[0] was leaked on the invalid-address and values[1]-allocation
 *    error paths.
 */
int
orte_rml_base_register_contact_info(void)
{
    orte_std_cntr_t i, num_tokens;
    orte_data_value_t *values[2];
    char *tmp, *tmp2, *tmp3;
    char *segment, **tokens;
    char *keys[] = { ORTE_OOB_TCP_KEY, ORTE_PROC_RML_IP_ADDRESS_KEY};
    int rc;

    /* setup to put our contact info on registry */
    tmp = orte_rml.get_contact_info();
    values[0] = OBJ_NEW(orte_data_value_t);
    if (NULL == values[0]) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    values[0]->type = ORTE_STRING;
    values[0]->data = strdup(tmp);
    free(tmp);

    /* setup the IP address for storage */
    tmp = orte_rml.get_contact_info();
    tmp2 = strrchr(tmp, '/');
    tmp3 = strrchr(tmp, ':');
    if (NULL == tmp2 || NULL == tmp3) {
        opal_output(0, "[%lu,%lu,%lu] orte_rml_base_init: invalid address \'%s\' "
                    "returned for selected oob interfaces.\n",
                    ORTE_NAME_ARGS(orte_process_info.my_name), tmp);
        ORTE_ERROR_LOG(ORTE_ERROR);
        free(tmp);
        OBJ_RELEASE(values[0]);  /* fix: was leaked on this path */
        return ORTE_ERROR;
    }
    tmp2++;        /* fix: skip past '/' only after the NULL check */
    *tmp3 = '\0';  /* terminate the address before the port */

    values[1] = OBJ_NEW(orte_data_value_t);
    if (NULL == values[1]) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        free(tmp);
        OBJ_RELEASE(values[0]);  /* fix: was leaked on this path */
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    values[1]->type = ORTE_STRING;
    values[1]->data = strdup(tmp2);
    free(tmp);

    /* define the segment */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        OBJ_RELEASE(values[1]);
        return rc;
    }

    /* get the process tokens */
    if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens,
                                                          orte_process_info.my_name))) {
        ORTE_ERROR_LOG(rc);
        free(segment);
        OBJ_RELEASE(values[0]);
        OBJ_RELEASE(values[1]);
        return rc;
    }

    /* put our contact info in registry */
    if (ORTE_SUCCESS != (rc = orte_gpr.put_N(ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND,
                                             segment, tokens, 2, keys, values))) {
        ORTE_ERROR_LOG(rc);
    }

    free(segment);
    for (i=0; i < num_tokens; i++) {
        free(tokens[i]);
        tokens[i] = NULL;
    }
    if (NULL != tokens) free(tokens);
    OBJ_RELEASE(values[0]);
    OBJ_RELEASE(values[1]);

    return rc;
}
/*
 * GPR subscription callback: walk the notify data, pull out every
 * ORTE_OOB_TCP_KEY value, and feed the contact string to the active
 * RML module via orte_rml.set_contact_info().
 *
 * @param data   notify data delivered by the GPR; data->values is a
 *               sparse pointer array holding data->cnt non-NULL entries
 * @param cbdata unused
 */
void
orte_rml_base_contact_info_notify(orte_gpr_notify_data_t* data,
                                  void* cbdata)
{
    orte_std_cntr_t i, j, k;
    orte_gpr_value_t **values, *value;
    orte_gpr_keyval_t *keyval;
    char *contact_info;

    /* process the callback */
    values = (orte_gpr_value_t**)(data->values)->addr;
    /* sparse scan: i walks every slot, k counts the non-NULL entries found
     * so we can stop after data->cnt of them */
    for(i = 0, k=0; k < data->cnt &&
                    i < (data->values)->size; i++) {
        if (NULL != values[i]) {
            k++;
            value = values[i];
            for(j = 0; j < value->cnt; j++) {
                /* check to make sure this is the requested key */
                keyval = value->keyvals[j];
                if(strcmp(keyval->key, ORTE_OOB_TCP_KEY) != 0)
                    continue;
                /* NOTE(review): return code of orte_dss.get is not checked;
                 * if it fails, contact_info is used uninitialized. Ownership
                 * of the extracted string (caller-freed vs. borrowed) is not
                 * visible here -- confirm against the dss API. */
                orte_dss.get((void**)&(contact_info), keyval->value, ORTE_STRING);
                orte_rml.set_contact_info(contact_info);
            }
        }
    }
}
/*
 * Split a contact string of the form "<process name>;<uri>[;<uri>...]"
 * into the process name and (optionally) the list of transport URIs.
 *
 * @param uri  full contact string, e.g. as produced by get_contact_info()
 * @param peer [OUT] parsed process name
 * @param uris [OUT] NULL-terminated argv of transport URIs; skipped when
 *             the caller passes NULL
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM / a name-service error code
 * on malformed input.
 */
int
orte_rml_base_parse_uris(const char* uri,
                         orte_process_name_t* peer,
                         char*** uris)
{
    orte_process_name_t* parsed_name;
    char *scratch, *remainder;
    int rc;

    /* work on a private copy: the name/uri separator gets overwritten */
    scratch = strdup(uri);
    remainder = strchr(scratch, ';');
    if (NULL == remainder) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        free(scratch);
        return ORTE_ERR_BAD_PARAM;
    }
    *remainder++ = '\0';

    /* everything before the first ';' is the process name */
    rc = orte_ns.convert_string_to_process_name(&parsed_name, scratch);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        free(scratch);
        return rc;
    }
    *peer = *parsed_name;
    free(parsed_name);

    /* the remainder is a ';'-separated list of transport URIs */
    if (NULL != uris) {
        *uris = opal_argv_split(remainder, ';');
    }

    free(scratch);
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,116 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <stdio.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/rml/base/static-components.h"
/*
* Global variables
*/
orte_rml_base_t orte_rml_base;
orte_rml_module_t orte_rml;
orte_rml_component_t orte_rml_component;
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int orte_rml_base_open(void)
{
    /* NOTE(review): 'id' receives the parameter handle but is never used */
    int id;
    int int_value;
    int rc;
    char *rml_wrapper = NULL;
    orte_data_type_t tmp;

    /* Initialize globals */
    OBJ_CONSTRUCT(&orte_rml_base.rml_components, opal_list_t);

    /* lookup common parameters */
    id = mca_base_param_reg_int_name("rml_base", "debug",
                                     "Verbosity level for the rml famework",
                                     false, false, 0, &int_value);
    /* any non-zero debug level opens a real output stream; -1 disables it */
    if (0 != int_value) {
        orte_rml_base.rml_output = opal_output_open(NULL);
    } else {
        orte_rml_base.rml_output = -1;
    }
    orte_rml_base.rml_debug = int_value;
    opal_output_set_verbosity(orte_rml_base.rml_output, int_value);

    /* register the base system types with the DPS */
    tmp = ORTE_RML_TAG;
    if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_rml_base_pack_tag,
                                                     orte_rml_base_unpack_tag,
                                                     (orte_dss_copy_fn_t)orte_rml_base_copy_tag,
                                                     (orte_dss_compare_fn_t)orte_rml_base_compare_tags,
                                                     (orte_dss_size_fn_t)orte_rml_base_size_tag,
                                                     (orte_dss_print_fn_t)orte_rml_base_print_tag,
                                                     (orte_dss_release_fn_t)orte_rml_base_std_obj_release,
                                                     ORTE_DSS_UNSTRUCTURED,
                                                     "ORTE_RML_TAG", &tmp))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * Which RML Wrapper component to use, if any
     *  - NULL or "" = No wrapper
     *  - ow. select that specific wrapper component
     *
     * NOTE(review): the value is registered and immediately freed here;
     * it is re-read during selection
     */
    mca_base_param_reg_string_name("rml", "wrapper",
                                   "Use a Wrapper component around the selected RML component",
                                   false, false,
                                   NULL, &rml_wrapper);
    if( NULL != rml_wrapper) {
        free(rml_wrapper);
    }

    /* Open up all available components */
    if (ORTE_SUCCESS != (rc = mca_base_components_open("rml",
                                                       orte_rml_base.rml_output,
                                                       mca_rml_base_static_components,
                                                       &orte_rml_base.rml_components,
                                                       true)) ) {
        return rc;
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -38,6 +38,7 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
static bool recv_issued=false;
@ -50,7 +51,7 @@ int orte_rml_base_comm_start(void)
}
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_RML,
ORTE_RML_TAG_RML_INFO_UPDATE,
ORTE_RML_PERSISTENT,
orte_rml_base_recv,
NULL))) {
@ -70,7 +71,7 @@ int orte_rml_base_comm_stop(void)
return ORTE_SUCCESS;
}
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML))) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML_INFO_UPDATE))) {
ORTE_ERROR_LOG(rc);
}
recv_issued = false;
@ -107,7 +108,7 @@ void orte_rml_base_recv(int status, orte_process_name_t* sender,
ORTE_ERROR_LOG(rc);
return;
}
orte_rml.update_contact_info(data, NULL);
orte_rml_base_contact_info_notify(data, NULL);
break;
default:

Просмотреть файл

@ -1,156 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
/**
* Call the init function on all available components to find out if
* they want to run. Select the single component with the highest
* priority.
*/
int orte_rml_base_select(void)
{
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;
    /* best normal component seen so far; -1 means none yet */
    int selected_priority = -1;
    orte_rml_component_t *selected_component = NULL;
    orte_rml_module_t *selected_module = NULL;
    /* requested wrapper component, saved separately from normal selection */
    int wrapper_priority = -1;
    orte_rml_component_t *wrapper_component = NULL;
    orte_rml_module_t *wrapper_module = NULL;
    char *rml_wrapper = NULL;

    /* name of the wrapper to interpose, if any (NULL/"" = none) */
    mca_base_param_reg_string_name("rml", "wrapper",
                                   "Use a Wrapper component around the selected RML component",
                                   false, false,
                                   NULL, &rml_wrapper);

    /* Traverse the list of opened modules; call their init functions. */
    for(item = opal_list_get_first(&orte_rml_base.rml_components);
        item != opal_list_get_end(&orte_rml_base.rml_components);
        item = opal_list_get_next(item)) {
        orte_rml_component_t* component;
        cli = (mca_base_component_list_item_t *) item;
        component = (orte_rml_component_t *) cli->cli_component;

        opal_output_verbose(10, orte_rml_base.rml_output,
                            "orte_rml_base_select: initializing %s component %s",
                            component->rml_version.mca_type_name,
                            component->rml_version.mca_component_name);

        if (NULL == component->rml_init) {
            opal_output_verbose(10, orte_rml_base.rml_output,
                                "orte_rml_base_select: no init function; ignoring component");
        } else {
            int priority = 0;
            orte_rml_module_t* module = component->rml_init(&priority);

            /*
             * If the component didn't initialize, remove it from the opened
             * list and remove it from the component repository
             */
            if (NULL == module) {
                opal_output_verbose(10, orte_rml_base.rml_output,
                                    "orte_rml_base_select: init returned failure");
                continue;
            }

            /*
             * If this is a wrapper component then save it for later
             */
            if(NULL != rml_wrapper &&
               RML_SELECT_WRAPPER_PRIORITY >= priority) {
                if( 0 == strncmp(component->rml_version.mca_component_name,
                                 rml_wrapper,
                                 strlen(rml_wrapper) ) ) {
                    wrapper_priority  = priority;
                    wrapper_component = component;
                    wrapper_module    = module;
                }
            }
            /*
             * Otherwise this is a normal module and subject to normal selection
             */
            else if(priority > selected_priority) {
                /* NOTE(review): the previously selected / rejected modules
                 * are not finalized here and are effectively leaked */
                selected_priority = priority;
                selected_component = component;
                selected_module = module;
            }
        }
    }

    /*
     * Unload all components that were not selected
     */
    item = opal_list_get_first(&orte_rml_base.rml_components);
    while(item != opal_list_get_end(&orte_rml_base.rml_components)) {
        opal_list_item_t* next = opal_list_get_next(item);
        orte_rml_component_t* component;
        cli = (mca_base_component_list_item_t *) item;
        component = (orte_rml_component_t *) cli->cli_component;

        /* Keep it if it is the wrapper component */
        if( NULL != wrapper_component &&
            component == wrapper_component ) {
            item = next;
            continue;
        }
        /* Not the selected component */
        if(component != selected_component) {
            opal_output_verbose(10, orte_rml_base.rml_output,
                                "orte_rml_base_select: module %s unloaded",
                                component->rml_version.mca_component_name);
            mca_base_component_repository_release((mca_base_component_t *) component);
            opal_list_remove_item(&orte_rml_base.rml_components, item);
            OBJ_RELEASE(item);
        }
        item = next;
    }

    /* setup reference to selected module */
    if(NULL != selected_module) {
        /* copies the module struct and component struct by value */
        orte_rml = *selected_module;
        orte_rml_component = *selected_component;
    }

    /* If a wrapper component was requested then
     * Make sure it can switch out the selected module
     */
    if( NULL != wrapper_component) {
        wrapper_module->init();
    }

    if( NULL != rml_wrapper ) {
        free(rml_wrapper);
    }

    /* NOTE(review): unlike the replacement implementation, this version
     * returns ORTE_SUCCESS even when no component was selected */
    return ORTE_SUCCESS;
}

31
orte/mca/rml/base/rml_contact.h Обычный файл
Просмотреть файл

@ -0,0 +1,31 @@
/*
 * Declarations for the RML contact-information helpers implemented in
 * rml_base_contact.c.
 *
 * Fix: the header had no include guard; multiple inclusion would
 * redeclare everything here and re-expand the included headers' contents.
 */
#ifndef ORTE_RML_BASE_RML_CONTACT_H
#define ORTE_RML_BASE_RML_CONTACT_H

#include "orte/mca/ns/ns.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/gpr/gpr_types.h"

BEGIN_C_DECLS

/*
 * Get contact info for a process or job
 * Returns contact info for the specified process. If the vpid in the process name
 * is WILDCARD, then it returns the contact info for all processes in the specified
 * job. If the jobid is WILDCARD, then it returns the contact info for processes
 * of the specified vpid across all jobs. Obviously, combining the two WILDCARD
 * values will return contact info for everyone!
 */
int orte_rml_base_get_contact_info(orte_process_name_t *name,
                                   orte_gpr_notify_data_t **data);

/* Attach a GPR subscription delivering contact info for the given job */
int orte_rml_base_register_subscription(orte_jobid_t jobid, char *trigger);

/* Publish this process's contact info to the GPR */
int orte_rml_base_register_contact_info(void);

/* GPR callback feeding received contact info to the active RML module */
void orte_rml_base_contact_info_notify(orte_gpr_notify_data_t* data,
                                       void* cbdata);

/* Split "<process name>;<uri>[;<uri>...]" into name and URI list */
int orte_rml_base_parse_uris(const char* uri,
                             orte_process_name_t* peer,
                             char*** uris);

END_C_DECLS

#endif /* ORTE_RML_BASE_RML_CONTACT_H */

Просмотреть файл

@ -56,31 +56,30 @@ orte_rml_component_t mca_rml_cnos_component = {
};
orte_rml_module_t orte_rml_cnos_module = {
orte_rml_cnos_module_init,
orte_rml_cnos_module_enable_comm,
orte_rml_cnos_module_fini,
orte_rml_cnos_get_uri,
orte_rml_cnos_set_uri,
orte_rml_cnos_parse_uris,
orte_rml_cnos_get_contact_info,
orte_rml_cnos_set_contact_info,
orte_rml_cnos_get_new_name,
orte_rml_cnos_ping,
orte_rml_cnos_send,
orte_rml_cnos_send_nb,
orte_rml_cnos_send_buffer,
orte_rml_cnos_send_buffer_nb,
orte_rml_cnos_recv,
orte_rml_cnos_recv_nb,
orte_rml_cnos_recv_buffer,
orte_rml_cnos_recv_buffer_nb,
orte_rml_cnos_recv_cancel,
orte_rml_cnos_xcast,
orte_rml_cnos_xcast_nb,
orte_rml_cnos_xcast_gate,
orte_rml_cnos_add_exception_handler,
orte_rml_cnos_del_exception_handler,
NULL, /* No FT Event function */
orte_rml_cnos_register_contact_info,
orte_rml_cnos_register_subscription,
orte_rml_cnos_get_contact_info,
orte_rml_cnos_update_contact_info
NULL /* No FT Event function */
};
@ -107,7 +106,7 @@ orte_rml_cnos_close(void)
}
int
orte_rml_cnos_module_init(void)
orte_rml_cnos_module_enable_comm(void)
{
return ORTE_SUCCESS;
}
@ -119,24 +118,25 @@ orte_rml_cnos_module_fini(void)
}
char *
orte_rml_cnos_get_uri(void)
orte_rml_cnos_get_contact_info(void)
{
return "(none)";
}
int
orte_rml_cnos_set_uri(const char *name)
orte_rml_cnos_set_contact_info(const char *name)
{
return ORTE_ERR_NOT_SUPPORTED;
}
int
orte_rml_cnos_parse_uris(const char *uri,
orte_process_name_t * peer, char ***uris)
orte_rml_cnos_get_new_name(orte_process_name_t *name)
{
return ORTE_ERR_NOT_SUPPORTED;
}
int
orte_rml_cnos_ping(const char *uri, const struct timeval *tv)
{
@ -168,7 +168,7 @@ orte_rml_cnos_recv(orte_process_name_t * peer,
int
orte_rml_cnos_recv_buffer(orte_process_name_t * peer,
orte_buffer_t * buf, orte_rml_tag_t tag)
orte_buffer_t * buf, orte_rml_tag_t tag, int flags)
{
return ORTE_ERR_NOT_SUPPORTED;
}
@ -220,71 +220,6 @@ orte_rml_cnos_recv_cancel(orte_process_name_t * peer, orte_rml_tag_t tag)
return ORTE_ERR_NOT_SUPPORTED;
}
int
orte_rml_cnos_barrier(void)
{
#if OMPI_RML_CNOS_HAVE_BARRIER
cnos_barrier();
#endif
return ORTE_SUCCESS;
}
int
orte_rml_cnos_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
return ORTE_SUCCESS;
}
int
orte_rml_cnos_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
return ORTE_SUCCESS;
}
int
orte_rml_cnos_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc)
{
orte_rml_cnos_barrier();
if (NULL != cbfunc) {
orte_gpr_notify_message_t *msg;
msg = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == msg) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
cbfunc(msg);
OBJ_RELEASE(msg);
}
return ORTE_SUCCESS;
}
int orte_rml_cnos_register_contact_info(void)
{
return ORTE_SUCCESS;
}
int orte_rml_cnos_register_subscription(orte_jobid_t job, char *trigger)
{
return ORTE_SUCCESS;
}
int orte_rml_cnos_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data)
{
return ORTE_SUCCESS;
}
void orte_rml_cnos_update_contact_info(orte_gpr_notify_data_t* data,
void* cbdata)
{
return;
}
int orte_rml_cnos_add_exception_handler(orte_rml_exception_callback_t cbfunc)
{
return ORTE_SUCCESS;

Просмотреть файл

@ -35,13 +35,13 @@ extern "C"
int orte_rml_cnos_close(void);
orte_rml_module_t * orte_rml_cnos_init(int *priority);
int orte_rml_cnos_module_init(void);
int orte_rml_cnos_module_enable_comm(void);
int orte_rml_cnos_module_fini(void);
char *orte_rml_cnos_get_uri(void);
int orte_rml_cnos_set_uri(const char *);
int orte_rml_cnos_parse_uris(const char *uri,
orte_process_name_t * peer, char ***uris);
char *orte_rml_cnos_get_contact_info(void);
int orte_rml_cnos_set_contact_info(const char *);
int orte_rml_cnos_get_new_name(orte_process_name_t *name);
int orte_rml_cnos_ping(const char *uri, const struct timeval *tv);
int orte_rml_cnos_send(orte_process_name_t * peer,
@ -71,7 +71,7 @@ extern "C"
int count, orte_rml_tag_t tag, int flags);
int orte_rml_cnos_recv_buffer(orte_process_name_t * peer,
orte_buffer_t * buf, orte_rml_tag_t tag);
orte_buffer_t * buf, orte_rml_tag_t tag, int flags);
int orte_rml_cnos_recv_nb(orte_process_name_t * peer,
struct iovec *msg,
@ -89,28 +89,6 @@ extern "C"
int orte_rml_cnos_recv_cancel(orte_process_name_t * peer,
orte_rml_tag_t tag);
int orte_rml_cnos_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
int orte_rml_cnos_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
int orte_rml_cnos_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc);
int orte_rml_cnos_barrier(void);
int orte_rml_cnos_register_contact_info(void);
int orte_rml_cnos_register_subscription(orte_jobid_t job, char *trigger);
int orte_rml_cnos_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data);
void orte_rml_cnos_update_contact_info(orte_gpr_notify_data_t* data,
void* cbdata);
int orte_rml_cnos_add_exception_handler(orte_rml_exception_callback_t cbfunc);
int orte_rml_cnos_del_exception_handler(orte_rml_exception_callback_t cbfunc);

Просмотреть файл

@ -46,7 +46,7 @@ extern "C" {
/*
* Init (Module)
*/
int orte_rml_ftrm_module_init(void);
int orte_rml_ftrm_module_enable_comm(void);
/*
* Finalize (Module)
@ -56,18 +56,14 @@ extern "C" {
/*
* Get URI
*/
char * orte_rml_ftrm_get_uri(void);
char * orte_rml_ftrm_get_contact_info(void);
/*
* Set URI
*/
int orte_rml_ftrm_set_uri(const char* uri);
int orte_rml_ftrm_set_contact_info(const char* uri);
/*
* Parse URis
*/
int orte_rml_ftrm_parse_uris(const char* uri,
orte_process_name_t* peer, char*** uris);
int orte_rml_ftrm_get_new_name(orte_process_name_t *name);
/*
* Ping
@ -137,7 +133,8 @@ extern "C" {
*/
int orte_rml_ftrm_recv_buffer(orte_process_name_t* peer,
orte_buffer_t *buf,
orte_rml_tag_t tag);
orte_rml_tag_t tag,
int flags);
/*
* Recv Buffer Non-blocking
@ -153,19 +150,6 @@ extern "C" {
*/
int orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
/*
* Xcast
*/
int orte_rml_ftrm_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
int orte_rml_ftrm_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag);
int orte_rml_ftrm_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc);
/*
* Register a callback on loss of connection
*/
@ -177,14 +161,6 @@ extern "C" {
*/
int orte_rml_ftrm_ft_event(int state);
int orte_rml_ftrm_register_contact_info(void);
int orte_rml_ftrm_register_subscription(orte_jobid_t job, char *trigger);
int orte_rml_ftrm_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data);
void orte_rml_ftrm_update_contact_info(orte_gpr_notify_data_t* data, void* cbdata);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -24,9 +24,6 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "rml_ftrm.h"
@ -63,13 +60,13 @@ orte_rml_component_t mca_rml_ftrm_component = {
};
orte_rml_module_t orte_rml_ftrm_module = {
orte_rml_ftrm_module_init,
orte_rml_ftrm_module_enable_comm,
orte_rml_ftrm_module_finalize,
orte_rml_ftrm_get_uri,
orte_rml_ftrm_set_uri,
orte_rml_ftrm_parse_uris,
orte_rml_ftrm_get_contact_info,
orte_rml_ftrm_set_contact_info,
orte_rml_ftrm_get_new_name,
orte_rml_ftrm_ping,
orte_rml_ftrm_send,
@ -83,19 +80,10 @@ orte_rml_module_t orte_rml_ftrm_module = {
orte_rml_ftrm_recv_buffer_nb,
orte_rml_ftrm_recv_cancel,
orte_rml_ftrm_xcast,
orte_rml_ftrm_xcast_nb,
orte_rml_ftrm_xcast_gate,
orte_rml_ftrm_add_exception_handler,
orte_rml_ftrm_del_exception_handler,
orte_rml_ftrm_ft_event,
orte_rml_ftrm_register_contact_info,
orte_rml_ftrm_register_subscription,
orte_rml_ftrm_get_contact_info,
orte_rml_ftrm_update_contact_info
orte_rml_ftrm_ft_event
};
int rml_ftrm_output_handle;

Просмотреть файл

@ -26,9 +26,6 @@
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "rml_ftrm.h"
orte_rml_component_t wrapped_component;
@ -39,7 +36,7 @@ orte_rml_module_t wrapped_module;
*/
static int num_inits = 0;
int orte_rml_ftrm_module_init(void)
int orte_rml_ftrm_module_enable_comm(void)
{
int ret;
@ -51,10 +48,10 @@ int orte_rml_ftrm_module_init(void)
if( 0 == num_inits ) {
/* Copy the wrapped versions */
wrapped_module = orte_rml;
wrapped_component = orte_rml_component;
wrapped_component = *orte_rml_component;
/* Replace with ourselves */
orte_rml = orte_rml_ftrm_module;
orte_rml_component = mca_rml_ftrm_component;
orte_rml_component = &mca_rml_ftrm_component;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: module_init(): Wrapped Component (%s)",
@ -70,8 +67,8 @@ int orte_rml_ftrm_module_init(void)
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: module_init(): Normal...");
if( NULL != wrapped_module.init ) {
if( ORTE_SUCCESS != (ret = wrapped_module.init() ) ) {
if( NULL != wrapped_module.enable_comm ) {
if( ORTE_SUCCESS != (ret = wrapped_module.enable_comm() ) ) {
return ret;
}
}
@ -93,8 +90,25 @@ int orte_rml_ftrm_module_finalize(void)
"orte_rml_ftrm: module_finalize()");
if( NULL != wrapped_module.fini ) {
if( ORTE_SUCCESS != (ret = wrapped_module.fini() ) ) {
if( NULL != wrapped_module.finalize ) {
if( ORTE_SUCCESS != (ret = wrapped_module.finalize() ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
int orte_rml_ftrm_get_new_name(orte_process_name_t *name)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: get_new_name()");
if( NULL != wrapped_module.get_new_name ) {
if( ORTE_SUCCESS != (ret = wrapped_module.get_new_name(name) ) ) {
return ret;
}
}
@ -106,7 +120,7 @@ int orte_rml_ftrm_module_finalize(void)
/*
* Get URI
*/
char * orte_rml_ftrm_get_uri(void)
char * orte_rml_ftrm_get_contact_info(void)
{
char * rtn_val = NULL;
@ -114,45 +128,25 @@ char * orte_rml_ftrm_get_uri(void)
"orte_rml_ftrm: get_uri()");
if( NULL != wrapped_module.get_uri ) {
rtn_val = wrapped_module.get_uri();
if( NULL != wrapped_module.get_contact_info ) {
rtn_val = wrapped_module.get_contact_info();
}
return rtn_val;
}
/*
* Set URI
* Set CONTACT_INFO
*/
int orte_rml_ftrm_set_uri(const char* uri)
int orte_rml_ftrm_set_contact_info(const char* contact_info)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: set_uri()");
"orte_rml_ftrm: set_contact_info()");
if( NULL != wrapped_module.set_uri ) {
if( ORTE_SUCCESS != (ret = wrapped_module.set_uri(uri) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
/*
* Parse URis
*/
int orte_rml_ftrm_parse_uris(const char* uri,
orte_process_name_t* peer, char*** uris)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: parse_uris()");
if( NULL != wrapped_module.parse_uris ) {
if( ORTE_SUCCESS != (ret = wrapped_module.parse_uris(uri, peer, uris) ) ) {
if( NULL != wrapped_module.set_contact_info ) {
if( ORTE_SUCCESS != (ret = wrapped_module.set_contact_info(contact_info) ) ) {
return ret;
}
}
@ -329,7 +323,8 @@ int orte_rml_ftrm_recv_nb(orte_process_name_t* peer,
*/
int orte_rml_ftrm_recv_buffer(orte_process_name_t* peer,
orte_buffer_t *buf,
orte_rml_tag_t tag)
orte_rml_tag_t tag,
int flags)
{
int ret;
@ -337,7 +332,7 @@ int orte_rml_ftrm_recv_buffer(orte_process_name_t* peer,
"orte_rml_ftrm: recv_buffer()");
if( NULL != wrapped_module.recv_buffer ) {
if( ORTE_SUCCESS != (ret = wrapped_module.recv_buffer(peer, buf, tag) ) ) {
if( ORTE_SUCCESS != (ret = wrapped_module.recv_buffer(peer, buf, tag, flags) ) ) {
return ret;
}
}
@ -388,62 +383,6 @@ int orte_rml_ftrm_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
}
/*
* Xcast
*/
int orte_rml_ftrm_xcast(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: xcast()");
if( NULL != wrapped_module.xcast ) {
if( ORTE_SUCCESS != (ret = wrapped_module.xcast(job, buffer, tag) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
int orte_rml_ftrm_xcast_nb(orte_jobid_t job,
orte_buffer_t *buffer,
orte_rml_tag_t tag)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: xcast_nb()");
if( NULL != wrapped_module.xcast_gate ) {
if( ORTE_SUCCESS != (ret = wrapped_module.xcast_nb(job, buffer, tag) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
int orte_rml_ftrm_xcast_gate(orte_gpr_trigger_cb_fn_t cbfunc)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: xcast_gate()");
if( NULL != wrapped_module.xcast_gate ) {
if( ORTE_SUCCESS != (ret = wrapped_module.xcast_gate(cbfunc) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
/*
* Register a callback on loss of connection
*/
@ -479,65 +418,6 @@ int orte_rml_ftrm_del_exception_handler(orte_rml_exception_callback_t cbfunc)
return ORTE_SUCCESS;
}
int orte_rml_ftrm_register_contact_info(void)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: register_contact_info()");
if( NULL != wrapped_module.register_contact_info ) {
if( ORTE_SUCCESS != (ret = wrapped_module.register_contact_info() ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
int orte_rml_ftrm_register_subscription(orte_jobid_t job, char *trigger)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: register_subscription()");
if( NULL != wrapped_module.register_subscription ) {
if( ORTE_SUCCESS != (ret = wrapped_module.register_subscription(job, trigger) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
int orte_rml_ftrm_get_contact_info(orte_process_name_t *name, orte_gpr_notify_data_t **data)
{
int ret;
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: get_contact_info()");
if( NULL != wrapped_module.get_contact_info ) {
if( ORTE_SUCCESS != (ret = wrapped_module.get_contact_info(name, data) ) ) {
return ret;
}
}
return ORTE_SUCCESS;
}
void orte_rml_ftrm_update_contact_info(orte_gpr_notify_data_t* data, void* cbdata)
{
opal_output_verbose(20, rml_ftrm_output_handle,
"orte_rml_ftrm: update_contact_info()");
if( NULL != wrapped_module.update_contact_info ) {
wrapped_module.update_contact_info(data, cbdata);
}
}
/*
* FT Event
*/

Просмотреть файл

@ -21,8 +21,13 @@
sources = \
rml_oob.h \
rml_oob_component.c
rml_oob.h \
rml_oob_component.c \
rml_oob_contact.c \
rml_oob_exception.c \
rml_oob_ping.c \
rml_oob_recv.c \
rml_oob_send.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

@ -15,24 +15,142 @@
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_RML_OOB_H
#define MCA_RML_OOB_H
#ifndef MCA_RML_OOB_RML_OOB_H
#define MCA_RML_OOB_RML_OOB_H
#include "opal/threads/condition.h"
#include "opal/threads/mutex.h"
#include "orte/mca/rml/rml.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#include "orte/mca/oob/oob.h"
#include "orte/dss/dss_types.h"
BEGIN_C_DECLS
struct orte_rml_oob_module_t {
struct orte_rml_module_t super;
mca_oob_t *active_oob;
opal_list_t exceptions;
opal_mutex_t exceptions_lock;
};
typedef struct orte_rml_oob_module_t orte_rml_oob_module_t;
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
extern orte_rml_module_t orte_rml_oob_module;
extern orte_rml_oob_module_t orte_rml_oob_module;
typedef enum {
ORTE_RML_BLOCKING_SEND,
ORTE_RML_NONBLOCKING_IOV_SEND,
ORTE_RML_NONBLOCKING_BUFFER_SEND,
ORTE_RML_BLOCKING_RECV,
ORTE_RML_NONBLOCKING_IOV_RECV,
ORTE_RML_NONBLOCKING_BUFFER_RECV
} orte_rml_oob_msg_type_t;
struct orte_rml_oob_msg_header_t {
orte_process_name_t origin;
orte_process_name_t destination;
int tag;
};
typedef struct orte_rml_oob_msg_header_t orte_rml_oob_msg_header_t;
struct orte_rml_oob_msg_t {
opal_object_t super;
opal_mutex_t msg_lock;
opal_condition_t msg_cond;
orte_rml_oob_msg_type_t msg_type;
int msg_status;
volatile bool msg_complete;
bool msg_persistent;
union {
orte_rml_callback_fn_t iov;
orte_rml_buffer_callback_fn_t buffer;
} msg_cbfunc;
void *msg_cbdata;
struct iovec *msg_data;
/** buffer for non-blocking buffer sends */
orte_buffer_t msg_recv_buffer;
/** pointer to user buffer for buffered sends */
orte_buffer_t *user_buffer;
orte_rml_oob_msg_header_t msg_header;
};
typedef struct orte_rml_oob_msg_t orte_rml_oob_msg_t;
OBJ_CLASS_DECLARATION(orte_rml_oob_msg_t);
int orte_rml_oob_init(void);
int orte_rml_oob_fini(void);
int orte_rml_oob_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
int orte_rml_oob_send(orte_process_name_t* peer,
struct iovec *msg,
int count,
int tag,
int flags);
int orte_rml_oob_send_nb(orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_send_buffer(orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
int flags);
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_recv(orte_process_name_t* peer,
struct iovec *msg,
int count,
orte_rml_tag_t tag,
int flags);
int orte_rml_oob_recv_nb(orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_recv_buffer(orte_process_name_t* peer,
orte_buffer_t *buf,
orte_rml_tag_t tag,
int flags);
int orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_recv_cancel(orte_process_name_t* peer,
orte_rml_tag_t tag);
int orte_rml_oob_ping(const char* uri,
const struct timeval* tv);
char* orte_rml_oob_get_uri(void);
int orte_rml_oob_set_uri(const char*);
int orte_rml_oob_get_new_name(orte_process_name_t *name);
int orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc);
int orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc);
void orte_rml_oob_exception_callback(const orte_process_name_t *peer,
orte_rml_exception_t exception);
END_C_DECLS
#endif

Просмотреть файл

@ -17,19 +17,29 @@
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "rml_oob.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/orte_constants.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "rml_oob.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
static orte_rml_module_t* orte_rml_oob_init(int* priority);
static int orte_rml_oob_open(void);
static int orte_rml_oob_close(void);
static orte_rml_module_t* rml_oob_init(int* priority);
static int rml_oob_open(void);
static int rml_oob_close(void);
static void rml_oob_recv_route_callback(int status,
struct orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
void *cbdata);
/**
@ -49,8 +59,8 @@ orte_rml_component_t mca_rml_oob_component = {
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_rml_oob_open, /* component open */
orte_rml_oob_close, /* component close */
rml_oob_open, /* component open */
rml_oob_close, /* component close */
},
/* Next the MCA v1.0.0 component meta data */
@ -58,54 +68,41 @@ orte_rml_component_t mca_rml_oob_component = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
orte_rml_oob_init
rml_oob_init
};
orte_rml_module_t orte_rml_oob_module = {
mca_oob_base_module_init,
NULL,
(orte_rml_module_get_uri_fn_t)mca_oob_get_my_contact_info,
(orte_rml_module_set_uri_fn_t)mca_oob_set_contact_info,
(orte_rml_module_parse_uris_fn_t)mca_oob_parse_contact_info,
(orte_rml_module_ping_fn_t)mca_oob_ping,
(orte_rml_module_send_fn_t)mca_oob_send,
(orte_rml_module_send_nb_fn_t)mca_oob_send_nb,
(orte_rml_module_send_buffer_fn_t)mca_oob_send_packed,
(orte_rml_module_send_buffer_nb_fn_t)mca_oob_send_packed_nb,
(orte_rml_module_recv_fn_t)mca_oob_recv,
(orte_rml_module_recv_nb_fn_t)mca_oob_recv_nb,
(orte_rml_module_recv_buffer_fn_t)mca_oob_recv_packed,
(orte_rml_module_recv_buffer_nb_fn_t)mca_oob_recv_packed_nb,
(orte_rml_module_recv_cancel_fn_t)mca_oob_recv_cancel,
(orte_rml_module_xcast_fn_t)mca_oob_xcast,
(orte_rml_module_xcast_nb_fn_t)mca_oob_xcast_nb,
(orte_rml_module_xcast_gate_fn_t)mca_oob_xcast_gate,
(orte_rml_module_exception_fn_t)mca_oob_add_exception_handler,
(orte_rml_module_exception_fn_t)mca_oob_del_exception_handler,
(orte_rml_module_ft_event_fn_t)orte_rml_oob_ft_event,
(orte_rml_module_register_contact_info_fn_t) mca_oob_register_contact_info,
(orte_rml_module_register_subscription_fn_t) mca_oob_register_subscription,
(orte_rml_module_get_contact_info_fn_t) mca_oob_get_contact_info,
(orte_rml_module_update_contact_info_fn_t) mca_oob_update_contact_info
orte_rml_oob_module_t orte_rml_oob_module = {
{
orte_rml_oob_init,
orte_rml_oob_fini,
orte_rml_oob_get_uri,
orte_rml_oob_set_uri,
orte_rml_oob_get_new_name,
orte_rml_oob_ping,
orte_rml_oob_send,
orte_rml_oob_send_nb,
orte_rml_oob_send_buffer,
orte_rml_oob_send_buffer_nb,
orte_rml_oob_recv,
orte_rml_oob_recv_nb,
orte_rml_oob_recv_buffer,
orte_rml_oob_recv_buffer_nb,
orte_rml_oob_recv_cancel,
orte_rml_oob_add_exception,
orte_rml_oob_del_exception,
orte_rml_oob_ft_event
}
};
static orte_rml_module_t* orte_rml_oob_init(int* priority)
{
if(mca_oob_base_init() != ORTE_SUCCESS)
return NULL;
*priority = 1;
return &orte_rml_oob_module;
}
/*
* initialize the underlying oob infrastructure so that all the
* pointers in the RML struct can be valid.
*/
static int
orte_rml_oob_open(void)
rml_oob_open(void)
{
int rc;
@ -118,15 +115,11 @@ orte_rml_oob_open(void)
}
/*
* shut down the OOB, since we started it.
*/
static int
orte_rml_oob_close(void)
rml_oob_close(void)
{
int rc;
if (ORTE_SUCCESS != (rc = mca_oob_base_close())) {
return rc;
}
@ -134,7 +127,65 @@ orte_rml_oob_close(void)
return rc;
}
int orte_rml_oob_ft_event(int state) {
static orte_rml_module_t*
rml_oob_init(int* priority)
{
if (mca_oob_base_init() != ORTE_SUCCESS)
return NULL;
*priority = 1;
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions_lock, opal_mutex_t);
orte_rml_oob_module.active_oob = &mca_oob;
orte_rml_oob_module.active_oob->oob_exception_callback =
orte_rml_oob_exception_callback;
return &orte_rml_oob_module.super;
}
int
orte_rml_oob_init(void)
{
int ret;
struct iovec iov[1];
ret = orte_rml_oob_module.active_oob->oob_init();
iov[0].iov_base = NULL;
iov[0].iov_len = 0;
ret = orte_rml_oob_module.active_oob->oob_recv_nb(ORTE_NAME_WILDCARD,
iov, 1,
ORTE_RML_TAG_RML_ROUTE,
ORTE_RML_ALLOC|ORTE_RML_PERSISTENT,
rml_oob_recv_route_callback,
NULL);
return ret;
}
int
orte_rml_oob_fini(void)
{
opal_list_item_t *item;
while (NULL !=
(item = opal_list_remove_first(&orte_rml_oob_module.exceptions))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions);
OBJ_DESTRUCT(&orte_rml_oob_module.exceptions_lock);
orte_rml_oob_module.active_oob->oob_exception_callback = NULL;
return ORTE_SUCCESS;
}
int
orte_rml_oob_ft_event(int state) {
int exit_status = ORTE_SUCCESS;
int ret;
@ -154,7 +205,8 @@ int orte_rml_oob_ft_event(int state) {
;
}
if( ORTE_SUCCESS != (ret = mca_oob.oob_ft_event(state)) ) {
if( ORTE_SUCCESS !=
(ret = orte_rml_oob_module.active_oob->oob_ft_event(state)) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
@ -187,11 +239,11 @@ int orte_rml_oob_ft_event(int state) {
}
if(NULL != orte_process_info.ns_replica_uri) {
mca_oob_set_contact_info(orte_process_info.ns_replica_uri);
orte_rml_oob_set_uri(orte_process_info.ns_replica_uri);
}
if(NULL != orte_process_info.gpr_replica_uri) {
mca_oob_set_contact_info(orte_process_info.gpr_replica_uri);
orte_rml_oob_set_uri(orte_process_info.gpr_replica_uri);
}
}
else if(OPAL_CRS_TERM == state ) {
@ -204,3 +256,81 @@ int orte_rml_oob_ft_event(int state) {
cleanup:
return exit_status;
}
static void
msg_construct(orte_rml_oob_msg_t *msg)
{
OBJ_CONSTRUCT(&msg->msg_lock, opal_mutex_t);
OBJ_CONSTRUCT(&msg->msg_cond, opal_condition_t);
msg->msg_status = 0;
msg->msg_complete = false;
msg->msg_persistent = false;
OBJ_CONSTRUCT(&msg->msg_recv_buffer, orte_buffer_t);
msg->msg_data = NULL;
}
static void
msg_destruct(orte_rml_oob_msg_t *msg)
{
if (NULL != msg->msg_data) free(msg->msg_data);
OBJ_DESTRUCT(&msg->msg_recv_buffer);
OBJ_DESTRUCT(&msg->msg_lock);
OBJ_DESTRUCT(&msg->msg_cond);
}
OBJ_CLASS_INSTANCE(orte_rml_oob_msg_t, opal_object_t,
msg_construct, msg_destruct);
static void
rml_oob_recv_route_send_callback(int status,
struct orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
void* cbdata)
{
/* BWB -- propogate errors here... */
if (NULL != iov[0].iov_base) free(iov[0].iov_base);
}
static void
rml_oob_recv_route_callback(int status,
struct orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
void *cbdata)
{
orte_rml_oob_msg_header_t *hdr =
(orte_rml_oob_msg_header_t*) iov[0].iov_base;
int real_tag;
int ret;
orte_process_name_t next;
/* BWB -- propogate errors here... */
assert(status >= 0);
next = orte_routed.get_route(&hdr->destination);
if (next.vpid == ORTE_VPID_INVALID) {
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
abort();
}
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &next, peer)) {
real_tag = hdr->tag;
} else {
real_tag = ORTE_RML_TAG_RML_ROUTE;
}
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
iov,
count,
real_tag,
0,
rml_oob_recv_route_send_callback,
NULL);
assert(ret == ORTE_SUCCESS);
}

75
orte/mca/rml/oob/rml_oob_contact.c Обычный файл
Просмотреть файл

@ -0,0 +1,75 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "rml_oob.h"
#include "opal/util/argv.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/base/rml_contact.h"
char*
orte_rml_oob_get_uri(void)
{
char *proc_name = NULL;
char *proc_addr = NULL;
char *contact_info = NULL;
int rc;
proc_addr = orte_rml_oob_module.active_oob->oob_get_addr();
if (NULL == proc_addr) return NULL;
if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string(&proc_name,
orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (0 > asprintf(&contact_info, "%s;%s", proc_name, proc_addr)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
}
free(proc_name);
free(proc_addr);
return contact_info;
}
int
orte_rml_oob_set_uri(const char* uri)
{
orte_process_name_t name;
char** uris;
char** ptr;
int rc = orte_rml_base_parse_uris(uri, &name, &uris);
if(rc != ORTE_SUCCESS)
return rc;
for(ptr = uris; ptr != NULL && *ptr != NULL; ptr++) {
orte_rml_oob_module.active_oob->oob_set_addr(&name, *ptr);
}
if(uris != NULL) {
opal_argv_free(uris);
}
return ORTE_SUCCESS;
}
int
orte_rml_oob_get_new_name(orte_process_name_t *name)
{
if (NULL != ORTE_PROC_MY_NAME) {
return ORTE_ERR_NOT_SUPPORTED;
}
return orte_rml_oob_module.active_oob->oob_get_new_name(name);
}

85
orte/mca/rml/oob/rml_oob_exception.c Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "rml_oob.h"
#include "opal/class/opal_list.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
struct orte_rml_oob_exception_t {
opal_list_item_t super;
orte_rml_exception_callback_t cbfunc;
};
typedef struct orte_rml_oob_exception_t orte_rml_oob_exception_t;
static OBJ_CLASS_INSTANCE(orte_rml_oob_exception_t, opal_list_item_t,
NULL, NULL);
void
orte_rml_oob_exception_callback(const orte_process_name_t *peer,
orte_rml_exception_t exception)
{
opal_list_item_t *item;
OPAL_THREAD_LOCK(&orte_rml_oob_module.exceptions_lock);
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
item = opal_list_get_next(item)) {
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
ex->cbfunc(peer, exception);
}
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.exceptions_lock);
}
int
orte_rml_oob_add_exception(orte_rml_exception_callback_t cbfunc)
{
orte_rml_oob_exception_t *ex = OBJ_NEW(orte_rml_oob_exception_t);
if (NULL == ex) return ORTE_ERROR;
OPAL_THREAD_LOCK(&orte_rml_oob_module.exceptions_lock);
ex->cbfunc = cbfunc;
opal_list_append(&orte_rml_oob_module.exceptions, &ex->super);
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.exceptions_lock);
return ORTE_SUCCESS;
}
int
orte_rml_oob_del_exception(orte_rml_exception_callback_t cbfunc)
{
opal_list_item_t *item;
OPAL_THREAD_LOCK(&orte_rml_oob_module.exceptions_lock);
for (item = opal_list_get_first(&orte_rml_oob_module.exceptions) ;
item != opal_list_get_end(&orte_rml_oob_module.exceptions) ;
item = opal_list_get_next(item)) {
orte_rml_oob_exception_t *ex = (orte_rml_oob_exception_t*) item;
if (cbfunc == ex->cbfunc) {
opal_list_remove_item(&orte_rml_oob_module.exceptions, item);
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.exceptions_lock);
return ORTE_SUCCESS;
}
}
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.exceptions_lock);
return ORTE_ERR_NOT_FOUND;
}

39
orte/mca/rml/oob/rml_oob_ping.c Обычный файл
Просмотреть файл

@ -0,0 +1,39 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "rml_oob.h"
#include "opal/util/argv.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
int
orte_rml_oob_ping(const char* uri,
const struct timeval* tv)
{
orte_process_name_t name;
char** uris;
char** ptr;
int rc;
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(uri, &name, &uris))) {
return rc;
}
ptr = uris;
while(ptr && *ptr) {
if(ORTE_SUCCESS == (rc = orte_rml_oob_module.active_oob->oob_ping(&name, *ptr, tv)))
break;
ptr++;
}
opal_argv_free(uris);
return rc;
}

229
orte/mca/rml/oob/rml_oob_recv.c Обычный файл
Просмотреть файл

@ -0,0 +1,229 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "rml_oob.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/dss/dss.h"
static void
orte_rml_recv_msg_callback(int status,
struct orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
void* cbdata)
{
orte_rml_oob_msg_t *msg = (orte_rml_oob_msg_t*) cbdata;
orte_rml_oob_msg_header_t *hdr =
(orte_rml_oob_msg_header_t*) iov[0].iov_base;
if (msg->msg_type == ORTE_RML_BLOCKING_RECV) {
/* blocking send */
msg->msg_status = status;
msg->msg_complete = true;
opal_condition_broadcast(&msg->msg_cond);
} else if (msg->msg_type == ORTE_RML_NONBLOCKING_IOV_RECV) {
/* non-blocking iovec send */
if (status > 0) {
status -= sizeof(orte_rml_oob_msg_header_t);
}
msg->msg_cbfunc.iov(status, &hdr->origin, iov + 1, count - 1,
hdr->tag, msg->msg_cbdata);
if (!msg->msg_persistent) OBJ_RELEASE(msg);
} else if (msg->msg_type == ORTE_RML_NONBLOCKING_BUFFER_RECV) {
/* non-blocking buffer send */
status = orte_dss.load(&msg->msg_recv_buffer,
iov[1].iov_base,
iov[1].iov_len);
msg->msg_cbfunc.buffer(status, &hdr->origin, &msg->msg_recv_buffer,
hdr->tag, msg->msg_cbdata);
if (!msg->msg_persistent) OBJ_RELEASE(msg);
} else {
abort();
}
}
int
orte_rml_oob_recv(orte_process_name_t* peer,
struct iovec *iov,
int count,
orte_rml_tag_t tag,
int flags)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
int i;
msg->msg_type = ORTE_RML_BLOCKING_RECV;
flags |= ORTE_RML_FLAG_RECURSIVE_CALLBACK;
msg->msg_data = malloc(sizeof(struct iovec) * (count + 1));
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
for (i = 0 ; i < count ; ++i) {
msg->msg_data[i + 1].iov_base = iov[i].iov_base;
msg->msg_data[i + 1].iov_len = iov[i].iov_len;
}
ret = orte_rml_oob_module.active_oob->oob_recv_nb(peer, msg->msg_data,
count + 1, tag, flags,
orte_rml_recv_msg_callback,
msg);
if (0 < ret) goto cleanup;
OPAL_THREAD_LOCK(&msg->msg_lock);
while (!msg->msg_complete) {
opal_condition_wait(&msg->msg_cond, &msg->msg_lock);
}
ret = msg->msg_status;
OPAL_THREAD_UNLOCK(&msg->msg_lock);
cleanup:
OBJ_RELEASE(msg);
if (ret > 0) {
ret -= sizeof(struct orte_rml_oob_msg_header_t);
}
return ret;
}
int
orte_rml_oob_recv_nb(orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
int i;
msg->msg_type = ORTE_RML_NONBLOCKING_IOV_RECV;
msg->msg_persistent = (flags & ORTE_RML_PERSISTENT) ? true : false;
msg->msg_cbfunc.iov = cbfunc;
msg->msg_cbdata = cbdata;
msg->msg_data = malloc(sizeof(struct iovec) * (count + 1));
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
for (i = 0 ; i < count ; ++i) {
msg->msg_data[i + 1].iov_base = iov[i].iov_base;
msg->msg_data[i + 1].iov_len = iov[i].iov_len;
}
ret = orte_rml_oob_module.active_oob->oob_recv_nb(peer, msg->msg_data,
count + 1, tag, flags,
orte_rml_recv_msg_callback,
msg);
if (0 < ret) OBJ_RELEASE(msg);
return ret;
}
int
orte_rml_oob_recv_buffer(orte_process_name_t* peer,
orte_buffer_t *buf,
orte_rml_tag_t tag,
int flags)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
msg->msg_type = ORTE_RML_BLOCKING_RECV;
flags |= (ORTE_RML_FLAG_RECURSIVE_CALLBACK | ORTE_RML_ALLOC);
msg->msg_data = malloc(sizeof(struct iovec) * 2);
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
msg->msg_data[1].iov_base = NULL;
msg->msg_data[1].iov_len = 0;
ret = orte_rml_oob_module.active_oob->oob_recv_nb(peer, msg->msg_data,
2, tag, flags,
orte_rml_recv_msg_callback,
msg);
if (0 < ret) goto cleanup;
OPAL_THREAD_LOCK(&msg->msg_lock);
while (!msg->msg_complete) {
opal_condition_wait(&msg->msg_cond, &msg->msg_lock);
}
ret = msg->msg_status;
OPAL_THREAD_UNLOCK(&msg->msg_lock);
if (ret > 0) {
ret = orte_dss.load(buf,
msg->msg_data[1].iov_base,
msg->msg_data[1].iov_len);
}
cleanup:
OBJ_RELEASE(msg);
return ret;
}
int
orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
msg->msg_data = malloc(sizeof(struct iovec) * 2);
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
msg->msg_data[1].iov_base = NULL;
msg->msg_data[1].iov_len = 0;
msg->msg_type = ORTE_RML_NONBLOCKING_BUFFER_RECV;
msg->msg_persistent = (flags & ORTE_RML_PERSISTENT) ? true : false;
msg->msg_cbfunc.buffer = cbfunc;
msg->msg_cbdata = cbdata;
flags |= ORTE_RML_ALLOC;
ret = orte_rml_oob_module.active_oob->oob_recv_nb(peer,
msg->msg_data,
2,
tag, flags,
orte_rml_recv_msg_callback,
msg);
if (0 < ret) OBJ_RELEASE(msg);
return ret;
}
int
orte_rml_oob_recv_cancel(orte_process_name_t* peer,
orte_rml_tag_t tag)
{
return orte_rml_oob_module.active_oob->oob_recv_cancel(peer, tag);
}

285
orte/mca/rml/oob/rml_oob_send.c Обычный файл
Просмотреть файл

@ -0,0 +1,285 @@
/*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "rml_oob.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/oob/oob.h"
#include "orte/mca/oob/base/base.h"
#include "orte/dss/dss.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
static void
orte_rml_send_msg_callback(int status,
struct orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
void* cbdata)
{
orte_rml_oob_msg_t *msg = (orte_rml_oob_msg_t*) cbdata;
orte_rml_oob_msg_header_t *hdr =
(orte_rml_oob_msg_header_t*) iov[0].iov_base;
if (msg->msg_type == ORTE_RML_BLOCKING_SEND) {
/* blocking send */
if (status > 0) {
msg->msg_status = status - sizeof(orte_rml_oob_msg_header_t);
} else {
msg->msg_status = status;
}
msg->msg_complete = true;
opal_condition_broadcast(&msg->msg_cond);
} else if (msg->msg_type == ORTE_RML_NONBLOCKING_IOV_SEND) {
/* non-blocking iovec send */
if (status > 0) {
status -= sizeof(orte_rml_oob_msg_header_t);
}
msg->msg_cbfunc.iov(status, peer, iov + 1, count - 1,
hdr->tag, msg->msg_cbdata);
OBJ_RELEASE(msg);
} else if (msg->msg_type == ORTE_RML_NONBLOCKING_BUFFER_SEND) {
/* non-blocking buffer send */
if (status > 0) {
status -= sizeof(orte_rml_oob_msg_header_t);
}
msg->msg_cbfunc.buffer(status, peer, msg->user_buffer,
hdr->tag, msg->msg_cbdata);
OBJ_RELEASE(msg->user_buffer);
OBJ_RELEASE(msg);
} else {
abort();
}
}
int
orte_rml_oob_send(orte_process_name_t* peer,
struct iovec *iov,
int count,
int tag,
int flags)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
orte_process_name_t next;
int real_tag;
int i;
int bytes = 0;
msg->msg_type = ORTE_RML_BLOCKING_SEND;
flags |= ORTE_RML_FLAG_RECURSIVE_CALLBACK;
next = orte_routed.get_route(peer);
if (next.vpid == ORTE_VPID_INVALID) {
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
return ORTE_ERR_ADDRESSEE_UNKNOWN;
}
msg->msg_data = malloc(sizeof(struct iovec) * (count + 1));
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
bytes += msg->msg_data[0].iov_len;
for (i = 0 ; i < count ; ++i) {
msg->msg_data[i + 1].iov_base = iov[i].iov_base;
msg->msg_data[i + 1].iov_len = iov[i].iov_len;
bytes += msg->msg_data[i + 1].iov_len;
}
msg->msg_header.origin = *ORTE_PROC_MY_NAME;
msg->msg_header.destination = *peer;
msg->msg_header.tag = tag;
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &next, peer)) {
real_tag = tag;
} else {
real_tag = ORTE_RML_TAG_RML_ROUTE;
}
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
msg->msg_data,
count + 1,
real_tag,
flags,
orte_rml_send_msg_callback,
msg);
if (0 < ret) goto cleanup;
OPAL_THREAD_LOCK(&msg->msg_lock);
while (!msg->msg_complete) {
opal_condition_wait(&msg->msg_cond, &msg->msg_lock);
}
ret = msg->msg_status;
OPAL_THREAD_UNLOCK(&msg->msg_lock);
cleanup:
OBJ_RELEASE(msg);
return ret;
}
int
orte_rml_oob_send_nb(orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
int flags,
orte_rml_callback_fn_t cbfunc,
void* cbdata)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
int ret;
int real_tag;
orte_process_name_t next;
int i;
int bytes = 0;
msg->msg_type = ORTE_RML_NONBLOCKING_IOV_SEND;
msg->msg_cbfunc.iov = cbfunc;
msg->msg_cbdata = cbdata;
next = orte_routed.get_route(peer);
if (next.vpid == ORTE_VPID_INVALID) {
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
return ORTE_ERR_ADDRESSEE_UNKNOWN;
}
msg->msg_data = malloc(sizeof(struct iovec) * (count + 1));
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
bytes += msg->msg_data[0].iov_len;
for (i = 0 ; i < count ; ++i) {
msg->msg_data[i + 1].iov_base = iov[i].iov_base;
msg->msg_data[i + 1].iov_len = iov[i].iov_len;
bytes += msg->msg_data[i + 1].iov_len;
}
msg->msg_header.origin = *ORTE_PROC_MY_NAME;
msg->msg_header.destination = *peer;
msg->msg_header.tag = tag;
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &next, peer)) {
real_tag = tag;
} else {
real_tag = ORTE_RML_TAG_RML_ROUTE;
}
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
msg->msg_data,
count + 1,
real_tag,
flags,
orte_rml_send_msg_callback,
msg);
if (ret < 0) OBJ_RELEASE(msg);
return ret;
}
int
orte_rml_oob_send_buffer(orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
int flags)
{
int ret;
void *dataptr;
orte_std_cntr_t datalen;
struct iovec iov[1];
/* first build iovec from buffer information */
ret = orte_dss.unload(buffer, &dataptr, &datalen);
if (ret != ORTE_SUCCESS) return ret;
orte_dss.load(buffer, dataptr, datalen);
iov[0].iov_base = (IOVBASE_TYPE*)dataptr;
iov[0].iov_len = datalen;
return orte_rml_oob_send(peer, iov, 1, tag, flags);
}
int
orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
orte_buffer_t* buffer,
orte_rml_tag_t tag,
int flags,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata)
{
orte_rml_oob_msg_t *msg = OBJ_NEW(orte_rml_oob_msg_t);
void *dataptr;
orte_std_cntr_t datalen;
int ret;
int real_tag;
orte_process_name_t next;
int bytes = 0;
/* first build iovec from buffer information */
ret = orte_dss.unload(buffer, &dataptr, &datalen);
if (ORTE_SUCCESS != ret) {
OBJ_RELEASE(msg);
return ret;
}
orte_dss.load(buffer, dataptr, datalen);
msg->msg_type = ORTE_RML_NONBLOCKING_BUFFER_SEND;
msg->msg_cbfunc.buffer = cbfunc;
msg->msg_cbdata = cbdata;
msg->user_buffer = buffer;
msg->msg_data = malloc(sizeof(struct iovec) * 2);
next = orte_routed.get_route(peer);
if (next.vpid == ORTE_VPID_INVALID) {
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
return ORTE_ERR_ADDRESSEE_UNKNOWN;
}
msg->msg_data[0].iov_base = &msg->msg_header;
msg->msg_data[0].iov_len = sizeof(orte_rml_oob_msg_header_t);
bytes += msg->msg_data[0].iov_len;
msg->msg_data[1].iov_base = (IOVBASE_TYPE*)dataptr;
msg->msg_data[1].iov_len = datalen;
bytes += msg->msg_data[1].iov_len;
msg->msg_header.origin = *ORTE_PROC_MY_NAME;
msg->msg_header.destination = *peer;
msg->msg_header.tag = tag;
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &next, peer)) {
real_tag = tag;
} else {
real_tag = ORTE_RML_TAG_RML_ROUTE;
}
OBJ_RETAIN(buffer);
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
msg->msg_data,
2,
real_tag,
flags,
orte_rml_send_msg_callback,
msg);
if (ret < 0) {
OBJ_RELEASE(msg);
OBJ_RELEASE(buffer);
}
return ret;
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше