1
1

Merge pull request #6350 from rhc54/topic/reg

Move from the use of regex to compression
Этот коммит содержится в:
Ralph Castain 2019-02-08 12:54:14 -08:00 коммит произвёл GitHub
родитель fcbc7ea298 fc0b0938a7
Коммит 89c1b0c7ed
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
115 изменённых файлов: 2448 добавлений и 5216 удалений

Просмотреть файл

@ -18,6 +18,8 @@
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

Просмотреть файл

@ -3,6 +3,7 @@
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -14,7 +15,6 @@ headers += \
base/base.h
libmca_compress_la_SOURCES += \
base/compress_base_open.c \
base/compress_base_close.c \
base/compress_base_frame.c \
base/compress_base_select.c \
base/compress_base_fns.c

Просмотреть файл

@ -3,6 +3,7 @@
* University Research and Technology
* Corporation. All rights reserved.
*
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,6 +28,12 @@
extern "C" {
#endif
/**
 * Framework-level settings for the COMPRESS framework.
 */
typedef struct {
/* Size threshold (bytes); registered as the "limit" MCA variable with the
 * help text "Threshold beyond which data will be compressed" */
size_t compress_limit;
} opal_compress_base_t;
/* Global instance of the framework-level settings */
OPAL_DECLSPEC extern opal_compress_base_t opal_compress_base;
/**
* Initialize the COMPRESS MCA framework
*

Просмотреть файл

@ -1,36 +0,0 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <string.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/include/opal/constants.h"
#include "opal/mca/compress/compress.h"
#include "opal/mca/compress/base/base.h"
/**
 * Shut down the COMPRESS framework.
 *
 * Does nothing when checkpoint/restart support is disabled; otherwise
 * runs the selected module's finalize hook (if any) and closes all
 * components that are still open.
 *
 * @return OPAL_SUCCESS when C/R is disabled, otherwise the result of
 *         mca_base_framework_components_close().
 */
int opal_compress_base_close(void)
{
    /* Compression currently only used with C/R */
    if (!opal_cr_is_enabled) {
        /* report the routine we are actually in (was mislabelled "open") */
        opal_output_verbose(10, opal_compress_base_framework.framework_output,
                            "compress:close: FT is not enabled, skipping!");
        return OPAL_SUCCESS;
    }

    /* Call the component's finalize routine */
    if (NULL != opal_compress.finalize) {
        opal_compress.finalize();
    }

    /* Close all available modules that are open */
    return mca_base_framework_components_close(&opal_compress_base_framework, NULL);
}

Просмотреть файл

@ -6,6 +6,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,14 +24,31 @@
/*
* Globals
*/
/*
 * Default block compressor used when no component supplies one.
 * It never compresses anything: the arguments are ignored, the
 * output locations are left untouched, and "false" is returned.
 */
static bool compress_block(uint8_t *inbytes,
                           size_t inlen,
                           uint8_t **outbytes,
                           size_t *olen)
{
    (void) inbytes;
    (void) inlen;
    (void) outbytes;
    (void) olen;

    return false;
}
/*
 * Default block decompressor used when no component supplies one.
 * It ignores its arguments, leaves *outbytes untouched, and always
 * returns "false".
 */
static bool decompress_block(uint8_t **outbytes, size_t olen,
                             uint8_t *inbytes, size_t len)
{
    (void) outbytes;
    (void) olen;
    (void) inbytes;
    (void) len;

    return false;
}
opal_compress_base_module_t opal_compress = {
NULL, /* init */
NULL, /* finalize */
NULL, /* compress */
NULL, /* compress_nb */
NULL, /* decompress */
NULL /* decompress_nb */
NULL, /* decompress_nb */
compress_block,
decompress_block
};
opal_compress_base_t opal_compress_base = {0};
opal_compress_base_component_t opal_compress_base_selected_component = {{0}};
@ -42,6 +60,12 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, compress, "COMPRESS MCA",
/*
 * Framework registration hook: installs the default compression
 * threshold and registers it as the "opal_compress_base_limit"
 * style MCA variable (project "opal", framework "compress",
 * component "base", name "limit").  The registration result is
 * deliberately ignored and OPAL_SUCCESS is always returned.
 */
static int opal_compress_base_register(mca_base_register_flag_t flags)
{
    /* default is assigned before the variable is bound to this storage */
    opal_compress_base.compress_limit = 4096;

    (void) mca_base_var_register("opal", "compress", "base", "limit",
                                 "Threshold beyond which data will be compressed",
                                 MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
                                 OPAL_INFO_LVL_3,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &opal_compress_base.compress_limit);

    return OPAL_SUCCESS;
}
@ -51,13 +75,17 @@ static int opal_compress_base_register(mca_base_register_flag_t flags)
*/
int opal_compress_base_open(mca_base_open_flag_t flags)
{
/* Framework open hook.  As shown here, it returns OPAL_SUCCESS without
 * opening any component when opal_cr_is_enabled is false; otherwise it
 * returns the result of mca_base_framework_components_open().
 * NOTE(review): this hunk appears to show the pre-change body; the commit
 * removes the C/R guard below - confirm against the final file. */
/* Compression currently only used with C/R */
if(!opal_cr_is_enabled) {
opal_output_verbose(10, opal_compress_base_framework.framework_output,
"compress:open: FT is not enabled, skipping!");
return OPAL_SUCCESS;
}
/* Open up all available components */
return mca_base_framework_components_open(&opal_compress_base_framework, flags);
}
/*
 * Framework close hook: lets the active module finalize itself (when it
 * provides a finalize entry) and then closes the framework's components.
 * Returns whatever mca_base_framework_components_close() returns.
 */
int opal_compress_base_close(void)
{
    /* give the active module a chance to clean up */
    if (opal_compress.finalize != NULL) {
        opal_compress.finalize();
    }

    /* then close every component that is still open */
    return mca_base_framework_components_close(&opal_compress_base_framework, NULL);
}

Просмотреть файл

@ -7,6 +7,7 @@
*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,17 +30,10 @@
int opal_compress_base_select(void)
{
int ret, exit_status = OPAL_SUCCESS;
int ret = OPAL_SUCCESS;
opal_compress_base_component_t *best_component = NULL;
opal_compress_base_module_t *best_module = NULL;
/* Compression currently only used with C/R */
if( !opal_cr_is_enabled ) {
opal_output_verbose(10, opal_compress_base_framework.framework_output,
"compress:open: FT is not enabled, skipping!");
return OPAL_SUCCESS;
}
/*
* Select the best component
*/
@ -47,8 +41,8 @@ int opal_compress_base_select(void)
&opal_compress_base_framework.framework_components,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component, NULL) ) {
/* This will only happen if no component was selected */
exit_status = OPAL_ERROR;
/* This will only happen if no component was selected,
* in which case we use the default one */
goto cleanup;
}
@ -58,12 +52,11 @@ int opal_compress_base_select(void)
/* Initialize the winner */
if (NULL != best_module) {
if (OPAL_SUCCESS != (ret = best_module->init()) ) {
exit_status = ret;
goto cleanup;
}
opal_compress = *best_module;
}
cleanup:
return exit_status;
return ret;
}

Просмотреть файл

@ -4,6 +4,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -65,22 +66,39 @@ opal_compress_bzip_component_t mca_compress_bzip_component = {
}
};
/*
 * Placeholder for the block-compression entry of the bzip module:
 * ignores its arguments, leaves the outputs untouched and reports
 * that no compression was performed.
 */
static bool nocompress(uint8_t *inbytes,
                       size_t inlen,
                       uint8_t **outbytes,
                       size_t *olen)
{
    (void) inbytes;
    (void) inlen;
    (void) outbytes;
    (void) olen;

    return false;
}
/*
 * Placeholder for the block-decompression entry of the bzip module:
 * ignores its arguments, leaves *outbytes untouched and returns false.
 */
static bool nodecompress(uint8_t **outbytes, size_t olen,
                         uint8_t *inbytes, size_t len)
{
    (void) outbytes;
    (void) olen;
    (void) inbytes;
    (void) len;

    return false;
}
/*
* Bzip module
*/
static opal_compress_base_module_t loc_module = {
/** Initialization Function */
opal_compress_bzip_module_init,
.init = opal_compress_bzip_module_init,
/** Finalization Function */
opal_compress_bzip_module_finalize,
.finalize = opal_compress_bzip_module_finalize,
/** Compress Function */
opal_compress_bzip_compress,
opal_compress_bzip_compress_nb,
.compress = opal_compress_bzip_compress,
.compress_nb = opal_compress_bzip_compress_nb,
/** Decompress Function */
opal_compress_bzip_decompress,
opal_compress_bzip_decompress_nb
.decompress = opal_compress_bzip_decompress,
.decompress_nb = opal_compress_bzip_decompress_nb,
.compress_block = nocompress,
.decompress_block = nodecompress
};
static int compress_bzip_register (void)

Просмотреть файл

@ -6,6 +6,7 @@
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -82,6 +83,20 @@ typedef int (*opal_compress_base_module_decompress_fn_t)
typedef int (*opal_compress_base_module_decompress_nb_fn_t)
(char * cname, char **fname, pid_t *child_pid);
/**
* Compress a block of bytes held in memory
*
* Arguments:
*   inbytes  - the bytes to be compressed
*   inlen    - number of bytes in inbytes
*   outbytes - location where the address of the result buffer is returned
*   olen     - location where the size of the result is returned
*
* Returns true if the data was compressed, false otherwise.
* NOTE(review): in the zlib component the result buffer is obtained with
* malloc(), so the caller presumably owns and frees it - confirm.
*/
typedef bool (*opal_compress_base_module_compress_string_fn_t)(uint8_t *inbytes,
size_t inlen,
uint8_t **outbytes,
size_t *olen);
/**
* Decompress a block of bytes held in memory
*
* Arguments:
*   outbytes - location where the address of the decompressed buffer is returned
*   olen     - size of the decompressed data (supplied by the caller; the
*              zlib component allocates exactly this many bytes)
*   inbytes  - the compressed bytes
*   len      - number of bytes in inbytes
*
* Returns a bool status; the base/bzip/gzip placeholders always return false.
*/
typedef bool (*opal_compress_base_module_decompress_string_fn_t)(uint8_t **outbytes, size_t olen,
uint8_t *inbytes, size_t len);
/**
* Structure for COMPRESS components.
*/
@ -117,6 +132,10 @@ struct opal_compress_base_module_1_0_0_t {
/** Decompress Interface */
opal_compress_base_module_decompress_fn_t decompress;
opal_compress_base_module_decompress_nb_fn_t decompress_nb;
/* COMPRESS STRING */
opal_compress_base_module_compress_string_fn_t compress_block;
opal_compress_base_module_decompress_string_fn_t decompress_block;
};
typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_1_0_0_t;
typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_t;

Просмотреть файл

Просмотреть файл

@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#

Просмотреть файл

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

0
opal/mca/compress/gzip/.opal_ignore Обычный файл
Просмотреть файл

Просмотреть файл

@ -4,6 +4,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -65,22 +66,39 @@ opal_compress_gzip_component_t mca_compress_gzip_component = {
}
};
/*
 * Placeholder for the block-compression entry of the gzip module:
 * ignores its arguments, leaves the outputs untouched and reports
 * that no compression was performed.
 */
static bool nocompress(uint8_t *inbytes,
                       size_t inlen,
                       uint8_t **outbytes,
                       size_t *olen)
{
    (void) inbytes;
    (void) inlen;
    (void) outbytes;
    (void) olen;

    return false;
}
/*
 * Placeholder for the block-decompression entry of the gzip module:
 * ignores its arguments, leaves *outbytes untouched and returns false.
 */
static bool nodecompress(uint8_t **outbytes, size_t olen,
                         uint8_t *inbytes, size_t len)
{
    (void) outbytes;
    (void) olen;
    (void) inbytes;
    (void) len;

    return false;
}
/*
* Gzip module
*/
static opal_compress_base_module_t loc_module = {
/** Initialization Function */
opal_compress_gzip_module_init,
.init = opal_compress_gzip_module_init,
/** Finalization Function */
opal_compress_gzip_module_finalize,
.finalize = opal_compress_gzip_module_finalize,
/** Compress Function */
opal_compress_gzip_compress,
opal_compress_gzip_compress_nb,
.compress = opal_compress_gzip_compress,
.compress_nb = opal_compress_gzip_compress_nb,
/** Decompress Function */
opal_compress_gzip_decompress,
opal_compress_gzip_decompress_nb
.decompress = opal_compress_gzip_decompress,
.decompress_nb = opal_compress_gzip_decompress_nb,
.compress_block = nocompress,
.decompress_block = nodecompress
};
static int compress_gzip_register (void)

0
opal/mca/compress/reverse/.opal_ignore Обычный файл
Просмотреть файл

Просмотреть файл

@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#

Просмотреть файл

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2018 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

42
opal/mca/compress/zlib/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,42 @@
#
# Copyright (c) 2004-2010 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for zlib; compress_zlib_CPPFLAGS is substituted by
# this component's configure.m4.
AM_CPPFLAGS = $(compress_zlib_CPPFLAGS)
# Files that make up the component (shared by the DSO and static variants).
sources = \
compress_zlib.h \
compress_zlib_component.c \
compress_zlib.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_compress_zlib_DSO
component_noinst =
component_install = mca_compress_zlib.la
else
component_noinst = libmca_compress_zlib.la
component_install =
endif
# DSO variant: installed into $(opallibdir) and linked against open-pal
# plus the zlib libraries.
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_compress_zlib_la_SOURCES = $(sources)
mca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS)
mca_compress_zlib_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la $(compress_zlib_LIBS)
# Static variant: not installed on its own (noinst convenience library).
noinst_LTLIBRARIES = $(component_noinst)
libmca_compress_zlib_la_SOURCES = $(sources)
libmca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS)
libmca_compress_zlib_la_LIBADD = $(compress_zlib_LIBS)

133
opal/mca/compress/zlib/compress_zlib.c Обычный файл
Просмотреть файл

@ -0,0 +1,133 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <zlib.h>
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/printf.h"
#include "opal/constants.h"
#include "opal/util/basename.h"
#include "opal/mca/compress/compress.h"
#include "opal/mca/compress/base/base.h"
#include "compress_zlib.h"
/*
 * Module init entry for the zlib component.
 * There is nothing to set up, so it always returns OPAL_SUCCESS.
 */
int opal_compress_zlib_module_init(void)
{
return OPAL_SUCCESS;
}
/*
 * Module finalize entry for the zlib component.
 * There is nothing to tear down, so it always returns OPAL_SUCCESS.
 */
int opal_compress_zlib_module_finalize(void)
{
return OPAL_SUCCESS;
}
bool opal_compress_zlib_compress_block(uint8_t *inbytes,
size_t inlen,
uint8_t **outbytes,
size_t *olen)
{
z_stream strm;
size_t len;
uint8_t *tmp;
if (inlen < opal_compress_base.compress_limit) {
return false;
}
opal_output_verbose(2, opal_compress_base_framework.framework_output,
"COMPRESSING");
/* set default output */
*outbytes = NULL;
*olen = 0;
/* setup the stream */
memset (&strm, 0, sizeof (strm));
deflateInit (&strm, 9);
/* get an upper bound on the required output storage */
len = deflateBound(&strm, inlen);
if (NULL == (tmp = (uint8_t*)malloc(len))) {
return false;
}
strm.next_in = inbytes;
strm.avail_in = inlen;
/* allocating the upper bound guarantees zlib will
* always successfully compress into the available space */
strm.avail_out = len;
strm.next_out = tmp;
deflate (&strm, Z_FINISH);
deflateEnd (&strm);
*outbytes = tmp;
*olen = len - strm.avail_out;
opal_output_verbose(2, opal_compress_base_framework.framework_output,
"\tINSIZE %d OUTSIZE %d", (int)inlen, (int)*olen);
return true; // we did the compression
}
bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen,
uint8_t *inbytes, size_t len)
{
uint8_t *dest;
z_stream strm;
/* set the default error answer */
*outbytes = NULL;
opal_output_verbose(2, opal_compress_base_framework.framework_output, "DECOMPRESS");
/* setting destination to the fully decompressed size */
dest = (uint8_t*)malloc(olen);
if (NULL == dest) {
return false;
}
memset (&strm, 0, sizeof (strm));
if (Z_OK != inflateInit(&strm)) {
free(dest);
return false;
}
strm.avail_in = len;
strm.next_in = inbytes;
strm.avail_out = olen;
strm.next_out = dest;
if (Z_STREAM_END != inflate (&strm, Z_FINISH)) {
opal_output(0, "\tDECOMPRESS FAILED: %s", strm.msg);
}
inflateEnd (&strm);
*outbytes = dest;
opal_output_verbose(2, opal_compress_base_framework.framework_output,
"\tINSIZE: %d OUTSIZE %d", (int)len, (int)olen);
return true;
}

66
opal/mca/compress/zlib/compress_zlib.h Обычный файл
Просмотреть файл

@ -0,0 +1,66 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* ZLIB COMPRESS component
*
* Uses the zlib library
*/
#ifndef MCA_COMPRESS_ZLIB_EXPORT_H
#define MCA_COMPRESS_ZLIB_EXPORT_H
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/compress/compress.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Local Component structures
*/
/* zlib component: nothing beyond the base COMPRESS component is stored */
struct opal_compress_zlib_component_t {
opal_compress_base_component_t super; /** Base COMPRESS component */
};
typedef struct opal_compress_zlib_component_t opal_compress_zlib_component_t;
/* The single component instance (defined in compress_zlib_component.c) */
extern opal_compress_zlib_component_t mca_compress_zlib_component;
/* Component query: returns the zlib module and the component's priority */
int opal_compress_zlib_component_query(mca_base_module_t **module, int *priority);
/*
* Module functions
*/
int opal_compress_zlib_module_init(void);
int opal_compress_zlib_module_finalize(void);
/*
* Actual functionality
*/
/* Compress inlen bytes at inbytes; when true is returned, *outbytes holds a
 * malloc'd buffer with the compressed data and *olen its size. */
bool opal_compress_zlib_compress_block(uint8_t *inbytes,
size_t inlen,
uint8_t **outbytes,
size_t *olen);
/* Decompress len bytes at inbytes into a malloc'd buffer of olen bytes
 * (the decompressed size is supplied by the caller) returned in *outbytes. */
bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen,
uint8_t *inbytes, size_t len);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_COMPRESS_ZLIB_EXPORT_H */

Просмотреть файл

@ -0,0 +1,149 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/compress/compress.h"
#include "opal/mca/compress/base/base.h"
#include "compress_zlib.h"
/*
* Public string for version number
*/
const char *opal_compress_zlib_component_version_string =
"OPAL COMPRESS zlib MCA component version " OPAL_VERSION;
/*
* Local functionality
*/
static int compress_zlib_register (void);
static int compress_zlib_open(void);
static int compress_zlib_close(void);
/*
* Instantiate the public struct with all of our public information
* and pointer to our public functions in it
*/
opal_compress_zlib_component_t mca_compress_zlib_component = {
/* First do the base component stuff */
{
/* Handle the general mca_component_t struct containing
* meta information about the component itself
*/
.base_version = {
OPAL_COMPRESS_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "zlib",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
/* Component open and close functions */
.mca_open_component = compress_zlib_open,
.mca_close_component = compress_zlib_close,
.mca_query_component = opal_compress_zlib_component_query,
.mca_register_component_params = compress_zlib_register
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initial values only: verbose is (re)set in compress_zlib_register and
 * output_handle is assigned in compress_zlib_open */
.verbose = 0,
.output_handle = -1,
}
};
/*
* Zlib module
*/
/* Only the block (in-memory) entry points are provided; members not named
 * here are left zero-initialized.
 * NOTE(review): the bzip/gzip modules in this change also fill in
 * compress/compress_nb/decompress/decompress_nb - confirm no caller invokes
 * those entries while the zlib module is selected. */
static opal_compress_base_module_t loc_module = {
/** Initialization Function */
.init = opal_compress_zlib_module_init,
/** Finalization Function */
.finalize = opal_compress_zlib_module_finalize,
/** Compress Function */
.compress_block = opal_compress_zlib_compress_block,
/** Decompress Function */
.decompress_block = opal_compress_zlib_uncompress_block,
};
/*
 * Register the component's MCA variables ("priority" and "verbose"),
 * installing their defaults (50 and 0) first.
 *
 * Returns the negative registration result if either registration
 * fails, OPAL_SUCCESS otherwise.
 */
static int compress_zlib_register (void)
{
    int rc;

    /* selection priority */
    mca_compress_zlib_component.super.priority = 50;
    rc = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version,
                                          "priority",
                                          "Priority of the COMPRESS zlib component "
                                          "(default: 50)",
                                          MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                          MCA_BASE_VAR_FLAG_SETTABLE,
                                          OPAL_INFO_LVL_9,
                                          MCA_BASE_VAR_SCOPE_ALL_EQ,
                                          &mca_compress_zlib_component.super.priority);
    if (rc < 0) {
        return rc;
    }

    /* component-specific verbosity */
    mca_compress_zlib_component.super.verbose = 0;
    rc = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version,
                                          "verbose",
                                          "Verbose level for the COMPRESS zlib component",
                                          MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                          MCA_BASE_VAR_FLAG_SETTABLE,
                                          OPAL_INFO_LVL_9,
                                          MCA_BASE_VAR_SCOPE_LOCAL,
                                          &mca_compress_zlib_component.super.verbose);
    if (rc < 0) {
        return rc;
    }
    return OPAL_SUCCESS;
}
/*
 * Component open: choose the output stream used for this component's
 * diagnostics and emit a few debug lines.  Always returns OPAL_SUCCESS.
 */
static int compress_zlib_open(void)
{
    /* Without a custom verbose level we share the framework's output
     * channel; otherwise we open our own stream and apply that level.
     */
    if (0 == mca_compress_zlib_component.super.verbose) {
        mca_compress_zlib_component.super.output_handle =
            opal_compress_base_framework.framework_output;
    } else {
        mca_compress_zlib_component.super.output_handle = opal_output_open(NULL);
        opal_output_set_verbosity(mca_compress_zlib_component.super.output_handle,
                                  mca_compress_zlib_component.super.verbose);
    }

    /*
     * Debug output
     */
    opal_output_verbose(10, mca_compress_zlib_component.super.output_handle,
                        "compress:zlib: open()");
    opal_output_verbose(20, mca_compress_zlib_component.super.output_handle,
                        "compress:zlib: open: priority = %d",
                        mca_compress_zlib_component.super.priority);
    opal_output_verbose(20, mca_compress_zlib_component.super.output_handle,
                        "compress:zlib: open: verbosity = %d",
                        mca_compress_zlib_component.super.verbose);

    return OPAL_SUCCESS;
}
/*
 * Component close: nothing is released here; always returns OPAL_SUCCESS.
 * NOTE(review): compress_zlib_open may obtain a private stream from
 * opal_output_open() that is not closed here - confirm whether that is
 * intentionally left to output finalization.
 */
static int compress_zlib_close(void)
{
return OPAL_SUCCESS;
}
/*
 * Component query: report this component's current priority and hand
 * back the file-local zlib module.  Always returns OPAL_SUCCESS.
 */
int opal_compress_zlib_component_query(mca_base_module_t **module, int *priority)
{
    *priority = mca_compress_zlib_component.super.priority;
    *module = (mca_base_module_t *) &loc_module;

    return OPAL_SUCCESS;
}

102
opal/mca/compress/zlib/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,102 @@
# -*- shell-script -*-
#
# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
# Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_opal_compress_zlib_CONFIG([action-if-can-compile],
#                               [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_opal_compress_zlib_CONFIG],[
    # Decide whether the zlib compress component can be built.
    # Honors --with-zlib and --with-zlib-libdir, runs the first argument
    # when zlib is usable and the second one otherwise, and substitutes
    # the compress_zlib_ flag variables consumed by the component Makefile.
    AC_CONFIG_FILES([opal/mca/compress/zlib/Makefile])

    OPAL_VAR_SCOPE_PUSH([opal_zlib_dir opal_zlib_libdir opal_zlib_standard_lib_location opal_zlib_standard_header_location opal_check_zlib_save_CPPFLAGS opal_check_zlib_save_LDFLAGS opal_check_zlib_save_LIBS])

    AC_ARG_WITH([zlib],
                [AC_HELP_STRING([--with-zlib=DIR],
                                [Search for zlib headers and libraries in DIR ])])

    AC_ARG_WITH([zlib-libdir],
                [AC_HELP_STRING([--with-zlib-libdir=DIR],
                                [Search for zlib libraries in DIR ])])

    # the package check may modify these, so remember them for later
    opal_check_zlib_save_CPPFLAGS="$CPPFLAGS"
    opal_check_zlib_save_LDFLAGS="$LDFLAGS"
    opal_check_zlib_save_LIBS="$LIBS"

    opal_zlib_support=0

    if test "$with_zlib" != "no"; then
        AC_MSG_CHECKING([for zlib in])
        if test ! -z "$with_zlib" && test "$with_zlib" != "yes"; then
            opal_zlib_dir=$with_zlib
            opal_zlib_source=$with_zlib
            opal_zlib_standard_header_location=no
            opal_zlib_standard_lib_location=no
            AS_IF([test -z "$with_zlib_libdir" || test "$with_zlib_libdir" = "yes"],
                  [if test -d $with_zlib/lib; then
                       opal_zlib_libdir=$with_zlib/lib
                   elif test -d $with_zlib/lib64; then
                       opal_zlib_libdir=$with_zlib/lib64
                   else
                       AC_MSG_RESULT([Could not find $with_zlib/lib or $with_zlib/lib64])
                       AC_MSG_ERROR([Can not continue])
                   fi
                   AC_MSG_RESULT([$opal_zlib_dir and $opal_zlib_libdir])],
                  [AC_MSG_RESULT([$with_zlib_libdir])])
        else
            AC_MSG_RESULT([(default search paths)])
            opal_zlib_source=standard
            opal_zlib_standard_header_location=yes
            opal_zlib_standard_lib_location=yes
        fi
        AS_IF([test ! -z "$with_zlib_libdir" && test "$with_zlib_libdir" != "yes"],
              [opal_zlib_libdir="$with_zlib_libdir"
               opal_zlib_standard_lib_location=no])

        # The prefix given here names the CPPFLAGS, LDFLAGS and LIBS result
        # variables, so it has to match the compress_zlib_ names that are
        # substituted below and used by the component Makefile.
        OPAL_CHECK_PACKAGE([compress_zlib],
                           [zlib.h],
                           [z],
                           [deflate],
                           [-lz],
                           [$opal_zlib_dir],
                           [$opal_zlib_libdir],
                           [opal_zlib_support=1],
                           [opal_zlib_support=0])
    fi

    # an explicit request that cannot be satisfied is a hard error
    if test ! -z "$with_zlib" && test "$with_zlib" != "no" && test "$opal_zlib_support" != "1"; then
        AC_MSG_WARN([ZLIB SUPPORT REQUESTED AND NOT FOUND])
        AC_MSG_ERROR([CANNOT CONTINUE])
    fi

    AC_MSG_CHECKING([will zlib support be built])
    if test "$opal_zlib_support" != "1"; then
        AC_MSG_RESULT([no])
    else
        AC_MSG_RESULT([yes])
    fi

    CPPFLAGS="$opal_check_zlib_save_CPPFLAGS"
    LDFLAGS="$opal_check_zlib_save_LDFLAGS"
    LIBS="$opal_check_zlib_save_LIBS"

    AS_IF([test "$opal_zlib_support" = "1"],
          [$1
           OPAL_SUMMARY_ADD([[External Packages]],[[ZLIB]], [opal_zlib], [yes ($opal_zlib_source)])],
          [$2])

    # substitute in the things needed to build the zlib component
    AC_SUBST([compress_zlib_CFLAGS])
    AC_SUBST([compress_zlib_CPPFLAGS])
    AC_SUBST([compress_zlib_LDFLAGS])
    AC_SUBST([compress_zlib_LIBS])

    OPAL_VAR_SCOPE_POP
])dnl

Просмотреть файл

@ -3,5 +3,5 @@
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: maintenance
owner:project
status:maintenance

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
@ -61,9 +61,7 @@
#include "opal/mca/if/base/base.h"
#include "opal/dss/dss.h"
#include "opal/mca/shmem/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "opal/mca/compress/base/base.h"
#endif
#include "opal/threads/threads.h"
#include "opal/threads/tsd.h"
@ -524,7 +522,8 @@ opal_init_util(int* pargc, char*** pargv)
static mca_base_framework_t *opal_init_frameworks[] = {
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
&opal_shmem_base_framework, &opal_reachable_base_framework, NULL,
&opal_shmem_base_framework, &opal_reachable_base_framework, &opal_compress_base_framework,
NULL,
};
int
@ -585,5 +584,10 @@ opal_init(int* pargc, char*** pargv)
return opal_init_error ("opal_reachable_base_select", ret);
}
/* Initialize compress framework */
if (OPAL_SUCCESS != (ret = opal_compress_base_select())) {
return opal_init_error ("opal_compress_base_select", ret);
}
return OPAL_SUCCESS;
}

Просмотреть файл

@ -9,7 +9,7 @@
* Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -277,8 +277,7 @@ static void job_errors(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid),
ORTE_NAME_PRINT(&jdata->originator)));
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&jdata->originator, answer,
if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer,
ORTE_RML_TAG_LAUNCH_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -358,7 +357,6 @@ static void proc_errors(int fd, short args, void *cbdata)
orte_proc_state_t state = caddy->proc_state;
int i;
int32_t i32, *i32ptr;
char *rtmod;
ORTE_ACQUIRE_OBJECT(caddy);
@ -381,7 +379,6 @@ static void proc_errors(int fd, short args, void *cbdata)
goto cleanup;
}
pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
/* we MUST handle a communication failure before doing anything else
* as it requires some special care to avoid normal termination issues
@ -412,9 +409,9 @@ static void proc_errors(int fd, short args, void *cbdata)
"%s Comm failure: daemons terminating - recording daemon %s as gone",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc)));
/* remove from dependent routes, if it is one */
orte_routed.route_lost(rtmod, proc);
orte_routed.route_lost(proc);
/* if all my routes and local children are gone, then terminate ourselves */
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) {
@ -435,7 +432,7 @@ static void proc_errors(int fd, short args, void *cbdata)
OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output,
"%s Comm failure: %d routes remain alive",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)orte_routed.num_routes(rtmod)));
(int)orte_routed.num_routes()));
}
goto cleanup;
}
@ -493,7 +490,7 @@ static void proc_errors(int fd, short args, void *cbdata)
}
/* if all my routes and children are gone, then terminate
ourselves nicely (i.e., this is a normal termination) */
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:hnp all routes gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -718,7 +715,7 @@ static void proc_errors(int fd, short args, void *cbdata)
default_hnp_abort(jdata);
}
/* remove from dependent routes, if it is one */
orte_routed.route_lost(rtmod, proc);
orte_routed.route_lost(proc);
break;
case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG:
@ -841,7 +838,7 @@ static void default_hnp_abort(orte_job_t *jdata)
i32ptr = &i32;
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) {
/* warn user */
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code" :

Просмотреть файл

@ -8,7 +8,7 @@
* reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -204,8 +204,7 @@ static void orted_abort(int error_code, char *fmt, ...)
}
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -303,8 +302,7 @@ static void job_errors(int fd, short args, void *cbdata)
goto cleanup;
}
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -321,7 +319,6 @@ static void proc_errors(int fd, short args, void *cbdata)
orte_job_t *jdata;
orte_process_name_t *proc = &caddy->name;
orte_proc_state_t state = caddy->proc_state;
char *rtmod;
orte_proc_t *child, *ptr;
opal_buffer_t *alert;
orte_plm_cmd_flag_t cmd;
@ -386,9 +383,6 @@ static void proc_errors(int fd, short args, void *cbdata)
goto cleanup;
}
/* get our management conduit's routed module name */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
if (ORTE_PROC_STATE_COMM_FAILED == state) {
/* if it is our own connection, ignore it */
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, proc)) {
@ -427,56 +421,6 @@ static void proc_errors(int fd, short args, void *cbdata)
"%s errmgr:default:orted daemon %s exited",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
/* if we are using static ports, then it is possible that the HNP
* will not see this termination. So if the HNP didn't order us
* to terminate, then we should ensure it knows */
if (orte_static_ports && !orte_orteds_term_ordered) {
/* send an alert to the HNP */
alert = OBJ_NEW(opal_buffer_t);
/* pack update state command */
cmd = ORTE_PLM_UPDATE_PROC_STATE;
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
ORTE_ERROR_LOG(rc);
return;
}
/* get the proc_t */
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto cleanup;
}
/* set the exit code to reflect the problem */
child->exit_code = ORTE_ERR_COMM_FAILURE;
/* pack only the data for this daemon - have to start with the jobid
* so the receiver can unpack it correctly
*/
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return;
}
/* now pack the daemon's info */
if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, child))) {
ORTE_ERROR_LOG(rc);
return;
}
/* send it */
OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output,
"%s errmgr:default_orted reporting lost connection to daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(alert);
}
/* mark that we notified the HNP for this job so we don't do it again */
orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
/* continue on */
goto cleanup;
}
if (orte_orteds_term_ordered) {
/* are any of my children still alive */
@ -494,7 +438,7 @@ static void proc_errors(int fd, short args, void *cbdata)
}
/* if all my routes and children are gone, then terminate
ourselves nicely (i.e., this is a normal termination) */
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:orted all routes gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -503,7 +447,7 @@ static void proc_errors(int fd, short args, void *cbdata)
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:orted not exiting, num_routes() == %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)orte_routed.num_routes(rtmod)));
(int)orte_routed.num_routes()));
}
}
/* if not, then we can continue */
@ -563,8 +507,7 @@ static void proc_errors(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name),
jdata->num_local_procs));
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -629,7 +572,7 @@ static void proc_errors(int fd, short args, void *cbdata)
}
/* if all my routes and children are gone, then terminate
ourselves nicely (i.e., this is a normal termination) */
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:orted all routes gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -671,8 +614,7 @@ static void proc_errors(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(&child->name),
jdata->num_local_procs));
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -727,8 +669,7 @@ static void proc_errors(int fd, short args, void *cbdata)
OBJ_RELEASE(jdata);
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2018 The University of Tennessee and The University
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -12,9 +12,9 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2019 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -57,7 +57,6 @@
#include "orte/mca/iof/base/base.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/regx/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/filem/base/base.h"
@ -110,7 +109,6 @@ int orte_ess_base_orted_setup(void)
hwloc_obj_t obj;
unsigned i, j;
orte_topology_t *t;
opal_list_t transports;
orte_ess_base_signal_t *sig;
int idx;
@ -449,27 +447,6 @@ int orte_ess_base_orted_setup(void)
goto error;
}
/* get a conduit for our use - we never route IO over fabric */
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
error = "orte_rml_open_mgmt_conduit";
goto error;
}
OPAL_LIST_DESTRUCT(&transports);
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
error = "orte_rml_open_coll_conduit";
goto error;
}
OPAL_LIST_DESTRUCT(&transports);
/*
* Group communications
*/
@ -515,17 +492,6 @@ int orte_ess_base_orted_setup(void)
error = "orte_rmaps_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_regx_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_regx_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_regx_base_select";
goto error;
}
/* if a topology file was given, then the rmaps framework open
* will have reset our topology. Ensure we always get the right
@ -542,46 +508,6 @@ int orte_ess_base_orted_setup(void)
opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
}
/* if we were given the host list, then we need to setup
* the daemon info so the RML can function properly
* without requiring a wireup stage. This must be done
* after we enable_comm as that function determines our
* own port, which we need in order to construct the nidmap
*/
if (NULL != orte_node_regex) {
if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(orte_node_regex))) {
ORTE_ERROR_LOG(ret);
error = "construct nidmap";
goto error;
}
/* be sure to update the routing tree so any tree spawn operation
* properly gets the number of children underneath us */
orte_routed.update_routing_plan(NULL);
}
if (orte_static_ports || orte_fwd_mpirun_port) {
if (NULL == orte_node_regex) {
/* we didn't get the node info */
error = "cannot construct daemon map for static ports - no node map info";
goto error;
}
/* extract the node info from the environment and
* build a nidmap from it - this will update the
* routing plan as well
*/
if (ORTE_SUCCESS != (ret = orte_regx.build_daemon_nidmap())) {
ORTE_ERROR_LOG(ret);
error = "construct daemon map from static ports";
goto error;
}
/* be sure to update the routing tree so the initial "phone home"
* to mpirun goes through the tree if static ports were enabled
*/
orte_routed.update_routing_plan(NULL);
/* routing can be enabled */
orte_routed_base.routing_enabled = true;
}
/* Now provide a chance for the PLM
* to perform any module-specific init functions. This
* needs to occur AFTER the communications are setup
@ -661,28 +587,20 @@ int orte_ess_base_orted_finalize(void)
pmix_server_finalize();
(void) mca_base_framework_close(&opal_pmix_base_framework);
/* release the conduits */
orte_rml.close_conduit(orte_mgmt_conduit);
orte_rml.close_conduit(orte_coll_conduit);
/* close frameworks */
(void) mca_base_framework_close(&orte_filem_base_framework);
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
(void) mca_base_framework_close(&orte_iof_base_framework);
/* first stage shutdown of the errmgr, deregister the handler but keep
* the required facilities until the rml and oob are offline */
orte_errmgr.finalize();
(void) mca_base_framework_close(&orte_errmgr_base_framework);
(void) mca_base_framework_close(&orte_plm_base_framework);
/* make sure our local procs are dead */
orte_odls.kill_local_procs(NULL);
(void) mca_base_framework_close(&orte_regx_base_framework);
(void) mca_base_framework_close(&orte_rmaps_base_framework);
(void) mca_base_framework_close(&orte_rtc_base_framework);
(void) mca_base_framework_close(&orte_odls_base_framework);
(void) mca_base_framework_close(&orte_routed_base_framework);
(void) mca_base_framework_close(&orte_rml_base_framework);
(void) mca_base_framework_close(&orte_oob_base_framework);
(void) mca_base_framework_close(&orte_errmgr_base_framework);
(void) mca_base_framework_close(&orte_state_base_framework);
/* remove our use of the session directory tree */
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
@ -751,8 +669,7 @@ static void signal_forward_callback(int fd, short event, void *arg)
}
/* send it to ourselves */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_NAME, cmd,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, cmd,
ORTE_RML_TAG_DAEMON,
NULL, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
@ -94,7 +94,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
{
int ret;
char *error = NULL;
opal_list_t transports;
opal_list_t info;
opal_value_t *kv, *knext, val;
opal_pmix_query_t *q;
@ -222,13 +221,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
goto error;
}
/* get a conduit for our use - we never route IO over fabric */
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
OPAL_LIST_DESTRUCT(&transports);
/* we -may- need to know the name of the head
* of our session directory tree, particularly the
* tmp base where any other session directories on
@ -269,7 +261,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
val.data.string = NULL;
OBJ_DESTRUCT(&val);
/* set the route to be direct */
if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
orte_finalize();
exit(1);
@ -277,7 +269,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
/* connect to the HNP so we can recv forwarded output */
buf = OBJ_NEW(opal_buffer_t);
ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_HNP,
ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP,
buf, ORTE_RML_TAG_WARMUP_CONNECTION,
orte_rml_send_callback, NULL);
if (ORTE_SUCCESS != ret) {
@ -287,7 +279,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
}
/* set the target hnp as our lifeline so we will terminate if it exits */
orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP);
orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
/* setup the IOF */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
@ -317,8 +309,6 @@ int orte_ess_base_tool_finalize(void)
{
orte_wait_finalize();
orte_rml.close_conduit(orte_mgmt_conduit);
/* if I am a tool, then all I will have done is
* a very small subset of orte_init - ensure that
* I only back those elements out

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -66,7 +66,6 @@
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/regx/base/base.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/plm.h"
#include "orte/mca/odls/base/base.h"
@ -142,7 +141,6 @@ static int rte_init(void)
uint32_t h;
int idx;
orte_topology_t *t;
opal_list_t transports;
orte_ess_base_signal_t *sig;
opal_value_t val;
@ -371,27 +369,6 @@ static int rte_init(void)
goto error;
}
/* get a conduit for our use - we never route IO over fabric */
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
error = "orte_rml_open_mgmt_conduit";
goto error;
}
OPAL_LIST_DESTRUCT(&transports);
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
error = "orte_rml_open_coll_conduit";
goto error;
}
OPAL_LIST_DESTRUCT(&transports);
/* it is now safe to start the pmix server */
pmix_server_start();
@ -555,16 +532,6 @@ static int rte_init(void)
error = "orte_rmaps_base_find_available";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_regx_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_regx_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_regx_base_select";
goto error;
}
/* if a topology file was given, then the rmaps framework open
* will have reset our topology. Ensure we always get the right
@ -787,10 +754,6 @@ static int rte_finalize(void)
fflush(stdout);
fflush(stderr);
/* release the conduits */
orte_rml.close_conduit(orte_mgmt_conduit);
orte_rml.close_conduit(orte_coll_conduit);
(void) mca_base_framework_close(&orte_iof_base_framework);
(void) mca_base_framework_close(&orte_rtc_base_framework);
(void) mca_base_framework_close(&orte_odls_base_framework);

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -208,8 +208,7 @@ static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sende
return;
}
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer,
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
ORTE_RML_TAG_FILEM_BASE_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -301,8 +300,7 @@ static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender,
goto CLEANUP;
}
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer,
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
ORTE_RML_TAG_FILEM_BASE_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -870,8 +870,7 @@ static void send_complete(char *file, int status)
OBJ_RELEASE(buf);
return;
}
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, buf,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
ORTE_RML_TAG_FILEM_BASE_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -33,7 +33,7 @@
#include "opal/dss/dss.h"
#include "orte/util/compress.h"
#include "opal/mca/compress/compress.h"
#include "orte/util/proc_info.h"
#include "orte/util/error_strings.h"
#include "orte/mca/errmgr/errmgr.h"
@ -231,7 +231,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
orte_namelist_t *nm;
opal_list_t children;
size_t n;
char *routed;
/* search the existing tracker list to see if this already exists */
OPAL_LIST_FOREACH(coll, &orte_grpcomm_base.ongoing, orte_grpcomm_coll_t) {
@ -279,38 +278,30 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
return NULL;
}
/* get the routed module for our conduit */
routed = orte_rml.get_routed(orte_coll_conduit);
if (NULL == routed) {
/* this conduit is not routed, so we expect all daemons
* to directly participate */
coll->nexpected = coll->ndmns;
} else {
/* cycle thru the array of daemons and compare them to our
* children in the routing tree, counting the ones that match
* so we know how many daemons we should receive contributions from */
OBJ_CONSTRUCT(&children, opal_list_t);
orte_routed.get_routing_list(routed, &children);
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) {
for (n=0; n < coll->ndmns; n++) {
if (nm->name.vpid == coll->dmns[n]) {
coll->nexpected++;
break;
}
}
OBJ_RELEASE(nm);
}
OPAL_LIST_DESTRUCT(&children);
/* see if I am in the array of participants - note that I may
* be in the rollup tree even though I'm not participating
* in the collective itself */
/* cycle thru the array of daemons and compare them to our
* children in the routing tree, counting the ones that match
* so we know how many daemons we should receive contributions from */
OBJ_CONSTRUCT(&children, opal_list_t);
orte_routed.get_routing_list(&children);
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) {
for (n=0; n < coll->ndmns; n++) {
if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) {
if (nm->name.vpid == coll->dmns[n]) {
coll->nexpected++;
break;
}
}
OBJ_RELEASE(nm);
}
OPAL_LIST_DESTRUCT(&children);
/* see if I am in the array of participants - note that I may
* be in the rollup tree even though I'm not participating
* in the collective itself */
for (n=0; n < coll->ndmns; n++) {
if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) {
coll->nexpected++;
break;
}
}
return coll;
@ -506,8 +497,8 @@ static int pack_xcast(orte_grpcomm_signature_t *sig,
}
/* see if we want to compress this message */
if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used,
&cmpdata, &cmplen)) {
if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used,
&cmpdata, &cmplen)) {
/* the data was compressed - mark that we compressed it */
flag = 1;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) {

Просмотреть файл

@ -1,41 +0,0 @@
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(grpcomm_brucks_CPPFLAGS)
sources = \
grpcomm_brucks.h \
grpcomm_brucks_module.c \
grpcomm_brucks_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_grpcomm_brucks_DSO
component_noinst =
component_install = mca_grpcomm_brucks.la
else
component_noinst = libmca_grpcomm_brucks.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_grpcomm_brucks_la_SOURCES = $(sources)
mca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version
mca_grpcomm_brucks_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_grpcomm_brucks_la_SOURCES =$(sources)
libmca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,31 +0,0 @@
/* -*- C -*-
*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#ifndef GRPCOMM_BRUCKS_H
#define GRPCOMM_BRUCKS_H
#include "orte_config.h"
#include "orte/mca/grpcomm/grpcomm.h"
BEGIN_C_DECLS
/*
 * Grpcomm interfaces
 *
 * Public symbols of the brucks grpcomm component:
 *  - mca_grpcomm_brucks_component: the component descriptor, exported
 *    with ORTE_MODULE_DECLSPEC.
 *  - orte_grpcomm_brucks_module: the module function table that the
 *    component hands back from its query function.
 */
ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_brucks_component;
extern orte_grpcomm_base_module_t orte_grpcomm_brucks_module;
END_C_DECLS
#endif

Просмотреть файл

@ -1,84 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/mca/mca.h"
#include "opal/runtime/opal_params.h"
#include "orte/util/proc_info.h"
#include "grpcomm_brucks.h"
/* Selection priority reported by brucks_query(). The initializer of 5 is
 * replaced with 50 when brucks_register() runs, before the value is
 * registered as the component's "priority" variable. */
static int my_priority=5;
/* Component lifecycle callbacks, referenced by the component descriptor. */
static int brucks_open(void);
static int brucks_close(void);
static int brucks_query(mca_base_module_t **module, int *priority);
static int brucks_register(void);
/*
 * Component descriptor for the "brucks" grpcomm component.
 *
 * Names the component and wires up the open/close/query/register
 * callbacks defined in this file.
 */
orte_grpcomm_base_component_t mca_grpcomm_brucks_component = {
.base_version = {
/* positional entries supplied by the framework version macro; the
 * designated fields that follow continue from wherever it leaves off */
ORTE_GRPCOMM_BASE_VERSION_3_0_0,
.mca_component_name = "brucks",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
.mca_open_component = brucks_open,
.mca_close_component = brucks_close,
.mca_query_component = brucks_query,
.mca_register_component_params = brucks_register,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/*
 * Register the component's MCA variables.
 *
 * Resets my_priority to 50 and registers it as the integer variable
 * "priority" of this component. The result of the registration call is
 * deliberately discarded; the function always returns ORTE_SUCCESS.
 */
static int brucks_register(void)
{
    /* the priority is exposed as a variable so users can select brucks
     * for use by apps without affecting daemons */
    my_priority = 50;

    (void) mca_base_component_var_register(&mca_grpcomm_brucks_component.base_version,
                                           "priority",
                                           "Priority of the grpcomm brucks component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &my_priority);

    return ORTE_SUCCESS;
}
/* Open the component: there is nothing to set up, so this always
 * reports ORTE_SUCCESS. */
static int brucks_open(void)
{
return ORTE_SUCCESS;
}
/* Close the component: nothing was acquired in brucks_open(), so there
 * is nothing to release; always reports ORTE_SUCCESS. */
static int brucks_close(void)
{
return ORTE_SUCCESS;
}
/*
 * Component query callback.
 *
 * Hands back the brucks module together with the current priority.
 *
 * @param module   [out] receives the address of orte_grpcomm_brucks_module
 * @param priority [out] receives the current value of my_priority
 * @return always ORTE_SUCCESS
 */
static int brucks_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *) &orte_grpcomm_brucks_module;
    *priority = my_priority;

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,388 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "orte/runtime/orte_wait.h"
#include <math.h>
#include <string.h>
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/mca/grpcomm/base/base.h"
#include "grpcomm_brucks.h"
/* Static APIs: module entry points plus the internal helpers that drive
 * the distance-based exchange implemented in this file */
static int init(void);
static void finalize(void);
static int allgather(orte_grpcomm_coll_t *coll,
opal_buffer_t *buf);
/* internal helpers for the per-distance exchange */
static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance);
static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance);
/* receive callback registered in init() */
static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret);
/* Module def: positional function table for the brucks module.
 * NOTE(review): the third slot is left NULL; which operation it
 * corresponds to depends on the layout of orte_grpcomm_base_module_t,
 * which is not visible here - confirm against grpcomm.h. */
orte_grpcomm_base_module_t orte_grpcomm_brucks_module = {
init,
finalize,
NULL,
allgather
};
/**
 * Initialize the module
 *
 * Posts a receive for ORTE_RML_TAG_ALLGATHER_BRUCKS from any sender
 * (ORTE_NAME_WILDCARD), flagged ORTE_RML_PERSISTENT, with
 * brucks_allgather_recv_dist as the callback.
 *
 * @return always OPAL_SUCCESS; nothing returned by the receive post is
 *         inspected.
 */
static int init(void)
{
/* setup recv for distance data */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ALLGATHER_BRUCKS,
ORTE_RML_PERSISTENT,
brucks_allgather_recv_dist, NULL);
return OPAL_SUCCESS;
}
/**
 * Finalize the module
 *
 * Cancels the wildcard receive on ORTE_RML_TAG_ALLGATHER_BRUCKS that
 * init() posted.
 */
static void finalize(void)
{
/* cancel the recv */
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_BRUCKS);
}
/**
 * Start an allgather for the given collective.
 *
 * Looks up this process's position in coll->dmns, seeds the collection
 * bucket with the caller's data and kicks off processing at distance 0.
 *
 * @param coll    collective tracker; my_rank, nreported, bucket and the
 *                distance bitmap are updated
 * @param sendbuf local contribution, copied into coll->bucket
 * @return ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND when this process's vpid
 *         is not listed in coll->dmns (the collective is finalized with
 *         that status first)
 */
static int allgather(orte_grpcomm_coll_t *coll,
                     opal_buffer_t *sendbuf)
{
    orte_vpid_t idx;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brucks algo employed for %d processes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns));

    /* find the first slot in the daemon array that carries our vpid */
    coll->my_rank = ORTE_VPID_INVALID;
    for (idx = 0; idx < coll->ndmns; ++idx) {
        if (ORTE_PROC_MY_NAME->vpid != coll->dmns[idx]) {
            continue;
        }
        coll->my_rank = idx;
        break;
    }

    /* bozo case: we are not part of this collective */
    if (ORTE_VPID_INVALID == coll->my_rank) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "Peer not found"));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        brucks_finalize_coll(coll, ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* our own contribution counts as the first report */
    coll->nreported = 1;

    /* size the received-distance bitmap when there is anyone to talk to */
    if (coll->ndmns > 1) {
        opal_bitmap_init (&coll->distance_mask_recv, (uint32_t) log2 (coll->ndmns) + 1);
    }

    /* seed the collection with the local data */
    opal_dss.copy_payload(&coll->bucket, sendbuf);

    /* begin the exchange at distance 0 */
    brucks_allgather_process_data (coll, 0);

    return ORTE_SUCCESS;
}
/**
 * Send everything collected so far to one peer.
 *
 * The message carries, in order: the collective signature, the distance,
 * the number of daemons represented in the payload (coll->nreported) and
 * a copy of coll->bucket.
 *
 * @param coll     collective tracker supplying signature, count and data
 * @param peer     destination process
 * @param distance distance value stamped into the message
 * @return ORTE_SUCCESS when the send was posted; otherwise the failing
 *         pack/copy/send return code, after releasing the send buffer
 */
static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) {
    opal_buffer_t *send_buf = OBJ_NEW(opal_buffer_t);
    int rc;

    /* signature first, so the receiver can find the tracker */
    rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* then the current distance */
    rc = opal_dss.pack(send_buf, &distance, 1, OPAL_INT32);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* then how many daemons the payload covers */
    rc = opal_dss.pack(send_buf, &coll->nreported, 1, OPAL_SIZE);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* and finally the collected data itself */
    rc = opal_dss.copy_payload(send_buf, &coll->bucket);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brucks SENDING TO %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    /* only a negative return counts as a failed post */
    rc = orte_rml.send_buffer_nb(peer, send_buf,
                                 ORTE_RML_TAG_ALLGATHER_BRUCKS,
                                 orte_rml_send_callback, NULL);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    return ORTE_SUCCESS;

release:
    OBJ_RELEASE(send_buf);
    return rc;
}
/**
 * Consume the message that was buffered for the given distance, if any.
 *
 * The buffered message is detached from coll->buffers[distance], its
 * leading daemon count is unpacked, and the remaining payload is appended
 * to coll->bucket. On success the count is added to coll->nreported and
 * the distance is marked as received.
 *
 * @param coll     collective tracker
 * @param distance index into coll->buffers to check
 * @return 1 if a buffered message was merged, 0 if nothing is buffered
 *         for this distance, or the failing opal_dss return code (the
 *         caller treats negative values as errors) after the collective
 *         has been finalized with that code
 */
static int brucks_allgather_process_buffered (orte_grpcomm_coll_t *coll, uint32_t distance) {
    opal_buffer_t *buffer;
    size_t nreceived;
    int32_t cnt = 1;
    int rc;

    /* check whether data for next distance is available */
    if (NULL == coll->buffers || NULL == coll->buffers[distance]) {
        return 0;
    }

    /* detach the buffer from the tracker: from here on this function is
     * the only holder of the pointer and must release it on every path */
    buffer = coll->buffers[distance];
    coll->buffers[distance] = NULL;

    OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brucks %u distance data found",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

    rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        /* the detached buffer would otherwise be leaked */
        OBJ_RELEASE(buffer);
        brucks_finalize_coll(coll, rc);
        return rc;
    }

    if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) {
        ORTE_ERROR_LOG(rc);
        /* the detached buffer would otherwise be leaked */
        OBJ_RELEASE(buffer);
        brucks_finalize_coll(coll, rc);
        return rc;
    }

    coll->nreported += nreceived;
    orte_grpcomm_base_mark_distance_recv (coll, distance);
    OBJ_RELEASE(buffer);

    return 1;
}
/*
 * Advance the exchange starting at the given distance.
 *
 * For each distance below floor(log2(ndmns)) the current bucket is sent
 * to the peer at rank (my_rank - 2^distance) mod ndmns, then any message
 * already buffered for that distance is merged. The loop stops as soon
 * as no buffered message is available for the current distance;
 * brucks_allgather_recv_dist calls this function again with distance + 1
 * once it has consumed a message. The collective is finalized when
 * nreported reaches ndmns.
 *
 * Return codes of the sends are not checked here.
 */
static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) {
/* Communication step:
At every step i, rank r:
- doubles the distance (the peer offset used below is 1 << distance)
- sends message containing all data collected so far to rank r - (1 << distance)
- receives message containing all data collected so far from rank (r + distance)
  (the receive side is handled by the persistent recv callback, not here)
*/
uint32_t log2ndmns = (uint32_t) log2 (coll->ndmns);
uint32_t last_round;
orte_process_name_t peer;
orte_vpid_t nv;
int rc;
/* NTH: calculate in which round we should send the final data. this is the first
* round in which we have data from at least (coll->ndmns - (1 << log2ndmns))
* daemons. alternatively we could just send when distance reaches log2ndmns but
* that could end up sending more data than needed */
/* NOTE(review): when ndmns is an exact power of two the argument of log2
* below is 0, so the value cast to uint32_t is ceil(log2(0)); the result of
* that conversion is not well defined - confirm the intended last_round for
* this case */
last_round = (uint32_t) ceil (log2 ((double) (coll->ndmns - (1 << log2ndmns))));
peer.jobid = ORTE_PROC_MY_NAME->jobid;
while (distance < log2ndmns) {
OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:brucks process distance %u)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));
/* first send my current contents */
nv = (coll->ndmns + coll->my_rank - (1 << distance)) % coll->ndmns;
peer.vpid = coll->dmns[nv];
brucks_allgather_send_dist(coll, &peer, distance);
if (distance == last_round) {
/* have enough data to send the final round now */
nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns;
peer.vpid = coll->dmns[nv];
brucks_allgather_send_dist(coll, &peer, log2ndmns);
}
/* 1: merged buffered data, move on; 0: nothing buffered yet, stop and
* wait for the recv callback; negative: error, collective already finalized */
rc = brucks_allgather_process_buffered (coll, distance);
if (!rc) {
break;
} else if (rc < 0) {
return;
}
++distance;
}
/* only reached with distance == log2ndmns when every earlier distance
* was merged (or the caller started at that distance) */
if (distance == log2ndmns) {
if (distance == last_round) {
/* need to send the final round now */
nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns;
peer.vpid = coll->dmns[nv];
brucks_allgather_send_dist(coll, &peer, log2ndmns);
}
/* check if the final message is already queued */
rc = brucks_allgather_process_buffered (coll, distance);
if (rc < 0) {
return;
}
}
OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:brucks reported %lu process from %lu",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported,
(unsigned long)coll->ndmns));
/* if we are done, then complete things. we may get data from more daemons than expected */
if (coll->nreported >= coll->ndmns){
brucks_finalize_coll(coll, ORTE_SUCCESS);
}
}
/**
 * RML receive callback for Bruck's-algorithm allgather messages.
 *
 * The message carries, in order: the collective signature, the round
 * ("distance") it belongs to, and the round's payload.  Once the tracker
 * for the signature has been located (or created), the payload is either
 *
 *   - consumed immediately: when coll->nreported is non-zero and either the
 *     distance is 0 or the previous distance has already been marked as
 *     received.  The leading contributor count is unpacked and added to
 *     coll->nreported, the rest of the payload is appended to coll->bucket,
 *     and processing continues with distance + 1; or
 *
 *   - parked in coll->buffers[distance] for later processing.
 *
 * Any failure after the tracker has been obtained finalizes the collective
 * with the error code.
 *
 * @param status  not used by this function
 * @param sender  originator of the message (used for logging only)
 * @param buffer  the received message
 * @param tag     not used by this function
 * @param cbdata  not used by this function
 */
static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender,
                                       opal_buffer_t* buffer, orte_rml_tag_t tag,
                                       void* cbdata)
{
    int32_t cnt;
    int rc;
    orte_grpcomm_signature_t *sig;
    orte_grpcomm_coll_t *coll;
    uint32_t distance = 0;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brucks RECEIVING FROM %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* unpack the signature */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* check for the tracker and create it if not found */
    if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OBJ_RELEASE(sig);
        return;
    }

    /* unpack the distance - cnt is an in/out argument, so re-arm it
     * before every unpack rather than relying on the previous call
     * having left it at 1 */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) {
        OBJ_RELEASE(sig);
        ORTE_ERROR_LOG(rc);
        brucks_finalize_coll(coll, rc);
        return;
    }
    /* a given distance must not be delivered twice */
    assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance));

    /* Check whether we can process next distance */
    if (coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, distance - 1))) {
        size_t nreceived = 0;

        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:brucks data from %d distance received, "
                             "Process the next distance.",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* the payload starts with the number of contributions it holds */
        cnt = 1;
        rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE);
        if (OPAL_SUCCESS != rc) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            brucks_finalize_coll(coll, rc);
            return;
        }
        /* capture any provided content */
        if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            brucks_finalize_coll(coll, rc);
            return;
        }
        coll->nreported += nreceived;
        orte_grpcomm_base_mark_distance_recv(coll, distance);
        brucks_allgather_process_data(coll, distance + 1);
    } else {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:brucks data from %d distance received, "
                             "still waiting for data.",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* lazily create the per-distance holding area: one slot for each
         * distance 0 .. log2(ndmns) inclusive */
        if (NULL == coll->buffers) {
            if (NULL == (coll->buffers = (opal_buffer_t **) calloc ((uint32_t) log2 (coll->ndmns) + 1, sizeof(opal_buffer_t *)))) {
                rc = OPAL_ERR_OUT_OF_RESOURCE;
                OBJ_RELEASE(sig);
                ORTE_ERROR_LOG(rc);
                brucks_finalize_coll(coll, rc);
                return;
            }
        }
        if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            brucks_finalize_coll(coll, rc);
            return;
        }
        /* keep the remaining payload (count + data) until it can be used */
        if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            brucks_finalize_coll(coll, rc);
            return;
        }
    }

    OBJ_RELEASE(sig);
}
/**
 * Complete a collective: report the outcome to the requester and retire
 * the tracker.
 *
 * The tracker is taken off the list of ongoing collectives and the
 * reference that list membership represented is dropped, so the caller
 * must not touch @p coll after this function returns.
 *
 * @param coll  tracker of the collective being completed
 * @param ret   status passed to the completion callback
 * @return ORTE_SUCCESS always
 */
static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret)
{
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brucks declared collective complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* execute the callback */
    if (NULL != coll->cbfunc) {
        coll->cbfunc(ret, &coll->bucket, coll->cbdata);
    }

    opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super);

    /* without this the tracker is unreachable once it leaves the list
     * and would never be freed.
     * NOTE(review): confirm no caller uses coll after finalizing it */
    OBJ_RELEASE(coll);

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -24,15 +24,15 @@
#include "opal/dss/dss.h"
#include "opal/class/opal_list.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/compress/compress.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/regx/regx.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/state/state.h"
#include "orte/util/compress.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/proc_info.h"
#include "orte/mca/grpcomm/base/base.h"
@ -112,8 +112,7 @@ static int xcast(orte_vpid_t *vpids,
/* send it to the HNP (could be myself) for relay */
OBJ_RETAIN(buf); // we'll let the RML release it
if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
@ -153,8 +152,7 @@ static int allgather(orte_grpcomm_coll_t *coll,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* send the info to ourselves for tracking */
rc = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_NAME, relay,
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay,
ORTE_RML_TAG_ALLGATHER_DIRECT,
orte_rml_send_callback, NULL);
return rc;
@ -245,8 +243,7 @@ static void allgather_recv(int status, orte_process_name_t* sender,
/* transfer the collected bucket */
opal_dss.copy_payload(reply, &coll->bucket);
/* send the info to our parent */
rc = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_PARENT, reply,
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, reply,
ORTE_RML_TAG_ALLGATHER_DIRECT,
orte_rml_send_callback, NULL);
}
@ -271,7 +268,6 @@ static void xcast_recv(int status, orte_process_name_t* sender,
opal_list_t coll;
orte_grpcomm_signature_t *sig;
orte_rml_tag_t tag;
char *rtmod, *nidmap;
size_t inlen, cmplen;
uint8_t *packed_data, *cmpdata;
int32_t nvals, i;
@ -336,7 +332,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
return;
}
/* decompress the data */
if (orte_util_uncompress_block(&cmpdata, cmplen,
if (opal_compress.decompress_block(&cmpdata, cmplen,
packed_data, inlen)) {
/* the data has been uncompressed */
opal_dss.load(&datbuf, cmpdata, cmplen);
@ -372,9 +368,6 @@ static void xcast_recv(int status, orte_process_name_t* sender,
return;
}
/* get our conduit's routed module name */
rtmod = orte_rml.get_routed(orte_coll_conduit);
/* if this is headed for the daemon command processor,
* then we first need to check for add_local_procs
* as that command includes some needed wireup info */
@ -409,48 +402,27 @@ static void xcast_recv(int status, orte_process_name_t* sender,
ORTE_ERROR_LOG(ret);
goto relay;
}
/* unpack the nidmap string - may be NULL */
cnt = 1;
if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &nidmap, &cnt, OPAL_STRING))) {
ORTE_ERROR_LOG(ret);
goto relay;
}
if (NULL != nidmap) {
if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(nidmap))) {
ORTE_ERROR_LOG(ret);
goto relay;
}
free(nidmap);
}
/* see if they included info on node capabilities */
/* unpack flag indicating if nidmap included */
cnt = 1;
if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) {
ORTE_ERROR_LOG(ret);
goto relay;
}
if (0 != flag) {
/* update our local nidmap, if required - the decode function
* knows what to do
*/
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:direct:xcast updating daemon nidmap",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (ret = orte_regx.decode_daemon_nodemap(data))) {
if (1 == flag) {
if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(data))) {
ORTE_ERROR_LOG(ret);
goto relay;
}
if (!ORTE_PROC_IS_HNP) {
/* update the routing plan - the HNP already did
* it when it computed the VM, so don't waste time
* re-doing it here */
orte_routed.update_routing_plan(rtmod);
orte_routed.update_routing_plan();
}
/* routing is now possible */
orte_routed_base.routing_enabled = true;
/* unpack the byte object */
/* unpack the wireup byte object */
cnt=1;
if (ORTE_SUCCESS != (ret = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(ret);
@ -544,7 +516,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
relay:
if (!orte_do_not_launch) {
/* get the list of next recipients from the routed module */
orte_routed.get_routing_list(rtmod, &coll);
orte_routed.get_routing_list(&coll);
/* if list is empty, no relay is required */
if (opal_list_is_empty(&coll)) {
@ -590,8 +562,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH);
continue;
}
if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
&nm->name, rly, ORTE_RML_TAG_XCAST,
if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(rly);

Просмотреть файл

@ -1,41 +0,0 @@
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(grpcomm_rcd_CPPFLAGS)
sources = \
grpcomm_rcd.h \
grpcomm_rcd.c \
grpcomm_rcd_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_grpcomm_rcd_DSO
component_noinst =
component_install = mca_grpcomm_rcd.la
else
component_noinst = libmca_grpcomm_rcd.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_grpcomm_rcd_la_SOURCES = $(sources)
mca_grpcomm_rcd_la_LDFLAGS = -module -avoid-version
mca_grpcomm_rcd_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_grpcomm_rcd_la_SOURCES =$(sources)
libmca_grpcomm_rcd_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,329 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "orte/runtime/orte_wait.h"
#include <math.h>
#include <string.h>
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/mca/grpcomm/base/base.h"
#include "grpcomm_rcd.h"
/* Static API's */
static int init(void);
static void finalize(void);
static int allgather(orte_grpcomm_coll_t *coll,
opal_buffer_t *buf);
static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance);
static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance);
static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret);
/* Module definition: the entry points this component exposes to the
 * grpcomm framework.  The initializer is positional, so the order of
 * the entries must match the field order of orte_grpcomm_base_module_t. */
orte_grpcomm_base_module_t orte_grpcomm_rcd_module = {
/* posts the persistent receive for distance messages */
init,
/* cancels that receive */
finalize,
/* slot left unimplemented by this module - presumably the xcast entry
 * point; verify against orte_grpcomm_base_module_t */
NULL,
/* recursive-doubling allgather */
allgather
};
/**
 * Module start-up hook.
 *
 * Posts a persistent receive, matching any sender, on the recursive-doubling
 * allgather tag; every message delivered on that tag is routed to
 * rcd_allgather_recv_dist().
 *
 * @return OPAL_SUCCESS always
 */
static int init(void)
{
    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_RCD,
                            ORTE_RML_PERSISTENT, rcd_allgather_recv_dist, NULL);

    return OPAL_SUCCESS;
}
/**
 * Module shutdown hook.
 *
 * Withdraws the wildcard receive on the recursive-doubling allgather tag
 * that init() posted.
 */
static void finalize(void)
{
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_RCD);
}
/**
 * Start a recursive-doubling allgather for the given collective.
 *
 * The algorithm is only attempted when the number of participating daemons
 * is a power of two; otherwise ORTE_ERR_TAKE_NEXT_OPTION is returned.
 * On acceptance the function sizes the received-distance bitmap, locates
 * this daemon's position in coll->dmns, seeds the bucket with the local
 * contribution and kicks off round 0.
 *
 * @param coll     tracker for the collective
 * @param sendbuf  this daemon's contribution
 * @return ORTE_SUCCESS once the first round has been started,
 *         ORTE_ERR_TAKE_NEXT_OPTION if ndmns is zero or not a power of two,
 *         ORTE_ERR_NOT_FOUND if this daemon is not listed in coll->dmns
 *         (the collective is finalized with that error first).
 */
static int allgather(orte_grpcomm_coll_t *coll,
                     opal_buffer_t *sendbuf)
{
    uint32_t nrounds;
    orte_vpid_t idx;

    /* a power of two has exactly one bit set, so clearing the lowest set
     * bit must leave nothing behind; zero daemons does not qualify either */
    if (0 == coll->ndmns || 0 != (coll->ndmns & (coll->ndmns - 1))) {
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    nrounds = log2 (coll->ndmns);

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub algo employed for %d daemons",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns));

    /* one bit per round in the received-distance bitmap; nothing to set up
     * when there are no rounds at all (single daemon) */
    if (0 != nrounds) {
        opal_bitmap_init (&coll->distance_mask_recv, nrounds);
    }

    /* find where we sit in the ordered list of participants */
    coll->my_rank = ORTE_VPID_INVALID;
    for (idx = 0 ; idx < coll->ndmns ; ++idx) {
        if (ORTE_PROC_MY_NAME->vpid == coll->dmns[idx]) {
            coll->my_rank = idx;
            break;
        }
    }

    /* we were asked to take part in a collective we are not a member of */
    if (ORTE_VPID_INVALID == coll->my_rank) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "My peer not found in daemons array"));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rcd_finalize_coll(coll, ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* our own data is the first contribution in the bucket */
    opal_dss.copy_payload(&coll->bucket, sendbuf);
    coll->nreported = 1;

    /* begin exchanging, starting with the nearest partner */
    rcd_allgather_process_data (coll, 0);

    return ORTE_SUCCESS;
}
/**
 * Send everything collected so far to one partner for a given round.
 *
 * The message is laid out as: collective signature, the distance (round
 * number) as OPAL_UINT32, then a copy of the current bucket contents.
 * On success the buffer is handed to the RML together with
 * orte_rml_send_callback; on any failure it is released here.
 *
 * @param coll      tracker of the collective
 * @param peer      destination process
 * @param distance  round this message belongs to
 * @return ORTE_SUCCESS if the message was queued,
 *         ORTE_ERR_OUT_OF_RESOURCE if the buffer could not be allocated,
 *         otherwise the error reported by the pack or send call.
 */
static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) {
    opal_buffer_t *send_buf;
    int rc;

    send_buf = OBJ_NEW(opal_buffer_t);
    /* bail out before a NULL buffer reaches pack or OBJ_RELEASE */
    if (NULL == send_buf) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* pack the signature */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(send_buf);
        return rc;
    }
    /* pack the distance */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_UINT32))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(send_buf);
        return rc;
    }
    /* pack the data */
    if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(send_buf);
        return rc;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub SENDING TO %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer)));

    if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit,
                                          peer, send_buf,
                                          ORTE_RML_TAG_ALLGATHER_RCD,
                                          orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(send_buf);
        return rc;
    }

    return ORTE_SUCCESS;
}
/**
 * Drive the recursive-doubling exchange forward from a given round.
 *
 * In round i, rank r swaps everything it has gathered so far with rank
 * (r XOR 2^i).  Starting at @p distance, each iteration sends the current
 * bucket to that round's partner and then looks for the partner's message
 * in coll->buffers[distance].  If it is there, it is merged into the bucket,
 * the round is marked as received and the next round begins; if not, the
 * loop stops.  The collective is finalized once coll->nreported equals
 * coll->ndmns.
 *
 * @param coll      tracker of the collective
 * @param distance  first round to process
 */
static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance)
{
    const uint32_t nrounds = log2(coll->ndmns);
    orte_process_name_t partner;
    orte_vpid_t partner_rank;
    int rc;

    partner.jobid = ORTE_PROC_MY_NAME->jobid;

    for ( ; distance < nrounds; ++distance) {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub process distance %u",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* this round's partner differs from us in exactly one bit */
        partner_rank = coll->my_rank ^ (1 << distance);
        assert (partner_rank < coll->ndmns);
        partner.vpid = coll->dmns[partner_rank];

        /* ship our current contents to the partner */
        rcd_allgather_send_dist(coll, &partner, distance);

        /* stop here unless this round's incoming message is already
         * sitting in the holding area */
        if (NULL == coll->buffers || NULL == coll->buffers[distance]) {
            break;
        }

        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub %u distance data found",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance]);
        if (OPAL_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            rcd_finalize_coll(coll, rc);
            return;
        }

        /* a round-i message is credited with 2^i contributions */
        coll->nreported += 1 << distance;
        orte_grpcomm_base_mark_distance_recv(coll, distance);

        /* the parked copy has been consumed */
        OBJ_RELEASE(coll->buffers[distance]);
        coll->buffers[distance] = NULL;
    }

    OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub reported %lu process from %lu",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported,
                         (unsigned long)coll->ndmns));

    /* everybody accounted for: wrap up */
    if (coll->ndmns == coll->nreported) {
        rcd_finalize_coll(coll, ORTE_SUCCESS);
    }
}
/**
 * RML receive callback for recursive-doubling allgather messages.
 *
 * The message carries the collective signature, the round ("distance") as
 * OPAL_UINT32, and the sender's gathered data.  After locating (or
 * creating) the tracker, the data is either
 *
 *   - merged into coll->bucket right away, when coll->nreported is non-zero
 *     and either the distance is 0 or the previous distance has already
 *     been marked as received; processing then continues with
 *     distance + 1; or
 *
 *   - parked in coll->buffers[distance] until it can be processed.
 *
 * The distance is validated against the number of rounds before it is used
 * as an array index or shift count.  Any failure after the tracker has been
 * obtained finalizes the collective with the error code.
 *
 * @param status  not used by this function
 * @param sender  originator of the message (used for logging only)
 * @param buffer  the received message
 * @param tag     not used by this function
 * @param cbdata  not used by this function
 */
static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender,
                                    opal_buffer_t* buffer, orte_rml_tag_t tag,
                                    void* cbdata)
{
    int32_t cnt;
    uint32_t distance = 0;
    uint32_t log2ndmns;
    int rc;
    orte_grpcomm_signature_t *sig;
    orte_grpcomm_coll_t *coll;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub RECEIVING FROM %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* unpack the signature */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* check for the tracker and create it if not found */
    if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OBJ_RELEASE(sig);
        return;
    }

    /* unpack the distance */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_UINT32))) {
        OBJ_RELEASE(sig);
        ORTE_ERROR_LOG(rc);
        rcd_finalize_coll(coll, rc);
        return;
    }

    /* the distance indexes coll->buffers (log2(ndmns) slots) and is used as
     * a shift count below, so reject anything outside 0 .. log2(ndmns)-1
     * instead of writing out of bounds */
    log2ndmns = (0 == coll->ndmns) ? 0 : (uint32_t) log2 (coll->ndmns);
    if (distance >= log2ndmns) {
        OBJ_RELEASE(sig);
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        rcd_finalize_coll(coll, ORTE_ERR_BAD_PARAM);
        return;
    }
    /* a given distance must not be delivered twice */
    assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance));

    /* Check whether we can process next distance */
    if (coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, (distance - 1)))) {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub data from %d distance received, "
                             "Process the next distance.",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* capture any provided content */
        if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            rcd_finalize_coll(coll, rc);
            return;
        }
        /* a round-i message is credited with 2^i contributions */
        coll->nreported += (1 << distance);
        orte_grpcomm_base_mark_distance_recv (coll, distance);
        rcd_allgather_process_data (coll, distance + 1);
    } else {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub data from %d distance received, "
                             "still waiting for data.",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));

        /* lazily create the per-distance holding area, one slot per round */
        if (NULL == coll->buffers) {
            coll->buffers = (opal_buffer_t **) calloc (log2ndmns, sizeof (coll->buffers[0]));
            if (NULL == coll->buffers) {
                OBJ_RELEASE(sig);
                ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
                rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE);
                return;
            }
        }
        if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
            rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE);
            return;
        }
        /* keep the payload until the earlier rounds have caught up */
        if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) {
            OBJ_RELEASE(sig);
            ORTE_ERROR_LOG(rc);
            rcd_finalize_coll(coll, rc);
            return;
        }
    }

    OBJ_RELEASE(sig);
}
/**
 * Complete a collective: report the outcome to the requester, take the
 * tracker off the list of ongoing collectives and release it.
 *
 * The caller must not use @p coll after this function returns.
 *
 * @param coll  tracker of the collective being completed
 * @param ret   status passed to the completion callback
 * @return ORTE_SUCCESS always
 */
static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret)
{
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub declared collective complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* hand the gathered bucket and the final status to the requester,
     * if one registered a callback */
    if (coll->cbfunc != NULL) {
        coll->cbfunc(ret, &coll->bucket, coll->cbdata);
    }

    /* retire the tracker: unlink it, then drop the reference */
    opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super);
    OBJ_RELEASE(coll);

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,31 +0,0 @@
/* -*- C -*-
*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#ifndef GRPCOMM_RCD_H
#define GRPCOMM_RCD_H
#include "orte_config.h"
#include "orte/mca/grpcomm/grpcomm.h"
BEGIN_C_DECLS
/*
* Grpcomm interfaces
*/
ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_rcd_component;
extern orte_grpcomm_base_module_t orte_grpcomm_rcd_module;
END_C_DECLS
#endif

Просмотреть файл

@ -1,84 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/mca/mca.h"
#include "opal/runtime/opal_params.h"
#include "orte/util/proc_info.h"
#include "grpcomm_rcd.h"
static int my_priority=5;
static int rcd_open(void);
static int rcd_close(void);
static int rcd_query(mca_base_module_t **module, int *priority);
static int rcd_register(void);
/*
 * Component descriptor for the "rcd" grpcomm component: identifies the
 * component by name and version and wires up its open / close / query /
 * register-parameters entry points, all of which are defined later in
 * this file.
 */
orte_grpcomm_base_component_t mca_grpcomm_rcd_component = {
.base_version = {
ORTE_GRPCOMM_BASE_VERSION_3_0_0,
.mca_component_name = "rcd",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
.mca_open_component = rcd_open,
.mca_close_component = rcd_close,
.mca_query_component = rcd_query,
.mca_register_component_params = rcd_register,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/**
 * Register this component's MCA parameters.
 *
 * Exposes a single integer parameter, "priority", backed by my_priority.
 * The default of 80 is assigned here, immediately before registration,
 * and replaces the value from the variable's static initializer.
 *
 * @return ORTE_SUCCESS always; the registration result is deliberately
 *         discarded.
 */
static int rcd_register(void)
{
    mca_base_component_t *component = &mca_grpcomm_rcd_component.base_version;

    /* the priority is adjustable so that users can select rcd for use
     * by apps without affecting daemons */
    my_priority = 80;

    (void) mca_base_component_var_register(component, "priority",
                                           "Priority of the grpcomm rcd component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &my_priority);

    return ORTE_SUCCESS;
}
/**
 * Component open hook.  There is nothing to set up.
 *
 * @return ORTE_SUCCESS always
 */
static int rcd_open(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component close hook.  There is nothing to tear down.
 *
 * @return ORTE_SUCCESS always
 */
static int rcd_close(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component query hook: offer the rcd module at the configured priority.
 *
 * @param module    [out] set to the rcd module
 * @param priority  [out] set to the current value of my_priority
 * @return ORTE_SUCCESS always
 */
static int rcd_query(mca_base_module_t **module, int *priority)
{
    *priority = my_priority;
    *module = (mca_base_module_t *) &orte_grpcomm_rcd_module;

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: maintenance

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC
* All rights reserved
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -109,9 +109,8 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
/* send the buffer to the host - this is either a daemon or
* a tool that requested IOF
*/
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
host, buf, ORTE_RML_TAG_IOF_PROXY,
orte_rml_send_callback, NULL))) {
if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -149,8 +149,7 @@ BEGIN_C_DECLS
opal_dss.pack(buf, (b), 1, ORTE_NAME); \
\
/* send the buffer to the HNP */ \
orte_rml.send_buffer_nb(orte_mgmt_conduit, \
ORTE_PROC_MY_HNP, buf, \
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, \
ORTE_RML_TAG_IOF_HNP, \
orte_rml_send_callback, NULL); \
} while(0);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -513,8 +513,7 @@ static int orted_output(const orte_process_name_t* peer,
"%s iof:orted:output sending %d bytes to HNP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)strlen(msg)+1));
orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
orte_rml_send_callback, NULL);
return ORTE_SUCCESS;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -140,9 +140,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
"%s iof:orted:read handler sending %d bytes to HNP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
orte_rml_send_callback, NULL);
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
orte_rml_send_callback, NULL);
/* re-add the event */
ORTE_IOF_READ_ACTIVATE(rev);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2016 Intel Corporation. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -72,9 +72,8 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag)
(ORTE_IOF_XON == tag) ? "xon" : "xoff"));
/* send the buffer to the HNP */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL))) {
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL))) {
ORTE_ERROR_LOG(rc);
}
}

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -170,9 +170,8 @@ static int tool_pull(const orte_process_name_t* src_name,
/* send the buffer to the correct HNP */
ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
orte_rml.send_buffer_nb(orte_mgmt_conduit,
&hnp, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL);
orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL);
return ORTE_SUCCESS;
}
@ -220,9 +219,8 @@ static int tool_close(const orte_process_name_t* src_name,
/* send the buffer to the correct HNP */
ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
orte_rml.send_buffer_nb(orte_mgmt_conduit,
&hnp, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL);
orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
send_cb, NULL);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
@ -67,7 +67,6 @@
#include "orte/mca/ess/base/base.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/regx/regx.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/base/base.h"
@ -79,6 +78,7 @@
#include "orte/util/context_fns.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/session_dir.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
@ -148,7 +148,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
int8_t flag;
void *nptr;
uint32_t key;
char *nidmap;
orte_proc_t *dmn, *proc;
opal_value_t *val = NULL, *kv;
opal_list_t *modex, ilist;
@ -167,33 +166,21 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
return ORTE_SUCCESS;
}
/* if we couldn't provide the allocation regex on the orted
* cmd line, then we need to provide all the info here */
if (!orte_nidmap_communicated) {
if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &nidmap))) {
ORTE_ERROR_LOG(rc);
return rc;
}
orte_nidmap_communicated = true;
} else {
nidmap = NULL;
}
opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING);
if (NULL != nidmap) {
free(nidmap);
}
/* if we haven't already done so, provide the info on the
* capabilities of each node */
/* provide the nidmap - i.e., the map of hostnames
* and the vpid of the daemon running on each node.
* In a DVM, we should only have to do this once */
if (1 < orte_process_info.num_procs &&
(!orte_node_info_communicated ||
orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL))) {
/* mark that we did include this info */
flag = 1;
opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
if (ORTE_SUCCESS != (rc = orte_regx.encode_nodemap(buffer))) {
/* load the nidmap */
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* get wireup info for daemons */
if (NULL == (jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
@ -227,104 +214,100 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
} else {
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(wireup);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
}
/* if we didn't rollup the connection info, then we have
* to provide a complete map of connection info */
if (!orte_static_ports && !orte_fwd_mpirun_port) {
for (v=1; v < jptr->procs->size; v++) {
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) {
continue;
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(wireup);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
return rc;
}
val = NULL;
if (opal_pmix.legacy_get()) {
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) {
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
/* provide a complete map of connection info */
for (v=1; v < jptr->procs->size; v++) {
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) {
continue;
}
val = NULL;
if (opal_pmix.legacy_get()) {
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
} else {
/* pack the name of the daemon */
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
} else {
/* pack the name of the daemon */
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
/* pack the URI */
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
OBJ_RELEASE(val);
}
} else {
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) {
/* pack the URI */
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
} else {
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(buffer);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
OBJ_RELEASE(val);
}
} else {
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
return rc;
} else {
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(buffer);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
OBJ_RELEASE(wireup);
return rc;
}
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
}
}
@ -417,17 +400,11 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
}
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
/* compute and pack the ppn regex */
if (ORTE_SUCCESS != (rc = orte_regx.generate_ppn(jdata, &nidmap))) {
/* compute and pack the ppn */
if (ORTE_SUCCESS != (rc = orte_util_generate_ppn(jdata, buffer))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(nidmap);
return rc;
}
free(nidmap);
}
/* get any application prep info */
@ -485,7 +462,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
orte_proc_t *pptr, *dmn;
orte_app_context_t *app;
int8_t flag;
char *ppn;
opal_value_t *kv;
opal_list_t local_support, cache;
opal_pmix_lock_t lock;
@ -623,29 +599,21 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
* and sent us the complete array of procs in the orte_job_t, so we
* don't need to do anything more here */
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
/* extract the ppn regex */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) {
/* load the ppn info into the job and node arrays - the
* function will ignore the data on the HNP as it already
* has the info */
if (ORTE_SUCCESS != (rc = orte_util_decode_ppn(jdata, buffer))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
if (!ORTE_PROC_IS_HNP) {
/* populate the node array of the job map and the proc array of
* the job object so we know how many procs are on each node */
if (ORTE_SUCCESS != (rc = orte_regx.parse_ppn(jdata, ppn))) {
ORTE_ERROR_LOG(rc);
free(ppn);
goto REPORT_ERROR;
}
/* now assign locations to the procs */
/* assign locations to the procs */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
free(ppn);
goto REPORT_ERROR;
}
}
free(ppn);
/* compute the ranks and add the proc objects
* to the jdata->procs array */

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -93,6 +93,9 @@ typedef uint8_t orte_daemon_cmd_flag_t;
/* tell DVM daemons to cleanup resources from job */
#define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34
/* pass node info */
#define ORTE_DAEMON_PASS_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 35
/*
* Struct written up the pipe from the child to the parent.
*/

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -144,11 +144,6 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata);
*/
ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri);
/* Get the available transports and their attributes */
#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u)
ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports);
#if OPAL_ENABLE_FT_CR == 1
ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata);
#endif

Просмотреть файл

@ -2,7 +2,7 @@
/*
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -107,7 +107,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
component = (mca_oob_base_component_t*)cli->cli_component;
if (NULL != component->is_reachable) {
if (component->is_reachable(msg->routed, &msg->dst)) {
if (component->is_reachable(&msg->dst)) {
/* there is a way to reach this peer - record it
* so we don't waste this time again
*/
@ -170,7 +170,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
component = (mca_oob_base_component_t*)cli->cli_component;
/* is this peer reachable via this component? */
if (!component->is_reachable(msg->routed, &msg->dst)) {
if (!component->is_reachable(&msg->dst)) {
continue;
}
/* it is addressable, so attempt to send via that transport */
@ -384,30 +384,6 @@ static void process_uri(char *uri)
opal_argv_free(uris);
}
/*
 * Ask every active OOB component for its transport pathway and append
 * each non-NULL answer to the caller-supplied list.
 *
 * transports: list that receives the orte_rml_pathway_t objects
 *             returned by the components' query_transports hooks.
 */
void orte_oob_base_get_transports(opal_list_t *transports)
{
    mca_base_component_list_item_t *item;
    mca_oob_base_component_t *comp;
    orte_rml_pathway_t *pathway;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s: get transports",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(item, &orte_oob_base.actives, mca_base_component_list_item_t) {
        comp = (mca_oob_base_component_t*)item->cli_component;
        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s:get transports for component %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            comp->oob_base.mca_component_name);

        /* a component that does not provide the hook contributes nothing */
        if (NULL == comp->query_transports) {
            continue;
        }

        /* the hook may decline by returning NULL */
        pathway = comp->query_transports();
        if (NULL == pathway) {
            continue;
        }

        opal_list_append(transports, &pathway->super);
    }
}
#if OPAL_ENABLE_FT_CR == 1
void orte_oob_base_ft_event(int sd, short argc, void *cbdata)
{

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -55,9 +55,8 @@ typedef int (*mca_oob_base_component_send_fn_t)(orte_rml_send_t *msg);
typedef char* (*mca_oob_base_component_get_addr_fn_t)(void);
typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer,
char **uris);
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(char *routed, orte_process_name_t *peer);
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer);
typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata);
typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void);
#if OPAL_ENABLE_FT_CR == 1
typedef int (*mca_oob_base_component_ft_event_fn_t)(int state);
@ -75,7 +74,6 @@ typedef struct {
mca_oob_base_component_get_addr_fn_t get_addr;
mca_oob_base_component_set_addr_fn_t set_addr;
mca_oob_base_component_is_reachable_fn_t is_reachable;
mca_oob_base_component_query_transports_fn_t query_transports;
#if OPAL_ENABLE_FT_CR == 1
mca_oob_base_component_ft_event_fn_t ft_event;
#endif

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -180,7 +180,7 @@ static void send_nb(orte_rml_send_t *msg)
/* do we have a route to this peer (could be direct)? */
hop = orte_routed.get_route(msg->routed, &msg->dst);
hop = orte_routed.get_route(&msg->dst);
/* do we know this hop? */
if (NULL == (peer = mca_oob_tcp_peer_lookup(&hop))) {
/* push this back to the component so it can try

Просмотреть файл

@ -14,7 +14,7 @@
* reserved.
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -103,8 +103,7 @@ static int component_send(orte_rml_send_t *msg);
static char* component_get_addr(void);
static int component_set_addr(orte_process_name_t *peer,
char **uris);
static bool component_is_reachable(char *rtmod, orte_process_name_t *peer);
static orte_rml_pathway_t* component_query_transports(void);
static bool component_is_reachable(orte_process_name_t *peer);
#if OPAL_ENABLE_FT_CR == 1
static int component_ft_event(int state);
#endif
@ -135,7 +134,6 @@ mca_oob_tcp_component_t mca_oob_tcp_component = {
.get_addr = component_get_addr,
.set_addr = component_set_addr,
.is_reachable = component_is_reachable,
.query_transports = component_query_transports,
#if OPAL_ENABLE_FT_CR == 1
.ft_event = component_ft_event,
#endif
@ -334,11 +332,6 @@ static int tcp_component_register(void)
if (NULL != mca_oob_tcp_component.tcp_static_ports ||
NULL != mca_oob_tcp_component.tcp6_static_ports) {
/* can't fwd mpirun port _and_ have static ports */
if (ORTE_PROC_IS_HNP && orte_fwd_mpirun_port) {
orte_show_help("help-oob-tcp.txt", "static-fwd", true);
return ORTE_ERR_NOT_AVAILABLE;
}
orte_static_ports = true;
}
@ -632,37 +625,6 @@ static int component_available(void)
return ORTE_SUCCESS;
}
/*
 * Describe the transport offered by the TCP OOB component.
 *
 * Returns NULL when neither the ipv4conns nor the ipv6conns setting is
 * present. Otherwise returns a new orte_rml_pathway_t whose component
 * name is "oob" and whose attributes record the transport type, the
 * protocol type, and a qualifier string listing the address families
 * in use.
 */
static orte_rml_pathway_t* component_query_transports(void)
{
    orte_rml_pathway_t *pathway;
    char *qualifier;

    /* with neither IPv4 nor IPv6 connections there is nothing to report */
    if (NULL == mca_oob_tcp_component.ipv4conns &&
        NULL == mca_oob_tcp_component.ipv6conns) {
        return NULL;
    }

    /* reaching this point means we support Ethernet and TCP */
    pathway = OBJ_NEW(orte_rml_pathway_t);
    pathway->component = strdup("oob");
    orte_set_attribute(&pathway->attributes, ORTE_RML_TRANSPORT_TYPE,
                       ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING);
    orte_set_attribute(&pathway->attributes, ORTE_RML_PROTOCOL_TYPE,
                       ORTE_ATTR_LOCAL, "TCP", OPAL_STRING);

    /* pick the qualifier: we route communications and list whichever
     * address families are configured (at least one is, per the guard
     * above) */
    if (NULL == mca_oob_tcp_component.ipv6conns) {
        qualifier = "routed=true:ipv4";
    } else if (NULL == mca_oob_tcp_component.ipv4conns) {
        qualifier = "routed=true:ipv6";
    } else {
        qualifier = "routed=true:ipv4:ipv6";
    }
    orte_set_attribute(&pathway->attributes, ORTE_RML_QUALIFIER_ATTRIB,
                       ORTE_ATTR_LOCAL, qualifier, OPAL_STRING);

    return pathway;
}
/* Start all modules */
static int component_startup(void)
{
@ -1013,12 +975,12 @@ static int component_set_addr(orte_process_name_t *peer,
return ORTE_ERR_TAKE_NEXT_OPTION;
}
static bool component_is_reachable(char *rtmod, orte_process_name_t *peer)
static bool component_is_reachable(orte_process_name_t *peer)
{
orte_process_name_t hop;
/* if we have a route to this peer, then we can reach it */
hop = orte_routed.get_route(rtmod, peer);
hop = orte_routed.get_route(peer);
if (ORTE_JOBID_INVALID == hop.jobid ||
ORTE_VPID_INVALID == hop.vpid) {
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
@ -1107,7 +1069,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
if (!orte_finalizing) {
/* activate the proc state */
if (ORTE_SUCCESS != orte_routed.route_lost(pop->rtmod, &pop->peer)) {
if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
} else {
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED);
@ -1221,7 +1183,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
snd->count = mop->snd->hdr.nbytes;
snd->cbfunc.iov = NULL;
snd->cbdata = NULL;
snd->routed = strdup(mop->snd->hdr.routed);
/* activate the OOB send state */
ORTE_OOB_SEND(snd);
/* protect the data */
@ -1421,15 +1382,11 @@ OBJ_CLASS_INSTANCE(mca_oob_tcp_addr_t,
static void pop_cons(mca_oob_tcp_peer_op_t *pop)
{
pop->rtmod = NULL;
pop->net = NULL;
pop->port = NULL;
}
static void pop_des(mca_oob_tcp_peer_op_t *pop)
{
if (NULL != pop->rtmod) {
free(pop->rtmod);
}
if (NULL != pop->net) {
free(pop->net);
}

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
@ -334,7 +334,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
* an event in the component event base, and so it will fire async
* from us if we are in our own progress thread
*/
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_failed_to_connect);
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_failed_to_connect);
/* FIXME: post any messages in the send queue back to the OOB
* level for reassignment
*/
@ -937,7 +937,7 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
/* set the peer into the component and OOB-level peer tables to indicate
* that we know this peer and we will be handling him
*/
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module);
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module);
/* connected */
tcp_peer_connected(peer);
@ -968,7 +968,7 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer)
}
/* update the route */
orte_routed.update_route(NULL, &peer->name, &peer->name);
orte_routed.update_route(&peer->name, &peer->name);
/* initiate send of first message on queue */
if (NULL == peer->send_msg) {
@ -1027,7 +1027,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer)
/* inform the component-level that we have lost a connection so
* it can decide what to do about it.
*/
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_lost_connection);
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_lost_connection);
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
/* nothing more to do */
@ -1238,7 +1238,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer)
/* set the peer into the component and OOB-level peer tables to indicate
* that we know this peer and we will be handling him
*/
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module);
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module);
tcp_peer_connected(peer);
if (!peer->recv_ev_active) {

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -73,21 +73,15 @@ typedef struct {
uint16_t af_family;
char *net;
char *port;
char *rtmod;
} mca_oob_tcp_peer_op_t;
OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t);
#define ORTE_ACTIVATE_TCP_CMP_OP(p, r, cbfunc) \
#define ORTE_ACTIVATE_TCP_CMP_OP(p, cbfunc) \
do { \
mca_oob_tcp_peer_op_t *pop; \
char *proxy; \
pop = OBJ_NEW(mca_oob_tcp_peer_op_t); \
pop->peer.jobid = (p)->name.jobid; \
pop->peer.vpid = (p)->name.vpid; \
proxy = (r); \
if (NULL != proxy) { \
pop->rtmod = strdup(proxy); \
} \
ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \
(cbfunc), ORTE_MSG_PRI); \
} while(0);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -586,7 +586,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
snd->data = peer->recv_msg->data;
snd->seq_num = peer->recv_msg->hdr.seq_num;
snd->count = peer->recv_msg->hdr.nbytes;
snd->routed = strdup(peer->recv_msg->hdr.routed);
snd->cbfunc.iov = NULL;
snd->cbdata = NULL;
/* activate the OOB send state */

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -109,10 +109,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
_s->hdr.type = MCA_OOB_TCP_USER; \
_s->hdr.tag = (m)->tag; \
_s->hdr.seq_num = (m)->seq_num; \
if (NULL != (m)->routed) { \
(void)opal_string_copy(_s->hdr.routed, (m)->routed, \
ORTE_MAX_RTD_SIZE); \
} \
/* point to the actual message */ \
_s->msg = (m); \
/* set the total number of bytes to be sent */ \
@ -157,10 +153,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
_s->hdr.type = MCA_OOB_TCP_USER; \
_s->hdr.tag = (m)->tag; \
_s->hdr.seq_num = (m)->seq_num; \
if (NULL != (m)->routed) { \
(void)opal_string_copy(_s->hdr.routed, (m)->routed, \
ORTE_MAX_RTD_SIZE); \
} \
/* point to the actual message */ \
_s->msg = (m); \
/* set the total number of bytes to be sent */ \

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -44,8 +44,10 @@
#include "opal/dss/dss.h"
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/compress/compress.h"
#include "orte/util/dash_host/dash_host.h"
#include "orte/util/nidmap.h"
#include "orte/util/session_dir.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
@ -53,7 +55,6 @@
#include "orte/mca/iof/base/base.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/regx/regx.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rml/rml.h"
@ -72,7 +73,6 @@
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_locks.h"
#include "orte/runtime/orte_quit.h"
#include "orte/util/compress.h"
#include "orte/util/name_fns.h"
#include "orte/util/pre_condition_transports.h"
#include "orte/util/proc_info.h"
@ -130,7 +130,11 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_topology_t *t;
orte_node_t *node;
int i;
int i, rc;
uint8_t u8;
opal_buffer_t buf;
orte_grpcomm_signature_t *sig;
orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD;
ORTE_ACQUIRE_OBJECT(caddy);
@ -175,7 +179,79 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
orte_ras_base_display_alloc();
}
/* ensure we update the routing plan */
orte_routed.update_routing_plan(NULL);
orte_routed.update_routing_plan();
/* prep the buffer */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* load the command */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
/* if we did not execute a tree-spawn, then the daemons do
* not currently have a nidmap for the job - in that case,
* send one to them */
if (!orte_nidmap_communicated) {
u8 = 1;
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
orte_nidmap_communicated = true;
} else {
u8 = 0;
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
}
/* we always send the topologies and the #slots on each node. Note
* that we cannot send the #slots until after the above step since,
* for unmanaged allocations, we might have just determined it! */
if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
/* goes to all daemons */
sig = OBJ_NEW(orte_grpcomm_signature_t);
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
sig->sz = 1;
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sig);
OBJ_DESTRUCT(&buf);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
OBJ_DESTRUCT(&buf);
/* maintain accounting */
OBJ_RELEASE(sig);
/* progress the job */
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
@ -580,7 +656,7 @@ void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata)
uint8_t *cmpdata;
size_t cmplen;
/* report the size of the launch message */
compressed = orte_util_compress_block((uint8_t*)jdata->launch_msg.base_ptr,
compressed = opal_compress.compress_block((uint8_t*)jdata->launch_msg.base_ptr,
jdata->launch_msg.bytes_used,
&cmpdata, &cmplen);
if (compressed) {
@ -736,8 +812,7 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid),
ORTE_NAME_PRINT(&jdata->originator)));
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&jdata->originator, answer,
if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer,
ORTE_RML_TAG_LAUNCH_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -857,7 +932,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender,
goto CLEANUP;
}
/* decompress the data */
if (orte_util_uncompress_block(&cmpdata, cmplen,
if (opal_compress.decompress_block(&cmpdata, cmplen,
packed_data, inlen)) {
/* the data has been uncompressed */
opal_dss.load(&datbuf, cmpdata, cmplen);
@ -1184,7 +1259,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
goto CLEANUP;
}
/* decompress the data */
if (orte_util_uncompress_block(&cmpdata, cmplen,
if (opal_compress.decompress_block(&cmpdata, cmplen,
packed_data, inlen)) {
/* the data has been uncompressed */
opal_dss.load(&datbuf, cmpdata, cmplen);
@ -1270,8 +1345,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
goto CLEANUP;
}
/* send it */
orte_rml.send_buffer_nb(orte_mgmt_conduit,
&dname, relay,
orte_rml.send_buffer_nb(&dname, relay,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL);
/* we will count this node as completed
@ -1515,46 +1589,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, param);
free(param);
/* convert the nodes with daemons to a regex */
param = NULL;
if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &param))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (NULL != orte_node_regex) {
free(orte_node_regex);
}
orte_node_regex = param;
/* if this is too long, then we'll have to do it with
* a phone home operation instead */
if (strlen(param) < orte_plm_globals.node_regex_threshold) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_node_regex");
opal_argv_append(argc, argv, orte_node_regex);
/* mark that the nidmap has been communicated */
orte_nidmap_communicated = true;
}
if (!orte_static_ports && !orte_fwd_mpirun_port) {
/* if we are using static ports, or we are forwarding
* mpirun's port, then we would have built all the
* connection info and so there is nothing to be passed.
* Otherwise, we have to pass the HNP uri so we can
* phone home */
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_hnp_uri");
opal_argv_append(argc, argv, orte_process_info.my_hnp_uri);
}
/* if requested, pass our port */
if (orte_fwd_mpirun_port) {
opal_asprintf(&param, "%d", orte_process_info.my_port);
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "oob_tcp_static_ipv4_ports");
opal_argv_append(argc, argv, param);
free(param);
}
/* if --xterm was specified, pass that along */
if (NULL != orte_xterm) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
@ -2136,7 +2170,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
opal_list_remove_item(&nodes, item);
OBJ_RELEASE(item);
} else {
/* The filtering logic sets this flag only for nodes which
/* The filtering logic sets this flag only for nodes which
* are kept after filtering. This flag will be subsequently
* used in rmaps components and must be reset here */
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
@ -2274,7 +2308,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
/* ensure all routing plans are up-to-date - we need this
* so we know how to tree-spawn and/or xcast info */
orte_routed.update_routing_plan(NULL);
orte_routed.update_routing_plan();
}
/* mark that the daemon job changed */

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -278,8 +278,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
}
/* send the response back to the sender */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_LAUNCH_RESP,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_LAUNCH_RESP,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);

Просмотреть файл

@ -14,7 +14,7 @@
* reserved.
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -265,7 +265,6 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata;
orte_plm_rsh_caddy_t *caddy=(orte_plm_rsh_caddy_t*)t2->cbdata;
orte_proc_t *daemon = caddy->daemon;
char *rtmod;
if (orte_orteds_term_ordered || orte_abnormal_term_ordered) {
/* ignore any such report - it will occur if we left the
@ -290,8 +289,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
buf = OBJ_NEW(opal_buffer_t);
opal_dss.pack(buf, &(daemon->name.vpid), 1, ORTE_VPID);
opal_dss.pack(buf, &daemon->exit_code, 1, OPAL_INT);
orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_HNP, buf,
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
orte_rml_send_callback, NULL);
/* note that this daemon failed */
@ -312,8 +310,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
/* remove it from the routing table to ensure num_routes
* returns the correct value
*/
rtmod = orte_rml.get_routed(orte_coll_conduit);
orte_routed.route_lost(rtmod, &daemon->name);
orte_routed.route_lost(&daemon->name);
/* report that the daemon has failed so we can exit */
ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START);
}
@ -797,7 +794,6 @@ static int remote_spawn(void)
orte_job_t *daemons;
opal_list_t coll;
orte_namelist_t *child;
char *rtmod;
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:rsh: remote spawn called",
@ -816,9 +812,8 @@ static int remote_spawn(void)
}
/* get the updated routing list */
rtmod = orte_rml.get_routed(orte_coll_conduit);
OBJ_CONSTRUCT(&coll, opal_list_t);
orte_routed.get_routing_list(rtmod, &coll);
orte_routed.get_routing_list(&coll);
/* if I have no children, just return */
if (0 == opal_list_get_size(&coll)) {
@ -913,8 +908,7 @@ cleanup:
buf = OBJ_NEW(opal_buffer_t);
opal_dss.pack(buf, &target.vpid, 1, ORTE_VPID);
opal_dss.pack(buf, &rc, 1, OPAL_INT);
orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_HNP, buf,
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
orte_rml_send_callback, NULL);
}
@ -1040,7 +1034,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
char *username;
int port, *portptr;
orte_namelist_t *child;
char *rtmod;
ORTE_ACQUIRE_OBJECT(state);
@ -1185,8 +1178,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* get the updated routing list */
OBJ_CONSTRUCT(&coll, opal_list_t);
rtmod = orte_rml.get_routed(orte_coll_conduit);
orte_routed.get_routing_list(rtmod, &coll);
orte_routed.get_routing_list(&coll);
}
/* setup the launch */

Просмотреть файл

@ -1,30 +0,0 @@
#
# Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_regx.la
libmca_regx_la_SOURCES =
# pkgdata setup
dist_ortedata_DATA =
# local files
headers = regx.h
libmca_regx_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ortedir = $(orteincludedir)/$(subdir)
nobase_orte_HEADERS = $(headers)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

Просмотреть файл

@ -1,18 +0,0 @@
#
# Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_regx_la_SOURCES += \
base/regx_base_default_fns.c \
base/regx_base_frame.c \
base/regx_base_select.c

Просмотреть файл

@ -1,74 +0,0 @@
/*
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
* regx framework base functionality.
*/
#ifndef ORTE_MCA_REGX_BASE_H
#define ORTE_MCA_REGX_BASE_H
/*
 * includes
 */
#include "orte_config.h"
#include "orte/types.h"
#include "opal/class/opal_list.h"
#include "orte/mca/mca.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/regx/regx.h"

BEGIN_C_DECLS

/*
 * MCA Framework
 */
ORTE_DECLSPEC extern mca_base_framework_t orte_regx_base_framework;

/* select all components */
ORTE_DECLSPEC int orte_regx_base_select(void);

/*
 * common stuff
 */

/* List item describing one range entry.
 * NOTE(review): the exact meaning of vpid/cnt/slots is not visible in
 * this header - confirm against regx_base_default_fns.c. */
typedef struct {
    opal_list_item_t super;
    int vpid;
    int cnt;
    int slots;
    orte_topology_t *t;
} orte_regex_range_t;
OBJ_CLASS_DECLARATION(orte_regex_range_t);

/* List item describing one node-name pattern: a prefix, a suffix, a
 * digit count and a list of ranges (presumably orte_regex_range_t
 * items - confirm against the users of this type). */
typedef struct {
    /* list object */
    opal_list_item_t super;
    char *prefix;
    char *suffix;
    int num_digits;
    opal_list_t ranges;
} orte_regex_node_t;
OBJ_CLASS_DECLARATION(orte_regex_node_t);

/*
 * Base implementations shared by the regx components. These
 * declarations are kept inside the BEGIN_C_DECLS/END_C_DECLS pair so
 * that a C++ translation unit including this header sees them with C
 * linkage, matching their definitions in the C sources.
 */
ORTE_DECLSPEC extern int orte_regx_base_nidmap_parse(char *regex);
ORTE_DECLSPEC extern int orte_regx_base_encode_nodemap(opal_buffer_t *buffer);
ORTE_DECLSPEC int orte_regx_base_decode_daemon_nodemap(opal_buffer_t *buffer);
ORTE_DECLSPEC int orte_regx_base_generate_ppn(orte_job_t *jdata, char **ppn);
ORTE_DECLSPEC int orte_regx_base_parse_ppn(orte_job_t *jdata, char *regex);
ORTE_DECLSPEC int orte_regx_base_extract_node_names(char *regexp, char ***names);

END_C_DECLS

#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,77 +0,0 @@
/*
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <string.h>
#include "orte/mca/mca.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/regx/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/regx/base/static-components.h"
/*
* Global variables
*/
orte_regx_base_module_t orte_regx = {0};
/*
 * Framework close hook: lets the selected module finalize (when it
 * provides a finalize entry point) and then closes all components.
 */
static int orte_regx_base_close(void)
{
    /* the active module is optional about providing a finalize hook */
    if (orte_regx.finalize != NULL) {
        orte_regx.finalize();
    }

    return mca_base_framework_components_close(&orte_regx_base_framework, NULL);
}
/**
 * Framework open hook.
 *
 * Opens the regx components through the MCA base, passing the caller's
 * open flags straight through.
 *
 * @param flags  MCA open flags forwarded to the base
 * @return       whatever mca_base_framework_components_open() returns
 */
static int orte_regx_base_open(mca_base_open_flag_t flags)
{
    return mca_base_framework_components_open(&orte_regx_base_framework, flags);
}
/*
 * Declare the regx framework for the "orte" project, wiring in the open
 * and close hooks defined above and the configure-generated list of
 * static components.
 * NOTE(review): the NULL argument is presumably the (absent) register
 * hook and the trailing 0 the framework flags - confirm against the
 * MCA_BASE_FRAMEWORK_DECLARE definition.
 */
MCA_BASE_FRAMEWORK_DECLARE(orte, regx, "ORTE Regx Subsystem", NULL,
orte_regx_base_open, orte_regx_base_close,
mca_regx_base_static_components, 0);
/* OBJECT INSTANTIATIONS */

/*
 * Constructor for orte_nidmap_regex_t: zero the context and count and
 * mark the number of procs as "not set" (-1).
 */
static void nrcon(orte_nidmap_regex_t *obj)
{
    obj->nprocs = -1;
    obj->ctx = 0;
    obj->cnt = 0;
}

/* list-item class with a constructor and no destructor */
OBJ_CLASS_INSTANCE(orte_nidmap_regex_t,
                   opal_list_item_t,
                   nrcon, NULL);

Просмотреть файл

@ -1,61 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/regx/base/base.h"
/**
 * Select the single best regx component among those that were opened,
 * copy its module into the global orte_regx, and give the module a
 * chance to initialize.
 *
 * @return ORTE_ERR_NOT_FOUND if no component could be selected,
 *         otherwise the module's init() result, or ORTE_SUCCESS when
 *         the module has no init entry point.
 */
int orte_regx_base_select(void)
{
    orte_regx_base_module_t *winner_module = NULL;
    orte_regx_base_component_t *winner_component = NULL;
    int status;

    status = mca_base_select("regx", orte_regx_base_framework.framework_output,
                             &orte_regx_base_framework.framework_components,
                             (mca_base_module_t **) &winner_module,
                             (mca_base_component_t **) &winner_component, NULL);
    if (OPAL_SUCCESS != status) {
        /* only reached when nothing was selected */
        return ORTE_ERR_NOT_FOUND;
    }

    /* remember the winner by value */
    orte_regx = *winner_module;

    /* init is optional */
    if (NULL == orte_regx.init) {
        return ORTE_SUCCESS;
    }
    return orte_regx.init();
}

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: active

Просмотреть файл

@ -1,127 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* The Open RTE Regular Expression Framework (regx)
*
* Defines the module interface used to create and parse the regular
* expressions that describe the nodes in an allocation and the
* procs-per-node (ppn) of a job
*
*/
#ifndef ORTE_MCA_REGX_H
#define ORTE_MCA_REGX_H
#include "orte_config.h"
#include "orte/types.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/dss/dss_types.h"
#include "orte/mca/mca.h"
#include "orte/runtime/orte_globals.h"
BEGIN_C_DECLS
/*
* regx module functions
*/
#define ORTE_MAX_NODE_PREFIX 50
#define ORTE_CONTIG_NODE_CMD 0x01
#define ORTE_NON_CONTIG_NODE_CMD 0x02
/**
 * List item (OPAL object) carrying three integers: a context value, a
 * process count and an entry count.
 *
 * NOTE(review): field semantics are inferred from the names only;
 * presumably one item describes a run of "cnt" nodes that each host
 * "nprocs" procs of app context "ctx" - confirm against the callers.
 */
typedef struct {
/* list linkage - must be the first member */
opal_list_item_t super;
/* context value */
int ctx;
/* number of procs */
int nprocs;
/* entry count */
int cnt;
} orte_nidmap_regex_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nidmap_regex_t);
/*
 * Function-pointer types for the entry points of a regx module. Every
 * entry point except finalize returns an int status code.
 */

/* initialize the module - allow it to do whatever one-time
* things it requires */
typedef int (*orte_regx_base_module_init_fn_t)(void);
/* NOTE(review): presumably builds a regular expression from the given
 * node pool and returns it in *regex - confirm ownership of the string */
typedef int (*orte_regx_base_module_nidmap_create_fn_t)(opal_pointer_array_t *pool, char **regex);
/* NOTE(review): presumably the inverse of nidmap_create - parses the
 * given regular expression - confirm */
typedef int (*orte_regx_base_module_nidmap_parse_fn_t)(char *regex);
/* NOTE(review): presumably expands the regular expression into an
 * array of node names returned in *names - confirm who frees it */
typedef int (*orte_regx_base_module_extract_node_names_fn_t)(char *regexp, char ***names);
/* create a regular expression describing the nodes in the
* allocation */
typedef int (*orte_regx_base_module_encode_nodemap_fn_t)(opal_buffer_t *buffer);
/* decode a regular expression created by the encode function
* into the orte_node_pool array */
typedef int (*orte_regx_base_module_decode_daemon_nodemap_fn_t)(opal_buffer_t *buffer);
/* NOTE(review): takes no arguments; presumably builds the daemon
 * nidmap from global state - confirm */
typedef int (*orte_regx_base_module_build_daemon_nidmap_fn_t)(void);
/* create a regular expression describing the ppn for a job */
typedef int (*orte_regx_base_module_generate_ppn_fn_t)(orte_job_t *jdata, char **ppn);
/* decode the ppn */
typedef int (*orte_regx_base_module_parse_ppn_fn_t)(orte_job_t *jdata, char *ppn);
/* give the component a chance to cleanup */
typedef void (*orte_regx_base_module_finalize_fn_t)(void);
/*
* regx module version 1.0.0
*
* Table of entry points exported by a regx module. An entry may be NULL:
* orte_regx_base_select() checks init and orte_regx_base_close() checks
* finalize before calling them.
*/
typedef struct {
/* optional one-time initialization */
orte_regx_base_module_init_fn_t init;
orte_regx_base_module_nidmap_create_fn_t nidmap_create;
orte_regx_base_module_nidmap_parse_fn_t nidmap_parse;
orte_regx_base_module_extract_node_names_fn_t extract_node_names;
orte_regx_base_module_encode_nodemap_fn_t encode_nodemap;
orte_regx_base_module_decode_daemon_nodemap_fn_t decode_daemon_nodemap;
orte_regx_base_module_build_daemon_nidmap_fn_t build_daemon_nidmap;
orte_regx_base_module_generate_ppn_fn_t generate_ppn;
orte_regx_base_module_parse_ppn_fn_t parse_ppn;
/* optional cleanup, invoked when the framework closes */
orte_regx_base_module_finalize_fn_t finalize;
} orte_regx_base_module_t;
/* the selected module; filled in by orte_regx_base_select() */
ORTE_DECLSPEC extern orte_regx_base_module_t orte_regx;
/*
* regx component
*/
/**
* regx component version 1.0.0
*
* A regx component adds no fields of its own: it consists only of the
* two standard MCA members below.
*/
typedef struct {
/** Base MCA structure */
mca_base_component_t base_version;
/** Base MCA data */
mca_base_component_data_t base_data;
} orte_regx_base_component_t;
/**
* Macro for use in components that are of type regx
*/
#define MCA_REGX_BASE_VERSION_1_0_0 \
ORTE_MCA_BASE_VERSION_2_1_0("regx", 1, 0, 0)
END_C_DECLS
#endif

Просмотреть файл

@ -11,7 +11,7 @@
# All rights reserved.
# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -26,5 +26,4 @@ headers += \
libmca_rml_la_SOURCES += \
base/rml_base_frame.c \
base/rml_base_contact.c \
base/rml_base_msg_handlers.c \
base/rml_base_stubs.c
base/rml_base_msg_handlers.c

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -65,18 +65,9 @@ ORTE_DECLSPEC int orte_rml_base_select(void);
/*
* globals that might be needed
*/
/* adding element to hold the active modules and components */
typedef struct {
opal_list_item_t super;
int pri;
orte_rml_component_t *component;
} orte_rml_base_active_t;
OBJ_CLASS_DECLARATION(orte_rml_base_active_t);
/* a global struct containing framework-level values */
typedef struct {
opal_list_t actives; /* list to hold the active components */
opal_pointer_array_t conduits; /* array to hold the open conduits */
opal_list_t posted_recvs;
opal_list_t unmatched_msgs;
int max_retries;
@ -114,8 +105,6 @@ typedef struct {
* transfers
*/
char *data;
/* routed module to be used */
char *routed;
} orte_rml_send_t;
OBJ_CLASS_DECLARATION(orte_rml_send_t);
@ -232,52 +221,11 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t);
OBJ_RELEASE(m); \
}while(0);
#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX
/* common implementations */
ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata);
/* Stub API interfaces to cycle through active plugins */
int orte_rml_API_ping(orte_rml_conduit_t conduit_id,
const char* contact_info,
const struct timeval* tv);
int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id,
orte_process_name_t* peer, struct iovec* msg,
int count, orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc, void* cbdata);
int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id,
orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_API_recv_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
void orte_rml_API_purge(orte_process_name_t *peer);
int orte_rml_API_query_transports(opal_list_t *providers);
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes);
void orte_rml_API_close_conduit(orte_rml_conduit_t id);
char* orte_rml_API_get_routed(orte_rml_conduit_t id);
END_C_DECLS
#endif /* MCA_RML_BASE_H */

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -40,26 +40,10 @@
/* Initialising stub fns in the global var used by other modules */
orte_rml_base_API_t orte_rml = {
.ping = orte_rml_API_ping,
.send_nb = orte_rml_API_send_nb,
.send_buffer_nb = orte_rml_API_send_buffer_nb,
.recv_nb = orte_rml_API_recv_nb,
.recv_buffer_nb = orte_rml_API_recv_buffer_nb,
.recv_cancel = orte_rml_API_recv_cancel,
.purge = orte_rml_API_purge,
.query_transports = orte_rml_API_query_transports,
.open_conduit = orte_rml_API_open_conduit,
.close_conduit = orte_rml_API_close_conduit,
.get_routed = orte_rml_API_get_routed
};
orte_rml_base_module_t orte_rml = {0};
orte_rml_base_t orte_rml_base = {{{0}}};
orte_rml_component_t *orte_rml_component = NULL;
static bool selected = false;
static int orte_rml_base_register(mca_base_register_flag_t flags)
{
orte_rml_base.max_retries = 3;
@ -82,62 +66,9 @@ static int orte_rml_base_register(mca_base_register_flag_t flags)
return ORTE_SUCCESS;
}
/*
 * Event callback that destructs the list of posted receives and, when
 * a lock was passed as cbdata, wakes the thread waiting on it.
 *
 * @param sd      unused
 * @param args    unused
 * @param cbdata  orte_lock_t* to signal on completion, or NULL
 */
static void cleanup(int sd, short args, void *cbdata)
{
    orte_lock_t *waiter = (orte_lock_t*)cbdata;

    ORTE_ACQUIRE_OBJECT(active);
    OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);

    /* nobody is waiting when invoked directly with NULL */
    if (NULL == waiter) {
        return;
    }
    ORTE_POST_OBJECT(waiter);
    ORTE_WAKEUP_THREAD(waiter);
}
static int orte_rml_base_close(void)
{
orte_lock_t lock;
int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits);
orte_rml_base_module_t *mod;
orte_rml_component_t *comp;
/* cycle thru the conduits opened and call each module's finalize */
/* The components finalise/close() will be responsible for freeing the module pointers */
for (idx = 0; idx < total_conduits ; idx++)
{
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) {
/* close the conduit */
comp = (orte_rml_component_t*)mod->component;
if (NULL != comp && NULL != comp->close_conduit) {
comp->close_conduit(mod);
}
free(mod);
}
}
OBJ_DESTRUCT(&orte_rml_base.conduits);
OPAL_LIST_DESTRUCT(&orte_rml_base.actives)
/* because the RML posted recvs list is in a separate
* async thread for apps, we can't just destruct it here.
* Instead, we push it into that event thread and destruct
* it there */
if (ORTE_PROC_IS_APP) {
opal_event_t ev;
ORTE_CONSTRUCT_LOCK(&lock);
opal_event_set(orte_event_base, &ev, -1,
OPAL_EV_WRITE, cleanup, (void*)&lock);
opal_event_set_priority(&ev, ORTE_ERROR_PRI);
ORTE_POST_OBJECT(ev);
opal_event_active(&ev, OPAL_EV_WRITE, 1);
ORTE_WAIT_THREAD(&lock);
ORTE_DESTRUCT_LOCK(&lock);
} else {
/* we can call the destruct directly */
cleanup(0, 0, NULL);
}
OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);
return mca_base_framework_components_close(&orte_rml_base_framework, NULL);
}
@ -145,11 +76,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
{
/* Initialize globals */
/* construct object for holding the active plugin modules */
OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1);
/* Open up all available components */
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
@ -159,61 +87,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer",
orte_rml_base_register, orte_rml_base_open, orte_rml_base_close,
mca_rml_base_static_components, 0);
OBJ_CLASS_INSTANCE(orte_rml_base_active_t,
opal_list_item_t,
NULL, NULL);
/**
* Function for ordering the component(plugin) by priority
*/
int orte_rml_base_select(void)
{
mca_base_component_list_item_t *cli=NULL;
orte_rml_component_t *component=NULL;
orte_rml_base_active_t *newmodule, *mod;
bool inserted;
orte_rml_component_t *best_component = NULL;
orte_rml_base_module_t *best_module = NULL;
if (selected) {
return ORTE_SUCCESS;
}
selected = true;
OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) {
component = (orte_rml_component_t*) cli->cli_component;
opal_output_verbose(10, orte_rml_base_framework.framework_output,
"orte_rml_base_select: Initializing %s component %s",
component->base.mca_type_name,
component->base.mca_component_name);
/* add to the list of available components */
newmodule = OBJ_NEW(orte_rml_base_active_t);
newmodule->pri = component->priority;
newmodule->component = component;
/* maintain priority order */
inserted = false;
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
if (newmodule->pri > mod->pri) {
opal_list_insert_pos(&orte_rml_base.actives,
(opal_list_item_t*)mod, &newmodule->super);
inserted = true;
break;
}
}
if (!inserted) {
/* must be lowest priority - add to end */
opal_list_append(&orte_rml_base.actives, &newmodule->super);
}
/*
* Select the best component
*/
if( OPAL_SUCCESS != mca_base_select("rml", orte_rml_base_framework.framework_output,
&orte_rml_base_framework.framework_components,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component, NULL) ) {
/* This will only happen if no component was selected */
/* If we didn't find one to select, that is an error */
return ORTE_ERROR;
}
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* show the prioritized list */
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri);
}
}
/* Save the winner */
orte_rml = *best_module;
return ORTE_SUCCESS;
}
@ -279,17 +174,10 @@ static void send_cons(orte_rml_send_t *ptr)
ptr->buffer = NULL;
ptr->data = NULL;
ptr->seq_num = 0xFFFFFFFF;
ptr->routed = NULL;
}
static void send_des(orte_rml_send_t *ptr)
{
if (NULL != ptr->routed) {
free(ptr->routed);
}
}
OBJ_CLASS_INSTANCE(orte_rml_send_t,
opal_list_item_t,
send_cons, send_des);
send_cons, NULL);
static void send_req_cons(orte_rml_send_request_t *ptr)
@ -353,21 +241,3 @@ static void prq_des(orte_rml_recv_request_t *ptr)
OBJ_CLASS_INSTANCE(orte_rml_recv_request_t,
opal_object_t,
prq_cons, prq_des);
/* Constructor for orte_rml_pathway_t: construct both embedded lists
 * and start with no component set. */
static void pthcons(orte_rml_pathway_t *pathway)
{
    OBJ_CONSTRUCT(&pathway->attributes, opal_list_t);
    OBJ_CONSTRUCT(&pathway->transports, opal_list_t);
    pathway->component = NULL;
}
/* Destructor for orte_rml_pathway_t: release the component pointer
 * and destruct both embedded lists. */
static void pthdes(orte_rml_pathway_t *pathway)
{
    /* free(NULL) is a no-op, so no guard is required */
    free(pathway->component);
    OPAL_LIST_DESTRUCT(&pathway->attributes);
    OPAL_LIST_DESTRUCT(&pathway->transports);
}
OBJ_CLASS_INSTANCE(orte_rml_pathway_t,
opal_list_item_t,
pthcons, pthdes);

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -44,6 +44,7 @@
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/threads.h"
#include "orte/mca/rml/rml.h"
@ -181,16 +182,14 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return;
}
assert (NULL != orte_node_regex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &orte_node_regex, 1, OPAL_STRING))) {
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
return;
}
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&msg->sender, buffer,
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&msg->sender, buffer,
ORTE_RML_TAG_NODE_REGEX_REPORT,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -1,333 +0,0 @@
/*
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <string.h>
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "orte/mca/mca.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/state/state.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/util/threads.h"
#include "orte/mca/rml/base/base.h"
/*
* The stub API interface functions
*/
/**
 * Open a conduit.
 *
 * The active components are visited in the order of the
 * orte_rml_base.actives list, and the first one whose open_conduit()
 * returns a module wins. The interpretation of the attributes is left
 * entirely to the components; this function only rejects a request
 * that carries both the include and the exclude attribute.
 *
 * The returned module is stored in the orte_rml_base.conduits pointer
 * array and the index assigned by opal_pointer_array_add() is the
 * conduit id handed back to the caller. No duplicate detection is done
 * here: a module that a component returns twice gets two ids.
 *
 * @param[in] attributes  list of opal_value_t of type OPAL_STRING
 *
 * @return the new conduit id on success, ORTE_RML_CONDUIT_INVALID on
 *         any failure (conflicting attributes, no component able to
 *         serve the request, or the module could not be stored).
 */
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
{
    orte_rml_base_active_t *active;
    orte_rml_component_t *comp;
    orte_rml_base_module_t *mod, *ourmod=NULL;
    int rc;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:open_conduit",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* bozo check - you cannot specify both include and exclude.
     * Report the failure with the same sentinel as every other error
     * path: the return type is a conduit id, so an ORTE error code
     * would not be recognized by callers testing for
     * ORTE_RML_CONDUIT_INVALID. */
    if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) &&
        orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        return ORTE_RML_CONDUIT_INVALID;
    }

    /* cycle thru the actives in priority order and let each one see if
     * they can support this request */
    OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) {
        comp = (orte_rml_component_t *)active->component;
        if (NULL == comp->open_conduit) {
            continue;
        }
        if (NULL != (mod = comp->open_conduit(attributes))) {
            opal_output_verbose(2, orte_rml_base_framework.framework_output,
                                "%s rml:base:open_conduit Component %s provided a conduit",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                active->component->base.mca_component_name);
            ourmod = mod;
            break;
        }
    }

    if (NULL == ourmod) {
        /* nobody could support it */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        return ORTE_RML_CONDUIT_INVALID;
    }

    /* we got an answer - store this conduit in our array; the array
     * index doubles as the conduit id */
    rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod);
    if (rc < 0) {
        return ORTE_RML_CONDUIT_INVALID;
    }
    return rc;
}
/**
 * Close a conduit: let the owning component close the module, clear
 * the slot in the conduit array and free the module.
 * An id with no module stored behind it is silently ignored.
 *
 * @param id  conduit id previously returned by open_conduit
 */
void orte_rml_API_close_conduit(orte_rml_conduit_t id)
{
    orte_rml_base_module_t *module;
    orte_rml_component_t *owner;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:close_conduit(%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id);

    module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id);
    if (NULL == module) {
        return;
    }

    owner = (orte_rml_component_t*)module->component;
    if (NULL != owner && NULL != owner->close_conduit) {
        owner->close_conduit(module);
    }
    opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL);
    free(module);
}
/**
 * Ping a process through the given conduit.
 *
 * @param conduit_id    conduit to use
 * @param contact_info  contact string handed to the module
 * @param tv            timeout handed to the module
 * @return ORTE_ERR_UNREACH when the conduit has no module or the module
 *         has no ping entry point, otherwise the module's result
 */
int orte_rml_API_ping(orte_rml_conduit_t conduit_id,
                      const char* contact_info,
                      const struct timeval* tv)
{
    orte_rml_base_module_t *module;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:ping(conduit-%d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id);

    /* look up the module behind the conduit */
    module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id);
    if (NULL == module || NULL == module->ping) {
        return ORTE_ERR_UNREACH;
    }
    return module->ping((struct orte_rml_base_module_t*)module, contact_info, tv);
}
/**
 * Non-blocking send of an iovec message through a specific conduit.
 *
 * @return ORTE_ERR_UNREACH when the conduit has no module or the module
 *         has no send_nb entry point, otherwise the module's result
 */
int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id,
                         orte_process_name_t* peer,
                         struct iovec* msg,
                         int count,
                         orte_rml_tag_t tag,
                         orte_rml_callback_fn_t cbfunc,
                         void* cbdata)
{
    orte_rml_base_module_t *module;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:send_nb() to peer %s through conduit %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer),conduit_id);

    /* look up the module behind the conduit */
    module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id);
    if (NULL == module || NULL == module->send_nb) {
        return ORTE_ERR_UNREACH;
    }
    return module->send_nb((struct orte_rml_base_module_t*)module, peer, msg, count, tag, cbfunc, cbdata);
}
/**
 * Non-blocking send of a buffer through a specific conduit.
 *
 * @return ORTE_ERR_UNREACH when the conduit has no module or the module
 *         has no send_buffer_nb entry point, otherwise the module's
 *         result
 */
int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id,
                                orte_process_name_t* peer,
                                struct opal_buffer_t* buffer,
                                orte_rml_tag_t tag,
                                orte_rml_buffer_callback_fn_t cbfunc,
                                void* cbdata)
{
    orte_rml_base_module_t *module;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:send_buffer_nb() to peer %s through conduit %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer),conduit_id);

    /* look up the module behind the conduit */
    module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id);
    if (NULL == module || NULL == module->send_buffer_nb) {
        return ORTE_ERR_UNREACH;
    }
    return module->send_buffer_nb((struct orte_rml_base_module_t*)module, peer, buffer, tag, cbfunc, cbdata);
}
/**
 * Post a non-blocking receive for an IOV message. This is handled
 * strictly in the base and never reaches a module: the request is
 * built here and shifted to the event base, where
 * orte_rml_base_post_recv processes it.
 */
void orte_rml_API_recv_nb(orte_process_name_t* peer,
                          orte_rml_tag_t tag,
                          bool persistent,
                          orte_rml_callback_fn_t cbfunc,
                          void* cbdata)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_nb for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what we are waiting for */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver it: IOV callback, not a buffer */
    request->post->buffer_data = false;
    request->post->cbfunc.iov = cbfunc;
    request->post->cbdata = cbdata;

    /* hand the request to the event base */
    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
/**
 * Post a non-blocking receive for a buffer message. The request is
 * built here and shifted to the event base, where
 * orte_rml_base_post_recv processes it.
 */
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
                                 orte_rml_tag_t tag,
                                 bool persistent,
                                 orte_rml_buffer_callback_fn_t cbfunc,
                                 void* cbdata)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_buffer_nb for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what we are waiting for */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver it: buffer callback */
    request->post->buffer_data = true;
    request->post->cbfunc.buffer = cbfunc;
    request->post->cbdata = cbdata;

    /* hand the request to the event base */
    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
/**
 * Cancel a posted non-blocking receive for the given peer and tag.
 * Does nothing once the event base is no longer active; otherwise a
 * cancel request is shifted to the event base, where
 * orte_rml_base_post_recv processes it.
 */
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_cancel for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    ORTE_ACQUIRE_OBJECT(orte_event_base_active);
    if (!orte_event_base_active) {
        /* no event will be processed any more, so simply return. */
        return;
    }

    request = OBJ_NEW(orte_rml_recv_request_t);
    request->post->tag = tag;
    request->post->peer.vpid = peer->vpid;
    request->post->peer.jobid = peer->jobid;
    /* mark it as a cancellation rather than a new post */
    request->cancel = true;

    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
/**
 * Purge information about a peer: every open conduit whose module
 * provides a purge entry point is asked to purge the peer.
 */
void orte_rml_API_purge(orte_process_name_t *peer)
{
    orte_rml_base_module_t *module;
    int slot;

    for (slot = 0; slot < orte_rml_base.conduits.size; slot++) {
        module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, slot);
        /* skip empty slots and modules without a purge hook */
        if (NULL == module || NULL == module->purge) {
            continue;
        }
        module->purge(peer);
    }
}
/**
 * Collect the transports offered by the active components.
 *
 * Every active component that provides query_transports() is asked for
 * its pathway; each non-NULL answer is retained and appended to the
 * caller's list.
 *
 * @param providers  list receiving the orte_rml_pathway_t items
 * @return ORTE_SUCCESS always
 */
int orte_rml_API_query_transports(opal_list_t *providers)
{
    orte_rml_base_active_t *entry;
    orte_rml_pathway_t *pathway;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:orte_rml_API_query_transports()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* cycle thru the actives */
    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == entry->component->query_transports) {
            continue;
        }
        opal_output_verbose(10,orte_rml_base_framework.framework_output,
                            "\n calling module: %s->query_transports() \n",
                            entry->component->base.mca_component_name);
        pathway = entry->component->query_transports();
        if (NULL == pathway) {
            continue;
        }
        /* pass the results across */
        OBJ_RETAIN(pathway);
        opal_list_append(providers, &pathway->super);
    }

    return ORTE_SUCCESS;
}
/**
 * Return the "routed" string of the module behind a conduit.
 *
 * @param id  conduit id
 * @return the module's routed pointer (not copied), or NULL when the
 *         conduit has no module
 */
char* orte_rml_API_get_routed(orte_rml_conduit_t id)
{
    orte_rml_base_module_t *module;

    module = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id);
    return (NULL != module) ? module->routed : NULL;
}

Просмотреть файл

@ -14,7 +14,7 @@
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,25 +48,19 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
void orte_rml_oob_fini(struct orte_rml_base_module_t *mod);
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
int orte_rml_oob_send_nb(orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
int orte_rml_oob_ping(struct orte_rml_base_module_t *mod,
const char* uri,
const struct timeval* tv);
END_C_DECLS
#endif

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -57,9 +57,8 @@
static int rml_oob_open(void);
static int rml_oob_close(void);
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes);
static orte_rml_pathway_t* query_transports(void);
static void close_conduit(orte_rml_base_module_t *mod);
static int component_query(mca_base_module_t **module, int *priority);
/**
* component definition
*/
@ -75,220 +74,121 @@ orte_rml_component_t mca_rml_oob_component = {
ORTE_RELEASE_VERSION),
.mca_open_component = rml_oob_open,
.mca_close_component = rml_oob_close,
.mca_query_component = component_query,
},
.data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.priority = 5,
.open_conduit = open_conduit,
.query_transports = query_transports,
.close_conduit = close_conduit
.priority = 5
};
/* Local variables */
static orte_rml_pathway_t pathway;
/*
 * Post a non-blocking receive for an IOV message: build the request
 * and shift it to the event base, where orte_rml_base_post_recv
 * processes it.
 */
static void recv_nb(orte_process_name_t* peer,
                    orte_rml_tag_t tag,
                    bool persistent,
                    orte_rml_callback_fn_t cbfunc,
                    void* cbdata)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_nb for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what we are waiting for */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver it: IOV callback, not a buffer */
    request->post->buffer_data = false;
    request->post->cbfunc.iov = cbfunc;
    request->post->cbdata = cbdata;

    /* hand the request to the event base */
    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
/*
 * Post a non-blocking receive for a buffer message: build the request
 * and shift it to the event base, where orte_rml_base_post_recv
 * processes it.
 */
static void recv_buffer_nb(orte_process_name_t* peer,
                           orte_rml_tag_t tag,
                           bool persistent,
                           orte_rml_buffer_callback_fn_t cbfunc,
                           void* cbdata)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_buffer_nb for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    request = OBJ_NEW(orte_rml_recv_request_t);

    /* who and what we are waiting for */
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->post->tag = tag;
    request->post->persistent = persistent;

    /* how to deliver it: buffer callback */
    request->post->buffer_data = true;
    request->post->cbfunc.buffer = cbfunc;
    request->post->cbdata = cbdata;

    /* hand the request to the event base */
    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
/**
 * Cancel a previously posted non-blocking receive.
 *
 * When the event base is still active, a request flagged as a
 * cancellation is shifted onto it so orte_rml_base_post_recv can remove
 * the matching receive. When the event base is no longer active nothing
 * is queued.
 *
 * @param[in] peer  Process name given when the receive was posted
 * @param[in] tag   Tag given when the receive was posted
 */
static void recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
{
    orte_rml_recv_request_t *cancel_req;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_cancel for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    ORTE_ACQUIRE_OBJECT(orte_event_base_active);
    if (orte_event_base_active) {
        /* events are still being processed: queue the removal of the
         * receive from the list of posted recvs */
        cancel_req = OBJ_NEW(orte_rml_recv_request_t);
        cancel_req->cancel = true;
        cancel_req->post->tag = tag;
        cancel_req->post->peer.vpid = peer->vpid;
        cancel_req->post->peer.jobid = peer->jobid;
        ORTE_THREADSHIFT(cancel_req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
    }
    /* otherwise no event will be processed any more, so there is
     * nothing to do */
}
/**
 * Ping entry point installed in the OOB module table.
 *
 * No reachability probe is made: both arguments are ignored and the call
 * unconditionally reports ORTE_ERR_UNREACH.
 *
 * @param[in] uri  Unused
 * @param[in] tv   Unused
 *
 * @retval ORTE_ERR_UNREACH Always
 */
static int oob_ping(const char* uri, const struct timeval* tv)
{
    (void)uri;
    (void)tv;

    return ORTE_ERR_UNREACH;
}
/*
 * Function table for the OOB RML module: points back at the owning
 * component and wires the send, receive and cancel entry points.
 * No purge handler is provided.
 */
static orte_rml_base_module_t base_module = {
.component = (struct orte_rml_component_t*)&mca_rml_oob_component,
/* NOTE(review): .ping is initialized twice below; with C designated
 * initializers the later one (oob_ping) is the value that sticks.
 * Presumably only one of these lines is intended - confirm. */
.ping = NULL,
.ping = oob_ping,
.send_nb = orte_rml_oob_send_nb,
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
.recv_nb = recv_nb,
.recv_buffer_nb = recv_buffer_nb,
.recv_cancel = recv_cancel,
.purge = NULL
};
/*
 * Component open hook.
 *
 * Constructs the file-scope `pathway` object, labels it with the
 * component name "oob" and passes its transport list to
 * ORTE_OOB_GET_TRANSPORTS. Always returns ORTE_SUCCESS; the result of
 * strdup() is not checked.
 */
static int rml_oob_open(void)
{
/* ask our OOB transports for their info */
OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t);
pathway.component = strdup("oob");
ORTE_OOB_GET_TRANSPORTS(&pathway.transports);
/* add any component attributes of our own */
return ORTE_SUCCESS;
}
/*
 * Component close hook: destructs the file-scope `pathway` object and
 * returns ORTE_SUCCESS.
 */
static int rml_oob_close(void)
{
/* cleanup */
OBJ_DESTRUCT(&pathway);
return ORTE_SUCCESS;
}
/*
 * Allocate a fresh OOB module instance.
 *
 * The instance is malloc'ed, the API table in base_module is copied into
 * its leading bytes, and its private storage (message queue, timer event,
 * routed name) is initialized. Returns the instance cast to the base
 * module type, or NULL when the allocation fails.
 *
 * NOTE(review): two function signatures follow one another below; the
 * component_query line looks like it belongs to a different definition -
 * confirm which one owns this body.
 */
static orte_rml_base_module_t* make_module(void)
static int component_query(mca_base_module_t **module, int *priority)
{
orte_rml_oob_module_t *mod;
/* create a new module */
mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t));
if (NULL == mod) {
return NULL;
}
/* copy the APIs over to it */
memcpy(mod, &base_module, sizeof(base_module));
/* initialize its internal storage */
OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t);
mod->timer_event = NULL;
mod->routed = NULL;
/* return the result */
return (orte_rml_base_module_t*)mod;
}
/*
 * Return true when any entry of the comma-delimited list `csv` equals
 * `first` or (when non-NULL) `second`, ignoring case.
 */
static bool oob_csv_has(const char *csv, const char *first, const char *second)
{
    char **entries = opal_argv_split(csv, ',');
    bool found = false;
    int i;

    /* nothing to search if the split produced no entries */
    if (NULL == entries) {
        return false;
    }
    for (i = 0; NULL != entries[i]; i++) {
        if (0 == strcasecmp(entries[i], first) ||
            (NULL != second && 0 == strcasecmp(entries[i], second))) {
            found = true;
            break;
        }
    }
    opal_argv_free(entries);
    return found;
}

/*
 * Create a module and attach the routed module requested through
 * ORTE_RML_ROUTED_ATTRIB. Returns NULL if the module cannot be allocated.
 */
static orte_rml_base_module_t* oob_conduit_module(opal_list_t *attributes)
{
    orte_rml_base_module_t *md;
    char *routed_req = NULL;

    md = make_module();
    if (NULL == md) {
        return NULL;
    }
    /* the routed system understands a NULL request, so no need to check
     * return status/value here */
    orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&routed_req, OPAL_STRING);
    md->routed = orte_routed.assign_module(routed_req);
    free(routed_req);
    return md;
}

/**
 * Decide whether the OOB component can serve a conduit with the given
 * attributes and, if so, create a module for it.
 *
 * The attributes are examined in this order; the first one that is
 * present with a non-NULL value decides the outcome, except that an
 * exclude list which does not name "oob" falls through to the next check:
 *   1. ORTE_RML_INCLUDE_COMP_ATTRIB - accept only if "oob" is listed
 *   2. ORTE_RML_EXCLUDE_COMP_ATTRIB - reject if "oob" is listed
 *   3. ORTE_RML_TRANSPORT_TYPE      - accept only if "Ethernet" or "oob" is listed
 *   4. ORTE_RML_PROTOCOL_TYPE       - accept only if "TCP" is listed
 * If none of these decides and either the transport or the protocol
 * attribute is absent, a module with the default routed module is returned.
 *
 * Each attribute string obtained from orte_get_attribute is freed exactly
 * once, and every split list is released on all paths.
 *
 * @param[in] attributes  Attribute list describing the requested conduit
 *
 * @retval non-NULL  A newly created module for the conduit
 * @retval NULL      The component does not qualify, or allocation failed
 */
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
{
    orte_rml_base_module_t *md;
    char *value;
    bool listed;

    opal_output_verbose(20,orte_rml_base_framework.framework_output,
                        "%s - Entering rml_oob_open_conduit()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* someone may require this specific component, so look for "oob" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        /* they specified specific components - could be multiple */
        listed = oob_csv_has(value, "oob", NULL);
        free(value);
        return listed ? oob_conduit_module(attributes) : NULL;
    }

    /* see if we have been explicitly excluded */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        listed = oob_csv_has(value, "oob", NULL);
        free(value);
        if (listed) {
            /* we cannot be a candidate */
            return NULL;
        }
    }

    /* Alternatively, check the attributes to see if we qualify - the
     * transport check accepts "Ethernet" and "oob" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        listed = oob_csv_has(value, "Ethernet", "oob");
        free(value);
        return listed ? oob_conduit_module(attributes) : NULL;
    }

    /* the protocol check accepts "TCP" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        listed = oob_csv_has(value, "TCP", NULL);
        free(value);
        return listed ? oob_conduit_module(attributes) : NULL;
    }

    /* if they didn't specify a protocol or a transport, then we can be considered */
    if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) ||
        !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) {
        md = make_module();
        if (NULL == md) {
            return NULL;
        }
        md->routed = orte_routed.assign_module(NULL);
        return md;
    }

    /* if we get here, we cannot handle it */
    return NULL;
}
/**
 * Report the transports this component can offer.
 *
 * @return Pointer to the file-scope pathway when its transport list holds
 *         at least one entry, NULL otherwise.
 */
static orte_rml_pathway_t* query_transports(void)
{
    /* only advertise the pathway when there is something on it */
    return (0 < opal_list_get_size(&pathway.transports)) ? &pathway : NULL;
}
/*
 * Release the per-conduit storage of an OOB module: destructs the queue
 * of routing messages and frees/clears the module's routed string.
 * The module itself is not freed here.
 *
 * NOTE(review): whether mod->routed is heap storage owned by this module
 * cannot be determined from this function alone; it is only valid to
 * free() it if whoever assigned it stored an allocated copy - confirm.
 *
 * NOTE(review): the three statements after `return;` reference `priority`
 * and `module`, which are not parameters of this function; they look like
 * the body of a different definition (presumably component_query) -
 * confirm.
 */
static void close_conduit(orte_rml_base_module_t *md)
{
orte_rml_oob_module_t *mod = (orte_rml_oob_module_t*)md;
/* cleanup the list of messages */
OBJ_DESTRUCT(&mod->queued_routing_messages);
/* clear the storage */
if (NULL != mod->routed) {
free(mod->routed);
mod->routed = NULL;
}
/* the rml_base_stub takes care of clearing the base receive
* and freeing the module */
return;
*priority = 50;
*module = (mca_base_module_t *) &base_module;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -70,8 +70,7 @@ static void send_self_exe(int fd, short args, void* data)
OBJ_RELEASE(xfer);
}
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
int orte_rml_oob_send_nb(orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
@ -170,7 +169,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
snd->count = count;
snd->cbfunc.iov = cbfunc;
snd->cbdata = cbdata;
snd->routed = strdup(mod->routed);
/* activate the OOB send state */
ORTE_OOB_SEND(snd);
@ -178,8 +176,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
return ORTE_SUCCESS;
}
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
@ -259,7 +256,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
snd->buffer = buffer;
snd->cbfunc.buffer = cbfunc;
snd->cbdata = cbdata;
snd->routed = strdup(mod->routed);
/* activate the OOB send state */
ORTE_OOB_SEND(snd);

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
@ -56,10 +56,6 @@ BEGIN_C_DECLS
/* ******************************************************************** */
/* forward declare */
struct orte_rml_base_module_t;
struct orte_rml_component_t;
typedef struct {
opal_object_t super;
orte_process_name_t name;
@ -173,8 +169,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer,
* from the local process
* @retval ORTE_ERROR An unspecified error occurred during the update
*/
typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
const char* contact_info,
typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
const struct timeval* tv);
@ -201,8 +196,7 @@ typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
@ -232,8 +226,7 @@ typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
@ -247,6 +240,49 @@ typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t
typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer);
/**
* Receive an iovec non-blocking message
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[in] tag User defined tag for matching send/recv
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*/
typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
* Receive a buffer non-blocking message
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[in] tag User defined tag for matching send/recv
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*/
typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/**
* Cancel a posted non-blocking receive
*
* Attempt to cancel a posted non-blocking receive. The function returns
* void, so the caller gets no indication of whether a matching receive
* was found.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
* to the non-blocking receive call
* @param[in] tag Posted receive tag
*/
typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag);
/**
* RML internal module interface - these will be implemented by all RML components
@ -265,245 +301,21 @@ typedef struct orte_rml_base_module_t {
/** Send non-blocking buffer message */
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
orte_rml_module_recv_nb_fn_t recv_nb;
orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
orte_rml_module_recv_cancel_fn_t recv_cancel;
/** Purge information */
orte_rml_module_purge_fn_t purge;
} orte_rml_base_module_t;
/* ******************************************************************** */
/* RML PUBLIC MODULE API DEFINITION */
/** Open conduit - call each component and see if they can provide a
* conduit that can satisfy all these attributes - return the conduit id
* (a negative value indicates error)
*/
typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes);
/**
* Close a conduit - allow the component to cleanup.
*/
typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id);
/**
* Query the library to provide all the supported interfaces/transport
* providers in the current node/system.
*
* @param[out] List of providers and their attributes.
*/
typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports);
/* query the routed module for a given conduit */
typedef char* (*orte_rml_API_query_routed_fn_t)(orte_rml_conduit_t id);
/**
* "Ping" another process to determine availability
*
* Ping another process to determine if it is available. This
* function only verifies that the process is alive and will allow a
* connection to the local process. It does *not* qualify as
* establishing communication with the remote process, as required by
* the note for set_contact_info().
*
* @param[in] contact_info The contact info string for the remote process
* @param[in] tv Timeout after which the ping should be failed
*
* @retval ORTE_SUCCESS The process is available and will allow connections
* from the local process
* @retval ORTE_ERROR An unspecified error occurred during the update
*/
typedef int (*orte_rml_API_ping_fn_t)(orte_rml_conduit_t conduit_id,
const char* contact_info,
const struct timeval* tv);
/**
* Send an iovec non-blocking message
*
* Send an array of iovecs to the specified peer. The call
* will return immediately, although the iovecs may not be modified
* until the completion callback is triggered. The iovecs *may* be
* passed to another call to send_nb before the completion callback is
* triggered. The callback being triggered does not give any
* indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] msg Pointer to an array of iovecs to be sent
* @param[in] count Number of iovecs in array
* @param[in] tag User defined tag for matching send/recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_API_send_nb_fn_t)(orte_rml_conduit_t conduit_id,
orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
* Send a buffer non-blocking message
*
* Send a buffer to the specified peer. The call
* will return immediately, although the buffer may not be modified
* until the completion callback is triggered. The buffer *may* be
* passed to another call to send_nb before the completion callback is
* triggered. The callback being triggered does not give any
* indication of remote completion.
*
* @param[in] peer Name of receiving process
* @param[in] buffer Pointer to buffer to be sent
* @param[in] tag User defined tag for matching send/recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*
* @retval ORTE_SUCCESS The message was successfully started
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
* receiving process is not available
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_rml_conduit_t conduit_id,
orte_process_name_t* peer,
struct opal_buffer_t* buffer,
orte_rml_tag_t tag,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/**
* Purge the RML/OOB of contact info and pending messages
* to/from a specified process. Used when a process aborts
* and is to be restarted
*/
typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer);
/**
* Receive an iovec non-blocking message
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[in] tag User defined tag for matching send/recv
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*/
typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_callback_fn_t cbfunc,
void* cbdata);
/**
* Receive a buffer non-blocking message
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
* @param[in] tag User defined tag for matching send/recv
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
* @param[in] cbfunc Callback function on message completion
* @param[in] cbdata User data to provide during completion callback
*/
typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag,
bool persistent,
orte_rml_buffer_callback_fn_t cbfunc,
void* cbdata);
/**
* Cancel a posted non-blocking receive
*
* Attempt to cancel a posted non-blocking receive.
*
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
* to the non-blocking receive call
* @param[in] tag Posted receive tag
*/
typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer,
orte_rml_tag_t tag);
/**
* RML API interface
*/
typedef struct {
/** Open Conduit **/
orte_rml_API_open_conduit_fn_t open_conduit;
/** Shutdown the conduit and clean up resources */
orte_rml_API_close_conduit_fn_t close_conduit;
/** Ping process for connectivity check */
orte_rml_API_ping_fn_t ping;
/** Send non-blocking iovec message */
orte_rml_API_send_nb_fn_t send_nb;
/** Send non-blocking buffer message */
orte_rml_API_send_buffer_nb_fn_t send_buffer_nb;
/** Receive non-blocking iovec message */
orte_rml_API_recv_nb_fn_t recv_nb;
/** Receive non-blocking buffer message */
orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb;
/** Cancel posted non-blocking receive */
orte_rml_API_recv_cancel_fn_t recv_cancel;
/** Purge information */
orte_rml_API_purge_fn_t purge;
/** Query information of transport in system */
orte_rml_API_query_transports_fn_t query_transports;
/* get the routed module for a given conduit */
orte_rml_API_query_routed_fn_t get_routed;
} orte_rml_base_API_t;
/** Interface for RML communication */
ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml;
ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
/* ******************************************************************** */
/* RML COMPONENT DEFINITION */
/**
* RML open_conduit
*
* Create an instance (module) of the given RML component. Upon
* returning, the module data structure should be fully populated and
* all functions should be usable and will have the conduit information.
*
* @param[in] opal_list_t of all attributes requested for the conduit.
* Each attribute will be key-value.
* [TODO] put in examples of the key-value here.
* @return Exactly one module created by the call to the component's
* initialization function should be returned. The module structure
* should be fully populated, and the priority should be set to a
* reasonable value.
*
* @retval NULL An error occurred and initialization did not occur
* @retval non-NULL The module was successfully initialized
*/
typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes);
/**
* Query the library to provide all the supported interfaces/transport
* providers in the current node/system.
*
*/
typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void);
/** Close conduit - allow the specific component to
* cleanup the module for this conduit
*/
typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod);
/**
* RML component interface
*
@ -518,10 +330,6 @@ typedef struct orte_rml_component_t {
mca_base_component_data_t data;
/* Component priority */
int priority;
/* Component interface functions */
orte_rml_component_open_conduit_fn_t open_conduit;
orte_rml_component_query_transports_fn_t query_transports;
orte_rml_module_close_conduit_fn_t close_conduit;
} orte_rml_component_t;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -201,19 +201,6 @@ BEGIN_C_DECLS
*/
typedef uint32_t orte_rml_tag_t;
/* Conduit ID */
typedef uint16_t orte_rml_conduit_t;
#define ORTE_RML_CONDUIT_INVALID 0xff
/* define an object for reporting transports */
typedef struct {
opal_list_item_t super;
char *component;
opal_list_t attributes;
opal_list_t transports;
} orte_rml_pathway_t;
OBJ_CLASS_DECLARATION(orte_rml_pathway_t);
/* ******************************************************************** */

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2007-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,38 +32,11 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_routed_base_framework;
ORTE_DECLSPEC int orte_routed_base_select(void);
typedef struct {
opal_list_item_t super;
int pri;
orte_routed_component_t *component;
orte_routed_module_t *module;
} orte_routed_base_active_t;
OBJ_CLASS_DECLARATION(orte_routed_base_active_t);
typedef struct {
opal_list_t actives;
bool routing_enabled;
} orte_routed_base_t;
ORTE_DECLSPEC extern orte_routed_base_t orte_routed_base;
/* base API wrapper functions */
ORTE_DECLSPEC char* orte_routed_base_assign_module(char *modules);
ORTE_DECLSPEC int orte_routed_base_delete_route(char *module, orte_process_name_t *proc);
ORTE_DECLSPEC int orte_routed_base_update_route(char *module, orte_process_name_t *target,
orte_process_name_t *route);
ORTE_DECLSPEC orte_process_name_t orte_routed_base_get_route(char *module,
orte_process_name_t *target);
ORTE_DECLSPEC int orte_routed_base_route_lost(char *module,
const orte_process_name_t *route);
ORTE_DECLSPEC bool orte_routed_base_route_is_defined(char *module,
const orte_process_name_t *target);
ORTE_DECLSPEC void orte_routed_base_update_routing_plan(char *module);
ORTE_DECLSPEC void orte_routed_base_get_routing_list(char *module, opal_list_t *coll);
ORTE_DECLSPEC int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc);
ORTE_DECLSPEC size_t orte_routed_base_num_routes(char *module);
ORTE_DECLSPEC int orte_routed_base_ft_event(char *module, int state);
/* specialized support functions */
ORTE_DECLSPEC void orte_routed_base_xcast_routing(opal_list_t *coll,
opal_list_t *my_children);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,219 +37,6 @@
#include "orte/mca/routed/base/base.h"
/**
 * Pick the routed module to use for a request.
 *
 * @param[in] modules  Comma-delimited, prioritized list of desired routed
 *                     module names, or NULL to accept the first entry of
 *                     the active list.
 *
 * @return The component name of the chosen active module. The pointer
 *         refers to the component's own name storage, not to a fresh
 *         allocation. NULL is returned when no active module matches,
 *         when the list of desired names is empty, or when there are no
 *         active modules at all.
 */
char* orte_routed_base_assign_module(char *modules)
{
    orte_routed_base_active_t *active;
    char **desired;
    int i;

    /* the incoming param contains a comma-delimited, prioritized
     * list of desired routing modules. If it is NULL, then we
     * simply return the module at the top of our list */
    if (NULL == modules) {
        /* on an empty list opal_list_get_first() hands back the list
         * sentinel, which is not an active entry - do not touch it */
        if (opal_list_is_empty(&orte_routed_base.actives)) {
            return NULL;
        }
        active = (orte_routed_base_active_t*)opal_list_get_first(&orte_routed_base.actives);
        return active->component->base_version.mca_component_name;
    }

    /* otherwise, cycle thru the provided list of desired modules
     * and pick the highest priority one that matches */
    desired = opal_argv_split(modules, ',');
    if (NULL == desired) {
        /* nothing was requested after splitting, so nothing can match */
        return NULL;
    }
    for (i=0; NULL != desired[i]; i++) {
        OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
            if (0 == strcasecmp(desired[i], active->component->base_version.mca_component_name)) {
                opal_argv_free(desired);
                return active->component->base_version.mca_component_name;
            }
        }
    }
    opal_argv_free(desired);

    /* get here if none match */
    return NULL;
}
/**
 * Forward a delete_route request to the active routed modules.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 * @param[in] proc    Process whose route is to be deleted
 *
 * @return ORTE_SUCCESS, or the first non-success code returned by a
 *         module (later modules are then not called).
 */
int orte_routed_base_delete_route(char *module, orte_process_name_t *proc)
{
    orte_routed_base_active_t *entry;
    int status;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->delete_route) {
            continue;
        }
        status = entry->module->delete_route(proc);
        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return ORTE_SUCCESS;
}
/**
 * Forward an update_route request to the active routed modules.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 * @param[in] target  Process the route leads to
 * @param[in] route   Process to route through
 *
 * @return ORTE_SUCCESS, or the first non-success code returned by a
 *         module (later modules are then not called).
 */
int orte_routed_base_update_route(char *module, orte_process_name_t *target,
                                  orte_process_name_t *route)
{
    orte_routed_base_active_t *entry;
    int status;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->update_route) {
            continue;
        }
        status = entry->module->update_route(target, route);
        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return ORTE_SUCCESS;
}
/**
 * Look up the next hop towards a target process.
 *
 * @param[in] module  Component name of the routed module to ask; NULL
 *                    means direct routing
 * @param[in] target  Process to reach
 *
 * @return *target when routing is disabled or module is NULL; otherwise
 *         whatever the named module's get_route returns. If the named
 *         module is not active or has no get_route entry point, the
 *         invalid name (*ORTE_NAME_INVALID) is returned.
 */
orte_process_name_t orte_routed_base_get_route(char *module, orte_process_name_t *target)
{
    orte_routed_base_active_t *entry;

    /* a NULL module corresponds to direct */
    if (NULL == module || !orte_routed_base.routing_enabled) {
        return *target;
    }

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        if (0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* found the named module - stop searching either way */
        if (NULL == entry->module->get_route) {
            break;
        }
        return entry->module->get_route(target);
    }

    return *ORTE_NAME_INVALID;
}
/**
 * Tell the active routed modules that a route has been lost.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 * @param[in] route   Process whose route was lost
 *
 * @return ORTE_SUCCESS, or the first non-success code returned by a
 *         module (later modules are then not called).
 */
int orte_routed_base_route_lost(char *module, const orte_process_name_t *route)
{
    orte_routed_base_active_t *entry;
    int status;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->route_lost) {
            continue;
        }
        status = entry->module->route_lost(route);
        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return ORTE_SUCCESS;
}
/**
 * Ask a routed module whether it has a route to the target.
 *
 * @param[in] module  Component name of the routed module to ask; NULL
 *                    means direct routing
 * @param[in] target  Process to check
 *
 * @return true when module is NULL; otherwise the named module's answer.
 *         false when the named module is not active or does not provide
 *         a route_is_defined entry point.
 */
bool orte_routed_base_route_is_defined(char *module, const orte_process_name_t *target)
{
    orte_routed_base_active_t *entry;

    /* a NULL module corresponds to direct */
    if (NULL == module) {
        return true;
    }

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        if (0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* this is the named module: without the required API the
         * route isn't defined */
        return (NULL != entry->module->route_is_defined)
                   ? entry->module->route_is_defined(target)
                   : false;
    }

    /* the specified module was not found */
    return false;
}
/**
 * Ask the active routed modules to update their routing plan.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 */
void orte_routed_base_update_routing_plan(char *module)
{
    orte_routed_base_active_t *entry;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->update_routing_plan) {
            continue;
        }
        entry->module->update_routing_plan();
    }
}
/**
 * Collect the routing list from the active routed modules.
 *
 * @param[in]     module  Component name to target, or NULL for every active module
 * @param[in,out] coll    List handed to each selected module's
 *                        get_routing_list entry point
 */
void orte_routed_base_get_routing_list(char *module, opal_list_t *coll)
{
    orte_routed_base_active_t *entry;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->get_routing_list) {
            continue;
        }
        entry->module->get_routing_list(coll);
    }
}
/**
 * Set the lifeline process on the active routed modules.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 * @param[in] proc    Process to register as the lifeline
 *
 * @return ORTE_SUCCESS, or the first non-success code returned by a
 *         module (later modules are then not called).
 */
int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc)
{
    orte_routed_base_active_t *entry;
    int status;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->set_lifeline) {
            continue;
        }
        status = entry->module->set_lifeline(proc);
        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return ORTE_SUCCESS;
}
/**
 * Count the routes known to the active routed modules.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 *
 * @return Sum of num_routes() over the selected modules that provide
 *         that entry point; 0 if none do.
 */
size_t orte_routed_base_num_routes(char *module)
{
    orte_routed_base_active_t *entry;
    size_t total = 0;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->num_routes) {
            continue;
        }
        total += entry->module->num_routes();
    }

    return total;
}
/**
 * Deliver a fault-tolerance event to the active routed modules.
 *
 * @param[in] module  Component name to target, or NULL for every active module
 * @param[in] state   State value passed through to each module's ft_event
 *
 * @return ORTE_SUCCESS, or the first non-success code returned by a
 *         module (later modules are then not called).
 */
int orte_routed_base_ft_event(char *module, int state)
{
    orte_routed_base_active_t *entry;
    int status;

    OPAL_LIST_FOREACH(entry, &orte_routed_base.actives, orte_routed_base_active_t) {
        /* a name was given and this is not it */
        if (NULL != module &&
            0 != strcmp(module, entry->component->base_version.mca_component_name)) {
            continue;
        }
        /* module does not implement this entry point */
        if (NULL == entry->module->ft_event) {
            continue;
        }
        status = entry->module->ft_event(state);
        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return ORTE_SUCCESS;
}
void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children)
{
orte_routed_tree_t *child;

Просмотреть файл

@ -10,7 +10,7 @@
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -39,25 +39,11 @@
* component's public mca_base_component_t struct. */
#include "orte/mca/routed/base/static-components.h"
orte_routed_base_t orte_routed_base = {{{0}}};
orte_routed_API_t orte_routed = {
.assign_module = orte_routed_base_assign_module,
.delete_route = orte_routed_base_delete_route,
.update_route = orte_routed_base_update_route,
.get_route = orte_routed_base_get_route,
.route_lost = orte_routed_base_route_lost,
.route_is_defined = orte_routed_base_route_is_defined,
.set_lifeline = orte_routed_base_set_lifeline,
.update_routing_plan = orte_routed_base_update_routing_plan,
.get_routing_list = orte_routed_base_get_routing_list,
.num_routes = orte_routed_base_num_routes,
.ft_event = orte_routed_base_ft_event
};
orte_routed_base_t orte_routed_base = {0};
orte_routed_module_t orte_routed = {0};
static int orte_routed_base_open(mca_base_open_flag_t flags)
{
/* setup our list of actives */
OBJ_CONSTRUCT(&orte_routed_base.actives, opal_list_t);
/* start with routing DISABLED */
orte_routed_base.routing_enabled = false;
@ -67,14 +53,10 @@ static int orte_routed_base_open(mca_base_open_flag_t flags)
static int orte_routed_base_close(void)
{
orte_routed_base_active_t *active;
while (NULL != (active = (orte_routed_base_active_t *)opal_list_remove_first(&orte_routed_base.actives))) {
active->module->finalize();
OBJ_RELEASE(active);
orte_routed_base.routing_enabled = false;
if (NULL != orte_routed.finalize) {
orte_routed.finalize();
}
OPAL_LIST_DESTRUCT(&orte_routed_base.actives);
return mca_base_framework_components_close(&orte_routed_base_framework, NULL);
}
@ -82,69 +64,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, routed, "ORTE Message Routing Subsystem", NULL,
orte_routed_base_open, orte_routed_base_close,
mca_routed_base_static_components, 0);
static bool selected = false;
int orte_routed_base_select(void)
{
mca_base_component_list_item_t *cli=NULL;
orte_routed_component_t *component=NULL;
orte_routed_base_active_t *newmodule, *mod;
mca_base_module_t *module;
bool inserted;
int pri;
orte_routed_component_t *best_component = NULL;
orte_routed_module_t *best_module = NULL;
if (selected) {
return ORTE_SUCCESS;
}
selected = true;
OPAL_LIST_FOREACH(cli, &orte_routed_base_framework.framework_components, mca_base_component_list_item_t ) {
component = (orte_routed_component_t*) cli->cli_component;
opal_output_verbose(10, orte_routed_base_framework.framework_output,
"orte_routed_base_select: Initializing %s component %s",
component->base_version.mca_type_name,
component->base_version.mca_component_name);
if (ORTE_SUCCESS != component->base_version.mca_query_component(&module, &pri)) {
continue;
}
/* add to the list of available components */
newmodule = OBJ_NEW(orte_routed_base_active_t);
newmodule->pri = pri;
newmodule->component = component;
newmodule->module = (orte_routed_module_t*)module;
if (ORTE_SUCCESS != newmodule->module->initialize()) {
OBJ_RELEASE(newmodule);
continue;
}
/* maintain priority order */
inserted = false;
OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) {
if (newmodule->pri > mod->pri) {
opal_list_insert_pos(&orte_routed_base.actives,
(opal_list_item_t*)mod, &newmodule->super);
inserted = true;
break;
}
}
if (!inserted) {
/* must be lowest priority - add to end */
opal_list_append(&orte_routed_base.actives, &newmodule->super);
}
/*
* Select the best component
*/
if( OPAL_SUCCESS != mca_base_select("routed", orte_routed_base_framework.framework_output,
&orte_routed_base_framework.framework_components,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component, NULL) ) {
/* This will only happen if no component was selected */
/* If we didn't find one to select, that is an error */
return ORTE_ERROR;
}
if (4 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) {
opal_output(0, "%s: Final routed priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* show the prioritized list */
OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) {
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base_version.mca_component_name, mod->pri);
}
/* Save the winner */
orte_routed = *best_module;
if (NULL != orte_routed.initialize) {
orte_routed.initialize();
}
return ORTE_SUCCESS;
}
@ -160,7 +101,3 @@ static void destruct(orte_routed_tree_t *rt)
OBJ_CLASS_INSTANCE(orte_routed_tree_t,
opal_list_item_t,
construct, destruct);
OBJ_CLASS_INSTANCE(orte_routed_base_active_t,
opal_list_item_t,
NULL, NULL);

Просмотреть файл

@ -6,7 +6,7 @@
* reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -223,12 +223,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
goto found;
}
/* if I am an application process, always route via my local daemon */
if (ORTE_PROC_IS_APP) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}
/* if I am a tool, the route is direct if target is in
* my own job family, and to the target's HNP if not
*/
@ -264,7 +258,13 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}
}
/* if the jobid is different than our own, then this the target
/* if the target is our parent, then send it direct */
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_PARENT, target)) {
ret = ORTE_PROC_MY_PARENT;
goto found;
}
/* if the jobid is different than our own, then this target
* is a tool and we should go direct */
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
ret = target;
@ -273,10 +273,15 @@ static orte_process_name_t get_route(orte_process_name_t *target)
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
/* find out what daemon hosts this proc */
if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ret = ORTE_NAME_INVALID;
goto found;
if (ORTE_PROC_MY_NAME->jobid == target->jobid) {
/* it's a daemon - no need to look it up */
daemon.vpid = target->vpid;
} else {
if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ret = ORTE_NAME_INVALID;
goto found;
}
}
/* if the daemon is me, then send direct to the target! */

Просмотреть файл

@ -7,7 +7,7 @@
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -221,47 +221,8 @@ typedef struct {
orte_routed_module_ft_event_fn_t ft_event;
} orte_routed_module_t;
/* define an equivalent set of API functions - these will be implemented
* as "stubs" in the framework base */
typedef char* (*orte_routed_API_assign_module_fn_t)(char *modules);
typedef int (*orte_routed_API_delete_route_fn_t)(char *module,
orte_process_name_t *proc);
typedef int (*orte_routed_API_update_route_fn_t)(char *module,
orte_process_name_t *target,
orte_process_name_t *route);
typedef orte_process_name_t (*orte_routed_API_get_route_fn_t)(char *module,
orte_process_name_t *target);
typedef int (*orte_routed_API_route_lost_fn_t)(char *module,
const orte_process_name_t *route);
typedef bool (*orte_routed_API_route_is_defined_fn_t)(char *module,
const orte_process_name_t *target);
typedef void (*orte_routed_API_update_routing_plan_fn_t)(char *module);
typedef void (*orte_routed_API_get_routing_list_fn_t)(char *module, opal_list_t *coll);
typedef int (*orte_routed_API_set_lifeline_fn_t)(char *module, orte_process_name_t *proc);
typedef size_t (*orte_routed_API_num_routes_fn_t)(char *module);
typedef int (*orte_routed_API_ft_event_fn_t)(char *module, int state);
typedef struct {
/* API functions */
orte_routed_API_assign_module_fn_t assign_module;
orte_routed_API_delete_route_fn_t delete_route;
orte_routed_API_update_route_fn_t update_route;
orte_routed_API_get_route_fn_t get_route;
orte_routed_API_route_lost_fn_t route_lost;
orte_routed_API_route_is_defined_fn_t route_is_defined;
orte_routed_API_set_lifeline_fn_t set_lifeline;
/* fns for daemons */
orte_routed_API_update_routing_plan_fn_t update_routing_plan;
orte_routed_API_get_routing_list_fn_t get_routing_list;
orte_routed_API_num_routes_fn_t num_routes;
/* FT Notification */
orte_routed_API_ft_event_fn_t ft_event;
} orte_routed_API_t;
/* provide an interface to the routed framework stub functions */
ORTE_DECLSPEC extern orte_routed_API_t orte_routed;
ORTE_DECLSPEC extern orte_routed_module_t orte_routed;
/* ******************************************************************** */

Просмотреть файл

@ -9,7 +9,7 @@
* All rights reserved.
* Copyright (c) 2007 Evergrid, Inc. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -767,8 +767,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer,
}
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
peer, loc_buffer,
if (0 > (ret = orte_rml.send_buffer_nb(peer, loc_buffer,
ORTE_RML_TAG_CKPT,
orte_rml_send_callback, NULL))) {
opal_output(orte_snapc_base_framework.framework_output,

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -508,8 +508,7 @@ void orte_state_base_notify_data_server(orte_process_name_t *target)
}
/* send the request to the server */
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&orte_pmix_server_globals.server, buf,
rc = orte_rml.send_buffer_nb(&orte_pmix_server_globals.server, buf,
ORTE_RML_TAG_DATA_SERVER,
orte_rml_send_callback, NULL);
if (ORTE_SUCCESS != rc) {
@ -617,8 +616,7 @@ static void _send_notification(int status,
ORTE_ERROR_NAME(status),
ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(&daemon));
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&daemon, buf,
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf,
ORTE_RML_TAG_NOTIFICATION,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -635,7 +633,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
orte_job_t *jdata;
orte_proc_t *pdata;
int i;
char *rtmod;
orte_process_name_t parent, target;
ORTE_ACQUIRE_OBJECT(caddy);
@ -648,9 +645,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
ORTE_NAME_PRINT(proc),
orte_proc_state_to_str(state));
/* get our "lifeline" routed module */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
/* get the job object for this proc */
if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@ -722,7 +716,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
* remain (might be some from another job)
*/
if (orte_orteds_term_ordered &&
0 == orte_routed.num_routes(rtmod)) {
0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
@ -783,7 +777,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
int32_t i32, *i32ptr;
uint32_t u32;
void *nptr;
char *rtmod;
ORTE_ACQUIRE_OBJECT(caddy);
jdata = caddy->jdata;
@ -793,10 +786,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
/* get our "lifeline" routed module */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
/* just check to see if the daemons are complete */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
@ -864,7 +853,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
*/
CHECK_DAEMONS:
if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
/* orteds are done! */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s orteds complete - exiting",

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -326,8 +326,7 @@ static void _send_notification(int status,
ORTE_ERROR_NAME(status),
ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(&daemon));
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&daemon, buf,
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf,
ORTE_RML_TAG_NOTIFICATION,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -239,8 +239,7 @@ static void track_jobs(int fd, short argc, void *cbdata)
}
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -262,7 +261,6 @@ static void track_procs(int fd, short argc, void *cbdata)
opal_buffer_t *alert;
int rc, i;
orte_plm_cmd_flag_t cmd;
char *rtmod;
orte_std_cntr_t index;
orte_job_map_t *map;
orte_node_t *node;
@ -333,8 +331,7 @@ static void track_procs(int fd, short argc, void *cbdata)
}
}
/* send it */
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
@ -391,9 +388,8 @@ static void track_procs(int fd, short argc, void *cbdata)
* gone, then terminate ourselves IF no local procs
* remain (might be some from another job)
*/
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
if (orte_orteds_term_ordered &&
0 == orte_routed.num_routes(rtmod)) {
0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
@ -431,8 +427,7 @@ static void track_procs(int fd, short argc, void *cbdata)
"%s state:orted: SENDING JOB LOCAL TERMINATION UPDATE FOR JOB %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, alert,
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -14,7 +14,7 @@
* reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -54,11 +54,12 @@
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_progress.h"
#include "opal/dss/dss.h"
#include "opal/mca/compress/compress.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/compress.h"
#include "orte/util/nidmap.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/base/base.h"
@ -122,11 +123,10 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
char string[256], *string_ptr = string;
float pss;
opal_pstats_t pstat;
char *rtmod;
char *coprocessors;
orte_job_map_t *map;
int8_t flag;
uint8_t *cmpdata;
uint8_t *cmpdata, u8;
size_t cmplen;
/* unpack the command */
@ -241,6 +241,32 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
}
break;
case ORTE_DAEMON_PASS_NODE_INFO_CMD:
if (orte_debug_daemons_flag) {
opal_output(0, "%s orted_cmd: received pass_node_info",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
if (!ORTE_PROC_IS_HNP) {
n = 1;
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &u8, &n, OPAL_UINT8))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
if (1 == u8) {
if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(buffer))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
}
if (ORTE_SUCCESS != (ret = orte_util_parse_node_info(buffer))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
}
break;
/**** ADD_LOCAL_PROCS ****/
case ORTE_DAEMON_ADD_LOCAL_PROCS:
case ORTE_DAEMON_DVM_ADD_PROCS:
@ -355,8 +381,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
/* flag that orteds were ordered to terminate */
orte_orteds_term_ordered = true;
/* if all my routes and local children are gone, then terminate ourselves */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
if (0 == (ret = orte_routed.num_routes(rtmod))) {
if (0 == (ret = orte_routed.num_routes())) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
@ -398,8 +423,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
orte_orteds_term_ordered = true;
if (ORTE_PROC_IS_HNP) {
/* if all my routes and local children are gone, then terminate ourselves */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
if (0 == orte_routed.num_routes(rtmod)) {
if (0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
@ -499,8 +523,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
break;
}
/* send the buffer to our IOF */
orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP,
orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP,
orte_rml_send_callback, NULL);
}
for (i=1; i < orte_node_pool->size; i++) {
@ -639,7 +662,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
free(coprocessors);
}
answer = OBJ_NEW(opal_buffer_t);
if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used,
if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used,
&cmpdata, &cmplen)) {
/* the data was compressed - mark that we compressed it */
flag = 1;
@ -691,8 +714,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
OBJ_DESTRUCT(&data);
}
/* send the data */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -722,8 +744,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
goto CLEANUP;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -748,8 +769,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
OBJ_RELEASE(answer);
goto CLEANUP;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -818,8 +838,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
rc = opal_hash_table_get_next_key_uint32(orte_job_data, &u32, (void **)&jobdat, nptr, &nptr);
}
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -845,8 +864,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
OBJ_RELEASE(answer);
goto CLEANUP;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -915,8 +933,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
}
}
/* send the info */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -942,8 +959,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
OBJ_RELEASE(answer);
goto CLEANUP;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -1061,8 +1077,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
}
}
/* send the info */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -1120,8 +1135,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
goto SEND_TOP_ANSWER;
}
/* the callback function will release relay_msg buffer */
if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit,
&proc2, relay_msg,
if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
@ -1172,8 +1186,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
goto SEND_TOP_ANSWER;
}
/* the callback function will release relay_msg buffer */
if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit,
&proc2, relay_msg,
if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
@ -1237,8 +1250,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
ret = ORTE_ERR_COMM_FAILURE;
break;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
return_addr, answer, ORTE_RML_TAG_TOOL,
if (0 > (ret = orte_rml.send_buffer_nb(return_addr, answer, ORTE_RML_TAG_TOOL,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(answer);
@ -1316,8 +1328,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
free(gstack_exec);
}
/* always send our response */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, answer,
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer,
ORTE_RML_TAG_STACK_TRACE,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -1355,8 +1366,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
}
opal_dss.pack(answer, &pss, 1, OPAL_FLOAT);
/* send it back */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, answer,
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer,
ORTE_RML_TAG_MEMPROFILE,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -16,7 +16,7 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -67,22 +67,22 @@
#include "opal/dss/dss.h"
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/compress/compress.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/parse_options.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/util/pre_condition_transports.h"
#include "orte/util/compress.h"
#include "orte/util/threads.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/regx/regx.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/odls/odls.h"
@ -221,10 +221,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Whether to report process bindings to stderr" },
{ "orte_node_regex", '\0', "nodes", "nodes", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Regular expression defining nodes in system" },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -716,19 +712,19 @@ int orte_daemon(int argc, char *argv[])
/* tell the routed module that we have a path
* back to the HNP
*/
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) {
if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
/* and a path to our parent */
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) {
if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
/* set the lifeline to point to our parent so that we
* can handle the situation if that lifeline goes away
*/
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, ORTE_PROC_MY_PARENT))) {
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
@ -747,7 +743,7 @@ int orte_daemon(int argc, char *argv[])
/* define the target jobid */
target.jobid = ORTE_PROC_MY_NAME->jobid;
if (orte_fwd_mpirun_port || orte_static_ports || NULL != orte_parent_uri) {
if (NULL != orte_parent_uri) {
/* we start by sending to ourselves */
target.vpid = ORTE_PROC_MY_NAME->vpid;
/* since we will be waiting for any children to send us
@ -755,13 +751,10 @@ int orte_daemon(int argc, char *argv[])
* a little time in the launch phase by "warming up" the
* connection to our parent while we wait for our children */
buffer = OBJ_NEW(opal_buffer_t); // zero-byte message
if (NULL == orte_node_regex) {
orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT,
ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting);
node_regex_waiting = true;
}
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_PARENT, buffer,
node_regex_waiting = true;
orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT,
ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting);
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, buffer,
ORTE_RML_TAG_WARMUP_CONNECTION,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -917,7 +910,7 @@ int orte_daemon(int argc, char *argv[])
if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(ret);
}
if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used,
if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used,
&cmpdata, &cmplen)) {
/* the data was compressed - mark that we compressed it */
flag = 1;
@ -961,8 +954,7 @@ int orte_daemon(int argc, char *argv[])
}
/* send it to the designated target */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&target, buffer,
if (0 > (ret = orte_rml.send_buffer_nb(&target, buffer,
ORTE_RML_TAG_ORTED_CALLBACK,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -1020,10 +1012,6 @@ int orte_daemon(int argc, char *argv[])
i += 2;
}
}
if (NULL != orte_node_regex) {
/* now launch any child daemons of ours */
orte_plm.remote_spawn();
}
}
if (orte_debug_daemons_flag) {
@ -1150,19 +1138,16 @@ static void rollup(int status, orte_process_name_t* sender,
}
static void report_orted() {
char *rtmod;
int nreqd, ret;
/* get the number of children */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
nreqd = orte_routed.num_routes(rtmod) + 1;
nreqd = orte_routed.num_routes() + 1;
if (nreqd == ncollected && NULL != mybucket && !node_regex_waiting) {
/* add the collection of our children's buckets to ours */
opal_dss.copy_payload(mybucket, bucket);
OBJ_RELEASE(bucket);
/* relay this on to our parent */
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_PARENT, mybucket,
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, mybucket,
ORTE_RML_TAG_ORTED_CALLBACK,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -1174,27 +1159,18 @@ static void report_orted() {
static void node_regex_report(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata) {
int rc, n=1;
char * regex;
assert(NULL == orte_node_regex);
int rc;
bool * active = (bool *)cbdata;
/* extract the node regex if needed, and update the routing tree */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &regex, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return;
}
orte_node_regex = regex;
if (ORTE_SUCCESS != (rc = orte_regx.nidmap_parse(orte_node_regex))) {
/* extract the node info if needed, and update the routing tree */
if (ORTE_SUCCESS != (rc = orte_util_decode_nidmap(buffer))) {
ORTE_ERROR_LOG(rc);
return;
}
/* update the routing tree so any tree spawn operation
* properly gets the number of children underneath us */
orte_routed.update_routing_plan(NULL);
orte_routed.update_routing_plan();
*active = false;

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
@ -575,14 +575,14 @@ int orte_submit_init(int argc, char *argv[],
OBJ_DESTRUCT(&val);
/* set the route to be direct */
if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
orte_finalize();
exit(1);
}
/* set the target hnp as our lifeline so we will terminate if it exits */
orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP);
orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
/* setup to listen for HNP response to my commands */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE,
@ -700,8 +700,7 @@ int orte_submit_cancel(int index) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -724,8 +723,7 @@ int orte_submit_halt(void)
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, req,
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL);
if (ORTE_SUCCESS != rc) {
@ -1146,8 +1144,7 @@ int orte_submit_job(char *argv[], int *index,
ORTE_ERROR_LOG(rc);
return rc;
}
orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL);
/* Inform the caller of the tracker index if they passed a index pointer */
@ -3378,8 +3375,7 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
for (i=0; i < nreports; i++) {
OBJ_RETAIN(buffer);
name.vpid = i;
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
&name, buffer,
if (0 > (rc = orte_rml.send_buffer_nb(&name, buffer,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
@ -391,8 +391,7 @@ static void send_error(int status, opal_process_name_t *idreq,
}
/* send the response */
orte_rml.send_buffer_nb(orte_mgmt_conduit,
remote, reply,
orte_rml.send_buffer_nb(remote, reply,
ORTE_RML_TAG_DIRECT_MODEX_RESP,
orte_rml_send_callback, NULL);
return;
@ -435,8 +434,7 @@ static void _mdxresp(int sd, short args, void *cbdata)
opal_dss.copy_payload(reply, &req->msg);
/* send the response */
orte_rml.send_buffer_nb(orte_mgmt_conduit,
&req->proxy, reply,
orte_rml.send_buffer_nb(&req->proxy, reply,
ORTE_RML_TAG_DIRECT_MODEX_RESP,
orte_rml_send_callback, NULL);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -143,8 +143,7 @@ static void spawn(int sd, short args, void *cbdata)
}
/* send it to the HNP for processing - might be myself! */
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
ORTE_PROC_MY_HNP, buf,
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
ORTE_RML_TAG_PLM,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше