1
1

Enable modex-less launch. Consists of:

1. minor modification to include two new opal MCA params:
   (a) opal_profile: outputs what components were selected by each framework
       currently enabled for most, but not all, frameworks
   (b) opal_profile_file: name of file that contains profile info required
       for modex

2. introduction of two new tools:
   (a) ompi-probe: MPI process that simply calls MPI_Init/Finalize with
       opal_profile set. Also reports back the rml IP address for all
       interfaces on the node
   (b) ompi-profiler: uses ompi-probe to create the profile_file, also
       reports out a summary of what framework components are actually
       being used to help with configuration options

3. modification of the grpcomm basic component to utilize the
   profile file in place of the modex where possible

4. modification of orterun so it properly sees opal mca params and
   handles opal_profile correctly to ensure we don't get its profile

5. similar mod to orted as for orterun

6. addition of new test that calls orte_init followed by calls to
   grpcomm.barrier

This is all completely benign unless actively selected. At the moment, it only supports modex-less launch for openib-based systems. A minor mod to the TCP btl would be required to enable it there as well, if people are interested. Similarly, anyone interested in enabling other BTLs for modex-less operation should let me know and I'll give you the magic details.

This seems to significantly improve scalability provided the file can be locally located on the nodes. I'm looking at an alternative means of disseminating the info (perhaps in launch message) as an option for removing that constraint.

This commit was SVN r20098.
Этот коммит содержится в:
Ralph Castain 2008-12-09 23:49:02 +00:00
родитель ba359623e0
Коммит 1ace83c470
27 изменённых файлов: 1597 добавлений и 70 удалений

Просмотреть файл

@ -1373,6 +1373,8 @@ AC_CONFIG_FILES([
ompi/tools/wrappers/mpif90-wrapper-data.txt ompi/tools/wrappers/mpif90-wrapper-data.txt
ompi/tools/ortetools/Makefile ompi/tools/ortetools/Makefile
ompi/tools/ompi-server/Makefile ompi/tools/ompi-server/Makefile
ompi/tools/ompi-probe/Makefile
ompi/tools/ompi-profiler/Makefile
test/Makefile test/Makefile
test/event/Makefile test/event/Makefile

Просмотреть файл

@ -21,6 +21,8 @@
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/runtime/opal.h"
int mca_bml_base_output = -1; int mca_bml_base_output = -1;
mca_bml_base_module_t mca_bml = { mca_bml_base_module_t mca_bml = {
@ -86,6 +88,9 @@ int mca_bml_base_init( bool enable_progress_threads,
else { else {
mca_bml_component = *best_component; mca_bml_component = *best_component;
mca_bml = *best_module; mca_bml = *best_module;
if (opal_profile) {
opal_output(0, "bml:%s", mca_bml_component.bml_version.mca_component_name);
}
return mca_base_components_close(mca_bml_base_output, return mca_base_components_close(mca_bml_base_output,
&mca_bml_base_components_available, &mca_bml_base_components_available,
(mca_base_component_t*) best_component); (mca_base_component_t*) best_component);

Просмотреть файл

@ -28,6 +28,7 @@
#include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h" #include "ompi/mca/btl/base/base.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "opal/runtime/opal.h"
OBJ_CLASS_INSTANCE( mca_btl_base_selected_module_t, OBJ_CLASS_INSTANCE( mca_btl_base_selected_module_t,
opal_list_item_t, opal_list_item_t,
@ -132,6 +133,10 @@ int mca_btl_base_select(bool enable_progress_threads,
"select: init of component %s returned success", "select: init of component %s returned success",
component->btl_version.mca_component_name); component->btl_version.mca_component_name);
if (opal_profile) {
opal_output(0, "btl:%s", component->btl_version.mca_component_name);
}
for (i = 0; i < num_btls; ++i) { for (i = 0; i < num_btls; ++i) {
sm = OBJ_NEW(mca_btl_base_selected_module_t); sm = OBJ_NEW(mca_btl_base_selected_module_t);
if (NULL == sm) { if (NULL == sm) {

Просмотреть файл

@ -24,6 +24,7 @@
#include "opal/runtime/opal_progress.h" #include "opal/runtime/opal_progress.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/runtime/opal.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h" #include "orte/util/name_fns.h"
@ -248,6 +249,10 @@ int mca_pml_base_select(bool enable_progress_threads,
"select: component %s selected", "select: component %s selected",
mca_pml_base_selected_component.pmlm_version.mca_component_name ); mca_pml_base_selected_component.pmlm_version.mca_component_name );
if (opal_profile) {
opal_output(0, "pml:%s", mca_pml_base_selected_component.pmlm_version.mca_component_name );
}
/* This base function closes, unloads, and removes from the /* This base function closes, unloads, and removes from the
available list all unselected components. The available list will available list all unselected components. The available list will
contain only the selected component. */ contain only the selected component. */

Просмотреть файл

@ -25,11 +25,14 @@ SUBDIRS += \
tools/ompi_info \ tools/ompi_info \
tools/wrappers \ tools/wrappers \
tools/ortetools \ tools/ortetools \
tools/ompi-server tools/ompi-server \
tools/ompi-probe \
tools/ompi-profiler
DIST_SUBDIRS += \ DIST_SUBDIRS += \
tools/ompi_info \ tools/ompi_info \
tools/wrappers \ tools/wrappers \
tools/ortetools \ tools/ortetools \
tools/ompi-server tools/ompi-server \
tools/ompi-probe \
tools/ompi-profiler

47
ompi/tools/ompi-probe/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,47 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Shared man-page rules (defined outside this file)
include $(top_srcdir)/Makefile.man-page-rules
# Man pages produced for this tool; the matching .1in files are shipped
# in the distribution tarball via EXTRA_DIST
man_pages = ompi-probe.1
EXTRA_DIST = $(man_pages:.1=.1in)
# The binary, its help file and its man page are only installed when
# binaries are being installed and full ORTE support is enabled
if OMPI_INSTALL_BINARIES
if !ORTE_DISABLE_FULL_SUPPORT
bin_PROGRAMS = ompi-probe
dist_pkgdata_DATA = help-ompi-probe.txt
nodist_man_MANS = $(man_pages)
# Ensure that the man pages are rebuilt if the opal_config.h file
# changes; a "good enough" way to know if configure was run again (and
# therefore the release date or version may have changed)
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
endif # !ORTE_DISABLE_FULL_SUPPORT
endif # OMPI_INSTALL_BINARIES
# Single-source program linked against the MPI library built in this tree
ompi_probe_SOURCES = ompi-probe.c
ompi_probe_LDADD = $(top_builddir)/ompi/libmpi.la
# Remove the generated man pages on distclean
distclean-local:
rm -f $(man_pages)

27
ompi/tools/ompi-probe/help-ompi-probe.txt Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open MPI's ompi-probe tool.
#
[ompiprobe:usage]
Probe a set of nodes to determine their configuration
Usage: %s [OPTIONS]
%s

72
ompi/tools/ompi-probe/ompi-probe.1in Обычный файл
Просмотреть файл

@ -0,0 +1,72 @@
.\"
.\" Copyright (c) 2007 Los Alamos National Security, LLC
.\" All rights reserved.
.\" Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
.\"
.\" Man page for OMPI's ompi-probe command
.\"
.\" NOTE: the synopsis, options and description below were copied from the
.\" ompi-server page and still need to be revised for ompi-probe.
.\"
.\" .TH name section center-footer left-footer center-header
.TH OMPI-PROBE 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
.\" **************************
.\" Name Section
.\" **************************
.SH NAME
.
ompi-probe \- Probe a set of nodes to determine their configuration.
.
.PP
.
.\" **************************
.\" Synopsis Section
.\" **************************
.SH SYNOPSIS
.
.BR ompi-server " [ options ]"
.
.\" **************************
.\" Options Section
.\" **************************
.SH Options
.
\fIompi-server\fR acts as a data server for Open MPI jobs to exchange
contact information in support of MPI-2's Publish_name and Lookup_name
functions.
.
.TP 10
.B -h | --help
Display help for this command
.
.
.TP
.B -d | --debug
Enable verbose output for debugging
.
.
.TP
.B -r | --report-uri \fR<value>\fP
Report the Open MPI contact information for the server. This information is
required for MPI jobs to use the data server. Three parameter values are supported:
(a) '-', indicating that the uri is to be printed to stdout; (b) '+', indicating that
the uri is to be printed to stderr; and (c) "file:path-to-file", indicating that
the uri is to be printed to the specified file. The "path-to-file" can be either
absolute or relative, but must be in a location where the user has write
permissions. Please note that the resulting file must be read-accessible to
expected users of the server.
.
.
.\" **************************
.\" Description Section
.\" **************************
.SH DESCRIPTION
.
.PP
\fIompi-server\fR acts as a data server for Open MPI jobs to exchange
contact information in support of MPI-2's Publish_name and Lookup_name
functions.
.
.\" **************************
.\" See Also Section
.\" **************************
.
.SH SEE ALSO
.

157
ompi/tools/ompi-probe/ompi-probe.c Обычный файл
Просмотреть файл

@ -0,0 +1,157 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include <stdio.h>
#include <ctype.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include "opal/dss/dss.h"
#include "opal/mca/base/base.h"
#include "opal/util/opal_environ.h"
#include "opal/runtime/opal.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
/*
* Globals
*/
/*
 * ompi-probe entry point.
 *
 * Runs MPI_Init so that every framework performs its selection (which emits
 * the profile output when opal_profile is set), then extracts the IP address
 * of each interface listed in this process' RML contact string and sends
 * "<nodename>, oob.tcp, <length>, <ip[:ip...]>" to the HNP on the
 * ORTE_RML_TAG_GRPCOMM_PROFILE tag.
 *
 * argc/argv are not examined.  Returns 0; exits with status 1 only when
 * opal_init_util fails.
 */
int main(int argc, char *argv[])
{
    char *tmp_env_var = NULL;
    char *rml_uri;
    opal_buffer_t buffer;
    char *attr = "oob.tcp";
    int32_t len;
    int rc;

    /* init enough of opal to use a few utilities */
    if (OPAL_SUCCESS != opal_init_util()) {
        fprintf(stderr, "OPAL failed to initialize -- ompi-probe aborting\n");
        exit(1);
    }

#if OPAL_ENABLE_FT == 1
    /* Disable the checkpoint notification routine for this
     * tool. As we will never need to checkpoint this tool.
     * Note: This must happen before opal_init().
     */
    opal_cr_set_enabled(false);

    /* Select the none component, since we don't actually use a checkpointer */
    tmp_env_var = mca_base_param_env_var("crs");
    opal_setenv(tmp_env_var,
                "none",
                true, &environ);
    free(tmp_env_var);
    tmp_env_var = NULL;

    /* Mark as a tool program */
    tmp_env_var = mca_base_param_env_var("opal_cr_is_tool");
    opal_setenv(tmp_env_var,
                "1",
                true, &environ);
    free(tmp_env_var);
#endif
    tmp_env_var = NULL; /* Silence compiler warning */

    /* open up and select all the frameworks - this will generate the
     * profiled output
     */
    MPI_Init(NULL, NULL);

    /* get our RML uri */
    rml_uri = orte_rml.get_contact_info();
    if (NULL != rml_uri) {
        char *scan = rml_uri;   /* where the search for the next "//" resumes */
        char *ptr, *endip, *tmp;
        char *ipout = NULL;     /* colon-separated list of the IPs found so far */

        /* remove the non-IP info: each address sits between "//" and the
         * ':' that follows it
         */
        while (NULL != (ptr = strchr(scan, '/'))) {
            if ('\0' == ptr[1]) {
                /* lone '/' at the very end - stepping over two characters
                 * would leave the string, so treat the uri as malformed
                 */
                goto release;
            }
            /* next position is the second '/' */
            ptr += 2;
            /* now look for ':' */
            endip = strchr(ptr, ':');
            if (NULL == endip) {
                /* got an error - just dump this */
                goto release;
            }
            *endip = '\0';
            if (NULL == ipout) {
                ipout = strdup(ptr);
            } else {
                if (0 > asprintf(&tmp, "%s:%s", ipout, ptr)) {
                    /* contents of tmp are undefined on failure */
                    tmp = NULL;
                }
                free(ipout);
                ipout = tmp;
            }
            if (NULL == ipout) {
                /* out of memory - nothing usable to report */
                goto release;
            }
            /* resume AFTER the terminator we just wrote; restarting at the
             * terminator itself would end the scan after the first address
             */
            scan = endip + 1;
        }

        if (NULL == ipout) {
            /* the uri held no address at all - nothing to send */
            goto release;
        }

        /* send the result to the HNP*/
        OBJ_CONSTRUCT(&buffer, opal_buffer_t);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &orte_process_info.nodename, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto skip;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &attr, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto skip;
        }
        len = (int32_t)strlen(ipout);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &len, 1, OPAL_INT32))) {
            ORTE_ERROR_LOG(rc);
            goto skip;
        }
        /* OPAL_BYTE takes a pointer to the bytes themselves, so pass the
         * string and not the address of the pointer variable
         */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, ipout, len, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            goto skip;
        }
        if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_GRPCOMM_PROFILE, 0))) {
            ORTE_ERROR_LOG(rc);
        }

    skip:
        OBJ_DESTRUCT(&buffer);

    release:
        /* cleanup */
        free(ipout);
        free(rml_uri);
    }

    /* Finalize and clean up ourselves */
    MPI_Finalize();

    return 0;
}

47
ompi/tools/ompi-profiler/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,47 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Shared man-page rules (defined outside this file)
include $(top_srcdir)/Makefile.man-page-rules
# Man pages produced for this tool; the matching .1in files are shipped
# in the distribution tarball via EXTRA_DIST
man_pages = ompi-profiler.1
EXTRA_DIST = $(man_pages:.1=.1in)
# The binary, its help file and its man page are only installed when
# binaries are being installed and full ORTE support is enabled
if OMPI_INSTALL_BINARIES
if !ORTE_DISABLE_FULL_SUPPORT
bin_PROGRAMS = ompi-profiler
dist_pkgdata_DATA = help-ompi-profiler.txt
nodist_man_MANS = $(man_pages)
# Ensure that the man pages are rebuilt if the opal_config.h file
# changes; a "good enough" way to know if configure was run again (and
# therefore the release date or version may have changed)
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
endif # !ORTE_DISABLE_FULL_SUPPORT
endif # OMPI_INSTALL_BINARIES
# Single-source program linked against the MPI library built in this tree
ompi_profiler_SOURCES = ompi-profiler.c
ompi_profiler_LDADD = $(top_builddir)/ompi/libmpi.la
# Remove the generated man pages on distclean
distclean-local:
rm -f $(man_pages)

Просмотреть файл

@ -0,0 +1,37 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open MPI's ompi-profiler tool.
#
[ompi-profiler:usage]
ompi-profiler (Open MPI) version %s
Profile Open MPI to determine what frameworks are being used, and to
report out information that can be used to tailor Open MPI's behavior.
Usage: ompi-profiler [OPTIONS]
%s
Report bugs to %s
#
[ompi-profiler:version]
ompi-profiler (Open MPI) version %s
Report bugs to %s

72
ompi/tools/ompi-profiler/ompi-profiler.1in Обычный файл
Просмотреть файл

@ -0,0 +1,72 @@
.\"
.\" Copyright (c) 2007 Los Alamos National Security, LLC
.\" All rights reserved.
.\" Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
.\"
.\" Man page for OMPI's ompi-profiler command
.\"
.\" NOTE: the synopsis, options and description below were copied from the
.\" ompi-server page and still need to be revised for ompi-profiler.
.\"
.\" .TH name section center-footer left-footer center-header
.TH OMPI-PROFILER 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
.\" **************************
.\" Name Section
.\" **************************
.SH NAME
.
ompi-profiler \- Profile Open MPI to determine what frameworks are being used.
.
.PP
.
.\" **************************
.\" Synopsis Section
.\" **************************
.SH SYNOPSIS
.
.BR ompi-server " [ options ]"
.
.\" **************************
.\" Options Section
.\" **************************
.SH Options
.
\fIompi-server\fR acts as a data server for Open MPI jobs to exchange
contact information in support of MPI-2's Publish_name and Lookup_name
functions.
.
.TP 10
.B -h | --help
Display help for this command
.
.
.TP
.B -d | --debug
Enable verbose output for debugging
.
.
.TP
.B -r | --report-uri \fR<value>\fP
Report the Open MPI contact information for the server. This information is
required for MPI jobs to use the data server. Three parameter values are supported:
(a) '-', indicating that the uri is to be printed to stdout; (b) '+', indicating that
the uri is to be printed to stderr; and (c) "file:path-to-file", indicating that
the uri is to be printed to the specified file. The "path-to-file" can be either
absolute or relative, but must be in a location where the user has write
permissions. Please note that the resulting file must be read-accessible to
expected users of the server.
.
.
.\" **************************
.\" Description Section
.\" **************************
.SH DESCRIPTION
.
.PP
\fIompi-server\fR acts as a data server for Open MPI jobs to exchange
contact information in support of MPI-2's Publish_name and Lookup_name
functions.
.
.\" **************************
.\" See Also Section
.\" **************************
.
.SH SEE ALSO
.

457
ompi/tools/ompi-profiler/ompi-profiler.c Обычный файл
Просмотреть файл

@ -0,0 +1,457 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/version.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include "opal/class/opal_list.h"
#include "opal/util/argv.h"
#include "opal/runtime/opal.h"
#include "opal/util/cmd_line.h"
#include "opal/mca/base/base.h"
#include "opal/util/os_path.h"
#include "opal/util/path.h"
#include "opal/util/opal_environ.h"
#include "opal/util/show_help.h"
/*
* Globals
*/
/*
 * One entry per node name seen in the probe output.
 *
 * item       - list linkage; kept as the first member because list items
 *              are cast directly to orte_profile_node_t pointers
 * name       - heap-allocated node name (owned by this object), or NULL
 * frameworks - list of orte_profile_t entries recorded for this node
 *              (owned by this object)
 */
typedef struct {
    opal_list_item_t item;
    char *name;
    opal_list_t frameworks;
} orte_profile_node_t;

/* Start with no name and an empty framework list */
static void node_constructor(orte_profile_node_t *ptr)
{
    ptr->name = NULL;
    OBJ_CONSTRUCT(&ptr->frameworks, opal_list_t);
}

/* Free the name and release every framework entry still on the list;
 * destructing the list alone would leave those entries allocated
 */
static void node_destructor(orte_profile_node_t *ptr)
{
    opal_list_item_t *entry;

    free(ptr->name);
    ptr->name = NULL;

    while (NULL != (entry = opal_list_remove_first(&ptr->frameworks))) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&ptr->frameworks);
}

OBJ_CLASS_INSTANCE(orte_profile_node_t,
                   opal_list_item_t,
                   node_constructor,
                   node_destructor);
/*
 * One framework/component record.
 *
 * item      - list linkage; first member so list items can be cast to
 *             orte_profile_t pointers
 * num_nodes - counter of nodes reporting this framework/component pair
 * framework - heap-allocated framework name, or NULL
 * component - heap-allocated component name, or NULL
 * params    - heap-allocated remainder of the reported line, or NULL
 */
typedef struct {
    opal_list_item_t item;
    int num_nodes;
    char *framework;
    char *component;
    char *params;
} orte_profile_t;

/* Zero the counter and clear all three strings */
static void profile_constructor(orte_profile_t *ptr)
{
    ptr->params = NULL;
    ptr->component = NULL;
    ptr->framework = NULL;
    ptr->num_nodes = 0;
}

/* Give back the strings owned by the record (free accepts NULL) */
static void profile_destructor(orte_profile_t *ptr)
{
    free(ptr->framework);
    free(ptr->component);
    free(ptr->params);
}

OBJ_CLASS_INSTANCE(orte_profile_t,
                   opal_list_item_t,
                   profile_constructor,
                   profile_destructor);
static void read_file(opal_list_t *nodes, FILE *fp);
/* global variables - each one is filled in by the command line parser
 * from the matching entry in cmd_line_init below
 */
static bool help = false;          /* -h | --help */
static bool version = false;       /* -V | --version */
static bool verbose = false;       /* --verbose */
static bool configout = false;     /* --config */
static char *profilefile = NULL;   /* --profile <file> */
static bool report = false;        /* --report */

/* Command line options understood by ompi-profiler */
static opal_cmd_line_init_t cmd_line_init[] = {
    /* Various "obvious" options */
    { NULL, NULL, NULL, 'h', "help", "help", 0,
      &help, OPAL_CMD_LINE_TYPE_BOOL,
      "This help message" },

    { NULL, NULL, NULL, 'V', "version", "version", 0,
      &version, OPAL_CMD_LINE_TYPE_BOOL,
      "Print version and exit" },

    /* the description used to repeat the --version text */
    { NULL, NULL, NULL, '\0', "verbose", "verbose", 0,
      &verbose, OPAL_CMD_LINE_TYPE_BOOL,
      "Be verbose" },

    { NULL, NULL, NULL, '\0', "config", "config", 0,
      &configout, OPAL_CMD_LINE_TYPE_BOOL,
      "Print framework/component usage" },

    { NULL, NULL, NULL, '\0', "profile", "profile", 1,
      &profilefile, OPAL_CMD_LINE_TYPE_STRING,
      "File to update with system profile parameters" },

    { NULL, NULL, NULL, '\0', "report", "report", 0,
      &report, OPAL_CMD_LINE_TYPE_BOOL,
      "Print out a report of the data in the given profile file" },

    /* End of list */
    { NULL, NULL, NULL, '\0', NULL, NULL, 0,
      NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
};
/*
 * Read exactly len bytes from fd into buf, retrying after EINTR and after
 * short reads.
 *
 * Returns 1 when all len bytes were read (also for len == 0), 0 when
 * end-of-file was hit before the first byte, and -1 on a read error or
 * when the file ended part-way through.
 */
static int read_fully(int fd, void *buf, size_t len)
{
    char *dst = (char *)buf;
    size_t got = 0;
    ssize_t rc;

    while (got < len) {
        rc = read(fd, dst + got, len - got);
        if (0 > rc) {
            if (EINTR == errno) {
                continue;
            }
            return -1;
        }
        if (0 == rc) {
            return (0 == got) ? 0 : -1;
        }
        got += (size_t)rc;
    }
    return 1;
}

/*
 * Consume a count-byte field from fd.  At most cap-1 bytes are kept in buf,
 * which is always NUL-terminated; any excess is read and discarded so the
 * file position ends up just past the field.
 *
 * Returns 1 on success, -1 when count is negative or the bytes could not
 * all be read.
 */
static int read_payload(int fd, char *buf, size_t cap, int32_t count)
{
    char scratch[512];
    size_t remaining, chunk;

    if (0 > count || 0 == cap) {
        return -1;
    }
    remaining = (size_t)count;

    memset(buf, 0, cap);
    chunk = (remaining < cap - 1) ? remaining : cap - 1;
    if (1 != read_fully(fd, buf, chunk)) {
        return -1;
    }
    remaining -= chunk;

    while (0 < remaining) {
        chunk = (remaining < sizeof(scratch)) ? remaining : sizeof(scratch);
        if (1 != read_fully(fd, scratch, chunk)) {
            return -1;
        }
        remaining -= chunk;
    }
    return 1;
}

/*
 * Print one line per record found in the profile file.  Each record is read
 * as: int32 length + node name, int32 length + attribute name, int32 length
 * + data blob.
 *
 * Returns 0 when the whole file was reported, 1 on any failure.
 */
static int report_profile(const char *file)
{
    int fd;
    int rc;
    int status = 1;
    int32_t num_bytes;
    char *nodename = NULL, *attr = NULL;
    char data[8192];

    /* NOTE(review): the two help topics used below must be present in
     * help-ompi-profiler.txt - confirm
     */
    if (NULL == file) {
        opal_show_help("help-ompi-profiler.txt", "ompi-profiler:report-wo-file", false);
        return 1;
    }
    fd = open(file, O_RDONLY);
    if (fd < 0) {
        opal_show_help("help-ompi-profiler.txt", "ompi-profiler:report-file-not-found", false);
        return 1;
    }

    /* loop through file until end */
    for (;;) {
        /* this is the number of bytes in the nodename */
        rc = read_fully(fd, &num_bytes, sizeof(num_bytes));
        if (0 == rc) {
            /* clean end of file between records */
            status = 0;
            break;
        }
        if (0 > rc || 1 != read_payload(fd, data, sizeof(data), num_bytes)) {
            fprintf(stderr, "ompi-profiler: node name not found\n");
            break;
        }
        /* this is the nodename - save it */
        nodename = strdup(data);

        /* get the number of bytes in the attribute name */
        if (1 != read_fully(fd, &num_bytes, sizeof(num_bytes))) {
            fprintf(stderr, "ompi-profiler: attribute size not found\n");
            break;
        }
        /* get the attribute name */
        if (1 != read_payload(fd, data, sizeof(data), num_bytes)) {
            fprintf(stderr, "ompi-profiler: attribute name not found\n");
            break;
        }
        attr = strdup(data);

        /* read the number of bytes in the blob */
        if (1 != read_fully(fd, &num_bytes, sizeof(num_bytes))) {
            fprintf(stderr, "ompi-profiler: data size not found\n");
            break;
        }
        /* read the bytes so we position ourselves */
        if (1 != read_payload(fd, data, sizeof(data), num_bytes)) {
            fprintf(stderr, "ompi-profiler: data not found\n");
            break;
        }

        /* report the results */
        fprintf(stdout, "Node %s reported %d bytes for attribute %s\n",
                nodename, num_bytes, attr);
        free(nodename);
        nodename = NULL;
        free(attr);
        attr = NULL;
    }

    free(nodename);
    free(attr);
    close(fd);
    return status;
}

/*
 * ompi-profiler entry point.
 *
 * --help / --version print the corresponding help topic and exit.
 * --report prints the records stored in the file given with --profile.
 * Otherwise ompi-probe is run through mpirun with opal_profile set; with
 * --config its output is captured in a temporary file and summarised per
 * framework/component on stderr.
 *
 * Returns 0 on success and non-zero on failure.
 */
int main(int argc, char *argv[])
{
    opal_list_t nodes, frames;
    opal_list_item_t *item, *item2, *itemold;
    orte_profile_node_t *node;
    orte_profile_t *profile, *frame;
    int ret;
    int len;
    int status = 0;
    opal_cmd_line_t cmd_line;
    char *args = NULL;
    char *configfile = NULL;
    char *cmd = NULL;

    opal_cmd_line_create(&cmd_line, cmd_line_init);
    mca_base_cmd_line_setup(&cmd_line);
    if (OPAL_SUCCESS != (ret = opal_cmd_line_parse(&cmd_line, true,
                                                   argc, argv)) ) {
        return ret;
    }

    /* init enough of opal to use a few utilities */
    if (OPAL_SUCCESS != opal_init_util()) {
        fprintf(stderr, "OPAL failed to initialize -- ompi-profiler aborting\n");
        exit(1);
    }

    /* check for some simple options */
    if (help) {
        args = opal_cmd_line_get_usage_msg(&cmd_line);
        opal_show_help("help-ompi-profiler.txt", "ompi-profiler:usage", false,
                       OPAL_VERSION, args, PACKAGE_BUGREPORT);
        free(args);
        /* If someone asks for help, that should be all we do */
        exit(0);
    }
    if (version) {
        opal_show_help("help-ompi-profiler.txt", "ompi-profiler:version", false,
                       OPAL_VERSION, PACKAGE_BUGREPORT);
        exit(0);
    }

    if (report) {
        /* just read the given file and print out a report */
        exit(report_profile(profilefile));
    }

    /* do a quick sanity check - since they didn't want a report, see if they don't
     * want -anything-
     */
    if (!configout && NULL == profilefile) {
        /* save us the annoyance - you have to want -something-! */
        fprintf(stderr, "ompi-profiler: no options specified - aborting\n");
        exit(1);
    }

    /* setup the cmd to execute */
    if (configout) {
        if (0 > asprintf(&configfile, "profiler.%d", getpid())) {
            /* contents are undefined on failure */
            configfile = NULL;
        }
    } else {
        configfile = strdup("/dev/null");
    }
    if (NULL == configfile) {
        fprintf(stderr, "ompi-profiler: out of memory - aborting\n");
        exit(1);
    }

    /* system() hands the string to /bin/sh, so use the POSIX form of
     * "send stdout and stderr to a file" rather than the csh-style ">&".
     * NOTE(review): the file names are inserted unquoted; names containing
     * shell metacharacters will be interpreted by the shell.
     */
    if (NULL != profilefile) {
        ret = asprintf(&cmd, "mpirun -pernode -mca opal_profile 1 -mca opal_profile_file %s -mca grpcomm basic ompi-probe > %s 2>&1",
                       profilefile, configfile);
    } else {
        ret = asprintf(&cmd, "mpirun -pernode -mca opal_profile 1 -mca grpcomm basic ompi-probe > %s 2>&1",
                       configfile);
    }
    if (0 > ret) {
        fprintf(stderr, "ompi-profiler: out of memory - aborting\n");
        status = 1;
        goto CLEANUP;
    }

    if (verbose) {
        fprintf(stderr, "ompi-profiler: executing %s\n", cmd);
    }

    /* execute it */
    if (0 > system(cmd)) {
        fprintf(stderr, "ompi-profiler: could not execute cmd %s\n", cmd);
        free(cmd);
        status = 1;
        goto CLEANUP;
    }
    free(cmd);

    /* did they want the configuration output? */
    if (configout) {
        FILE *fp;
        struct stat buf;
        int found;

        /* does the file already exist? */
        if (0 != stat(configfile, &buf)) {
            /* file must not have been created */
            fprintf(stderr, "Temporary output file %s could not be found - config report cannot be generated\n", configfile);
            status = 1;
            goto CLEANUP;
        }
        /* yes - read the info so we can output it */
        fp = fopen(configfile, "r");
        if (NULL == fp) {
            fprintf(stderr, "Impossible to open the file %s in read mode\n", configfile );
            status = 1;
            goto CLEANUP;
        }
        OBJ_CONSTRUCT(&nodes, opal_list_t);
        read_file(&nodes, fp);
        fclose(fp);

        /* setup a list of framework info */
        OBJ_CONSTRUCT(&frames, opal_list_t);
        len = (int)opal_list_get_size(&nodes);

        /* convert the results over to the new list */
        while (NULL != (item = opal_list_remove_first(&nodes))) {
            node = (orte_profile_node_t*)item;
            /* loop through this node's frameworks */
            for (item2 = opal_list_get_first(&node->frameworks);
                 item2 != opal_list_get_end(&node->frameworks);
                 item2 = opal_list_get_next(item2)) {
                profile = (orte_profile_t*)item2;
                /* is this framework/component pair already in our list? */
                found = 0;
                for (itemold = opal_list_get_first(&frames);
                     itemold != opal_list_get_end(&frames);
                     itemold = opal_list_get_next(itemold)) {
                    frame = (orte_profile_t*)itemold;
                    if (0 == strcmp(profile->framework, frame->framework) &&
                        0 == strcmp(profile->component, frame->component)) {
                        /* all matches - increment # matches */
                        frame->num_nodes++;
                        found = 1;
                        break;
                    }
                }
                if (found) {
                    continue;
                }
                /* get here if the framework/component is new */
                frame = OBJ_NEW(orte_profile_t);
                frame->num_nodes++;
                frame->framework = strdup(profile->framework);
                frame->component = strdup(profile->component);
                opal_list_append(&frames, &frame->item);
            }
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&nodes);

        /* output the list */
        while (NULL != (itemold = opal_list_remove_first(&frames))) {
            frame = (orte_profile_t*)itemold;
            if (len == frame->num_nodes) {
                fprintf(stderr, "All nodes use framework %s component %s\n", frame->framework, frame->component);
            } else {
                fprintf(stderr, "%d nodes use framework %s component %s\n", frame->num_nodes, frame->framework, frame->component);
            }
            OBJ_RELEASE(frame);
        }
        OBJ_DESTRUCT(&frames);
    }

CLEANUP:
    /* remove the file */
    if (NULL != configfile) {
        if (0 != strcmp("/dev/null", configfile)) {
            unlink(configfile);
        }
        free(configfile);
    }

    return status;
}
/*
 * Parse the captured probe output in fp and fill the list "nodes".
 *
 * Only lines of the form "[<nodename>:<pid>] <framework>:<component>[:<rest>]"
 * are used; lines that do not start with '[' are ignored and malformed
 * lines are reported on stderr and skipped.  Each distinct node name gets
 * one orte_profile_node_t appended to "nodes", and each framework seen on
 * that node gets one orte_profile_t appended to the node's framework list.
 * A second line for a framework already recorded on the same node is
 * dropped.
 * NOTE(review): that rule also drops additional components of a framework
 * that selects several per node - confirm whether that is intended.
 */
static void read_file(opal_list_t *nodes, FILE *fp)
{
    char line[1024];
    char *endprefix, *endnodename, *data, *nodename;
    char **inputs;
    opal_list_item_t *item;
    orte_profile_node_t *node;
    orte_profile_t *profile;
    size_t len;
    int known;

    while (NULL != fgets(line, sizeof(line), fp)) {
        /* remove any trailing newline - guard the index so that an empty
         * string never leads to an access before the start of the buffer
         */
        len = strlen(line);
        if (0 < len && '\n' == line[len-1]) {
            line[len-1] = '\0';
        }

        if ('[' != line[0]) {
            /* indicates empty line - ignore it */
            continue;
        }
        if (NULL == (endprefix = strchr(line, ']'))) {
            fprintf(stderr, "ompi-profiler: read bad input for ] %s\n", line);
            continue;
        }
        /* the payload starts after "] "; when nothing follows the bracket,
         * stay on the terminator instead of stepping past it
         */
        data = ('\0' == endprefix[1]) ? endprefix + 1 : endprefix + 2;
        *endprefix = '\0';

        /* break the prefix at the colon - we don't need the pid */
        if (NULL == (endnodename = strchr(line, ':'))) {
            fprintf(stderr, "ompi-profiler: read bad input for : %s\n", line);
            continue;
        }
        *endnodename = '\0';
        nodename = &line[1];

        /* is this node already in our list */
        node = NULL;
        for (item = opal_list_get_first(nodes);
             item != opal_list_get_end(nodes);
             item = opal_list_get_next(item)) {
            if (0 == strcmp(((orte_profile_node_t*)item)->name, nodename)) {
                /* already present - just add to it */
                node = (orte_profile_node_t*)item;
                break;
            }
        }
        if (NULL == node) {
            /* it wasn't found - start a new entry */
            node = OBJ_NEW(orte_profile_node_t);
            node->name = strdup(nodename);
            opal_list_append(nodes, &node->item);
        }

        /* use an opal utility to parse the rest of the data */
        if ((NULL == (inputs = opal_argv_split(data, ':'))) ||
            opal_argv_count(inputs) < 2) {
            fprintf(stderr, "ompi-profiler: read bad input for second : %s\n", data);
            opal_argv_free(inputs);
            continue;
        }

        /* first entry must be the framework - see if we already have it */
        known = 0;
        for (item = opal_list_get_first(&node->frameworks);
             item != opal_list_get_end(&node->frameworks);
             item = opal_list_get_next(item)) {
            profile = (orte_profile_t*)item;
            if (0 == strcmp(inputs[0], profile->framework)) {
                /* this will happen if mpirun outputs some of the
                 * frameworks AND a proc is local to mpirun
                 */
                known = 1;
                break;
            }
        }

        if (!known) {
            profile = OBJ_NEW(orte_profile_t);
            profile->framework = strdup(inputs[0]);
            /* second entry is component */
            profile->component = strdup(inputs[1]);
            /* if there is anything more, just save it */
            if (NULL != inputs[2]) {
                profile->params = opal_argv_join(&inputs[2], ':');
            }
            opal_list_append(&node->frameworks, &profile->item);
        }

        opal_argv_free(inputs);
    }
}

Просмотреть файл

@ -18,6 +18,7 @@
#include <sys/types.h> #include <sys/types.h>
#endif #endif
#include "opal/runtime/opal.h"
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "opal/util/strncpy.h" #include "opal/util/strncpy.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
@ -120,7 +121,10 @@ int mca_base_select(const char *type_name, int output_id,
opal_output_verbose(5, output_id, opal_output_verbose(5, output_id,
"mca:base:select:(%5s) Selected component [%s]", "mca:base:select:(%5s) Selected component [%s]",
type_name, (*best_component)->mca_component_name); type_name, (*best_component)->mca_component_name);
if (opal_profile) {
opal_output(0, "%s:%s", type_name, (*best_component)->mca_component_name);
}
/* /*
* Close the non-selected components * Close the non-selected components
*/ */

Просмотреть файл

@ -24,13 +24,15 @@
#include "opal_config.h" #include "opal_config.h"
#if defined(c_plusplus) || defined(__cplusplus) BEGIN_C_DECLS
extern "C" {
#endif
/** version string of opal */ /** version string of opal */
OPAL_DECLSPEC extern const char opal_version_string[]; OPAL_DECLSPEC extern const char opal_version_string[];
/* profile flag */
OPAL_DECLSPEC extern bool opal_profile;
OPAL_DECLSPEC extern char *opal_profile_file;
/** /**
* Initialize the OPAL layer, including the MCA system. * Initialize the OPAL layer, including the MCA system.
* *
@ -80,8 +82,6 @@ OPAL_DECLSPEC int opal_finalize_util(void);
*/ */
OPAL_DECLSPEC int opal_register_params(void); OPAL_DECLSPEC int opal_register_params(void);
#if defined(c_plusplus) || defined(__cplusplus) END_C_DECLS
}
#endif
#endif #endif

Просмотреть файл

@ -61,6 +61,8 @@ const char opal_version_string[] = OPAL_IDENT_STRING;
int opal_initialized = 0; int opal_initialized = 0;
int opal_util_initialized = 0; int opal_util_initialized = 0;
bool opal_profile = false;
char *opal_profile_file = NULL;
static const char * static const char *
opal_err2str(int errnum) opal_err2str(int errnum)

Просмотреть файл

@ -73,6 +73,19 @@ int opal_register_params(void)
free(string); free(string);
} }
{
int j;
mca_base_param_reg_int_name("opal", "profile",
"Set to non-zero to profile component selections",
false, false, (int)false, &j);
opal_profile = OPAL_INT_TO_BOOL(j);
mca_base_param_reg_string_name("opal", "profile_file",
"Name of the file containing the cluster configuration information",
false, false, NULL, &opal_profile_file);
}
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG

Просмотреть файл

@ -28,5 +28,7 @@ if !ORTE_DISABLE_FULL_SUPPORT
libmca_grpcomm_la_SOURCES += \ libmca_grpcomm_la_SOURCES += \
base/grpcomm_base_allgather.c \ base/grpcomm_base_allgather.c \
base/grpcomm_base_modex.c base/grpcomm_base_modex.c \
base/grpcomm_base_receive.c
endif endif

Просмотреть файл

@ -76,6 +76,11 @@ ORTE_DECLSPEC void orte_grpcomm_base_modex_finalize(void);
ORTE_DECLSPEC int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf, bool *modex_reqd); ORTE_DECLSPEC int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf, bool *modex_reqd);
ORTE_DECLSPEC int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name, ORTE_DECLSPEC int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
opal_buffer_t *rbuf); opal_buffer_t *rbuf);
ORTE_DECLSPEC int orte_grpcomm_base_load_modex_data(orte_process_name_t *proc, char *attribute_name,
void *data, int num_bytes);
ORTE_DECLSPEC int orte_grpcomm_base_comm_start(void);
ORTE_DECLSPEC int orte_grpcomm_base_comm_stop(void);
#endif /* ORTE_DISABLE_FULL_SUPPORT */ #endif /* ORTE_DISABLE_FULL_SUPPORT */

Просмотреть файл

@ -330,6 +330,11 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
modex_proc_data_t *proc_data; modex_proc_data_t *proc_data;
modex_attr_data_t *attr_data; modex_attr_data_t *attr_data;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:get_proc_attr: searching for attr %s on proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attribute_name,
ORTE_NAME_PRINT(&proc)));
proc_data = modex_lookup_orte_proc(&proc); proc_data = modex_lookup_orte_proc(&proc);
if (NULL == proc_data) { if (NULL == proc_data) {
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
@ -363,6 +368,11 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
memcpy(copy, attr_data->attr_data, attr_data->attr_data_size); memcpy(copy, attr_data->attr_data, attr_data->attr_data_size);
*val = copy; *val = copy;
*size = attr_data->attr_data_size; *size = attr_data->attr_data_size;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:get_proc_attr: found %d bytes for attr %s on proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr_data->attr_data_size,
attribute_name, ORTE_NAME_PRINT(&proc)));
} }
OPAL_THREAD_UNLOCK(&proc_data->modex_lock); OPAL_THREAD_UNLOCK(&proc_data->modex_lock);
@ -451,6 +461,11 @@ int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
goto cleanup; goto cleanup;
} }
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:base:update_modex_entries: adding %d entries for proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
ORTE_NAME_PRINT(proc_name)));
/* /*
* Extract the attribute names and values * Extract the attribute names and values
*/ */
@ -507,3 +522,54 @@ cleanup:
OPAL_THREAD_UNLOCK(&proc_data->modex_lock); OPAL_THREAD_UNLOCK(&proc_data->modex_lock);
return rc; return rc;
} }
/**
 * Store a private copy of an attribute value in the modex entry of a proc.
 *
 * @param proc_name  proc whose modex entry is updated
 * @param attr_name  attribute to set; the attribute record is looked up with
 *                   the "create" flag set to true
 * @param data       bytes to copy; the caller keeps ownership
 * @param num_bytes  number of bytes in data; must not be negative
 *
 * @return ORTE_SUCCESS once the copy is stored, ORTE_ERR_NOT_FOUND otherwise.
 *         ORTE_ERR_NOT_FOUND remains the only failure code so that callers
 *         see no code this function did not return before.
 *
 * Any previously stored value is released only after the new copy has been
 * made, so a failure leaves the existing value untouched.
 */
int orte_grpcomm_base_load_modex_data(orte_process_name_t *proc_name, char *attr_name,
                                      void *data, int num_bytes)
{
    modex_proc_data_t *proc_data;
    modex_attr_data_t *attr_data;
    int rc = ORTE_SUCCESS;
    void *bytes;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:base:load_modex_data: loading %ld bytes for attr %s on proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (long)num_bytes, attr_name, ORTE_NAME_PRINT(proc_name)));

    /* a negative count would turn into an enormous size_t below */
    if (0 > num_bytes) {
        opal_output(0, "grpcomm:base:load_modex_data: invalid data size %d for proc %s\n",
                    num_bytes, ORTE_NAME_PRINT(proc_name));
        return ORTE_ERR_NOT_FOUND;
    }

    /* look up the modex data structure */
    proc_data = modex_lookup_orte_proc(proc_name);
    if (proc_data == NULL) {
        /* report the error */
        opal_output(0, "grpcomm:base:load_modex_data: received modex info for unknown proc %s\n",
                    ORTE_NAME_PRINT(proc_name));
        return ORTE_ERR_NOT_FOUND;
    }

    OPAL_THREAD_LOCK(&proc_data->modex_lock);

    /*
     * Lookup the corresponding modex structure
     */
    if (NULL == (attr_data = modex_lookup_attr_data(proc_data,
                                                    attr_name, true))) {
        opal_output(0, "grpcomm:base:load_modex_data: modex_lookup_attr_data failed\n");
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* create space for the data - this is necessary since the data being
     * passed to us may be static or released on the other end. Never ask
     * for zero bytes: what malloc(0) returns is implementation-defined.
     */
    bytes = malloc(0 < num_bytes ? (size_t)num_bytes : 1);
    if (NULL == bytes) {
        opal_output(0, "grpcomm:base:load_modex_data: unable to allocate %d bytes\n", num_bytes);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }
    if (0 < num_bytes) {
        memcpy(bytes, data, (size_t)num_bytes);
    }

    /* the copy succeeded - now it is safe to drop any pre-existing value */
    if (NULL != attr_data->attr_data) {
        free(attr_data->attr_data);
    }
    attr_data->attr_data = bytes;
    attr_data->attr_data_size = num_bytes;
    proc_data->modex_received_data = true;

cleanup:
    OPAL_THREAD_UNLOCK(&proc_data->modex_lock);
    return rc;
}

220
orte/mca/grpcomm/base/grpcomm_base_receive.c Обычный файл
Просмотреть файл

@ -0,0 +1,220 @@
/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
*/
/*
* includes
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <stdio.h>
#include <fcntl.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal.h"
#include "opal/dss/dss.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/grpcomm/base/base.h"
/* set to true by orte_grpcomm_base_comm_start() after it posts the profile
 * receive, and back to false by orte_grpcomm_base_comm_stop() */
static bool recv_issued=false;
/* descriptor of the profile file being written; -1 when no file is open */
static int profile_fd = -1;
/* callback handed to orte_rml.recv_buffer_nb() for ORTE_RML_TAG_GRPCOMM_PROFILE */
static void orte_grpcomm_base_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/**
 * Open the profile file (when one was named) and post the non-blocking
 * receive for ORTE_RML_TAG_GRPCOMM_PROFILE messages.
 *
 * Calling this again after a successful start is a no-op.
 *
 * @return ORTE_SUCCESS when the receive is posted (or was already posted),
 *         ORTE_ERR_FILE_OPEN_FAILURE when the profile file cannot be opened,
 *         otherwise the error returned by orte_rml.recv_buffer_nb().
 *
 * On failure no state is left behind: the file is closed again and
 * recv_issued stays false, so a later call can retry and
 * orte_grpcomm_base_comm_stop() will not cancel a receive that was never
 * posted.
 */
int orte_grpcomm_base_comm_start(void)
{
    int rc;

    if (recv_issued) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:base:receive start comm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* open the profile file for writing */
    if (NULL == opal_profile_file) {
        /* no file specified - we will just ignore any incoming data */
        profile_fd = -1;
    } else {
        profile_fd = open(opal_profile_file, O_CREAT|O_RDWR|O_TRUNC, 0644);
        if (profile_fd < 0) {
            /* couldn't be opened */
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
    }

    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                 ORTE_RML_TAG_GRPCOMM_PROFILE,
                                 ORTE_RML_NON_PERSISTENT,
                                 orte_grpcomm_base_recv,
                                 NULL);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        /* nothing will ever be written - don't leak the descriptor */
        if (0 <= profile_fd) {
            close(profile_fd);
            profile_fd = -1;
        }
        return rc;
    }

    /* only now is there a receive for comm_stop to cancel */
    recv_issued = true;
    return rc;
}
/**
 * Cancel the profile receive and close the profile file.
 *
 * Does nothing when no receive is currently posted. Always returns
 * ORTE_SUCCESS.
 */
int orte_grpcomm_base_comm_stop(void)
{
    if (recv_issued) {
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                             "%s grpcomm:base:receive stop comm",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        /* withdraw the wildcard receive on the profile tag */
        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_GRPCOMM_PROFILE);
        recv_issued = false;

        /* release the profile file, if one was opened */
        if (profile_fd >= 0) {
            close(profile_fd);
            profile_fd = -1;
        }
    }

    return ORTE_SUCCESS;
}
/* Write exactly nbytes from buf to fd, calling write() again after a short
 * write. Returns 0 once everything has been written, -1 as soon as write()
 * fails or makes no progress.
 */
static int profile_write_all(int fd, const void *buf, size_t nbytes)
{
    const char *cursor = (const char *)buf;

    while (0 < nbytes) {
        ssize_t done = write(fd, cursor, nbytes);
        if (done <= 0) {
            return -1;
        }
        cursor += done;
        nbytes -= (size_t)done;
    }
    return 0;
}

/* process incoming messages in order of receipt
 *
 * Event callback: "data" is the orte_message_event_t holding the message.
 * When a profile file is open, the message is unpacked as
 * (node name, attribute name, data size, data bytes) and appended to the
 * file as
 *     int32 length + node name, int32 length + attribute name,
 *     int32 size + data bytes
 * The strings are written without a terminating NUL. Everything unpacked or
 * allocated here is freed before returning, and the message event is always
 * released.
 */
static void process_msg(int fd, short event, void *data)
{
    orte_message_event_t *mev = (orte_message_event_t*)data;
    /* start out NULL so the cleanup code can free unconditionally */
    char *attr = NULL, *nodename = NULL;
    int32_t isize, count;
    void *blob = NULL;
    int32_t len, rc;
    int write_failed;

    /* save the info in the file */
    if (0 <= profile_fd) {
        /* unpack the node name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &nodename, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        /* unpack the attribute name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &attr, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        /* unpack the data size */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &isize, &count, OPAL_INT32))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        /* strlen() and malloc() below need usable strings and a sane size */
        if (NULL == nodename || NULL == attr || 0 > isize) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                                 "%s grpcomm:base:receive ignoring malformed profile message",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto CLEANUP;
        }

        /* allocate space and unpack the data itself; never ask for zero
         * bytes since what malloc(0) returns is implementation-defined
         */
        blob = malloc(0 < isize ? (size_t)isize : 1);
        if (NULL == blob) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                                 "%s grpcomm:base:receive unable to allocate %d bytes",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), isize));
            goto CLEANUP;
        }
        count = isize;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, blob, &count, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                             "%s grpcomm:base:receive writing %d bytes of data for node %s, attribute %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             isize, nodename, attr));

        /* node name, preceded by its length */
        len = (int32_t)strlen(nodename);
        write_failed = (0 != profile_write_all(profile_fd, &len, sizeof(len)) ||
                        0 != profile_write_all(profile_fd, nodename, (size_t)len));

        /* attribute name, preceded by its length */
        if (!write_failed) {
            len = (int32_t)strlen(attr);
            write_failed = (0 != profile_write_all(profile_fd, &len, sizeof(len)) ||
                            0 != profile_write_all(profile_fd, attr, (size_t)len));
        }

        /* data bytes, preceded by their count */
        if (!write_failed) {
            write_failed = (0 != profile_write_all(profile_fd, &isize, sizeof(isize)) ||
                            0 != profile_write_all(profile_fd, blob, (size_t)isize));
        }

        if (write_failed) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                                 "%s grpcomm:base:receive failed writing profile data for node %s, attribute %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename, attr));
        }
    }

CLEANUP:
    /* release everything obtained while handling this message;
     * free(NULL) is a no-op */
    free(blob);
    free(attr);
    free(nodename);
    /* release the message */
    OBJ_RELEASE(mev);
}
/*
 * Receive callback for profile messages.
 *
 * NOTE: the caller OBJ_RELEASEs the incoming "buffer" - it must NOT be
 * released anywhere in this function.
 */
static void orte_grpcomm_base_recv(int status, orte_process_name_t* sender,
                                   opal_buffer_t* buffer, orte_rml_tag_t tag,
                                   void* cbdata)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:base:receive got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* Defer the real work: handling the message may itself require more
     * messaging, which must not happen while we are still inside the recv.
     * Queue an event instead so that process_msg runs as soon as we return.
     *
     * The macro hands the payload of "buffer" over to a message object that
     * process_msg releases when it is done; the incoming buffer itself is
     * left for the caller to release.
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);

    /* the receive is non-persistent, so post it again for the next message */
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                 ORTE_RML_TAG_GRPCOMM_PROFILE,
                                 ORTE_RML_NON_PERSISTENT,
                                 orte_grpcomm_base_recv,
                                 NULL);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
}

Просмотреть файл

@ -25,12 +25,16 @@
#ifdef HAVE_SYS_TIME_H #ifdef HAVE_SYS_TIME_H
#include <sys/time.h> #include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */ #endif /* HAVE_SYS_TIME_H */
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#include <fcntl.h>
#include "opal/threads/condition.h" #include "opal/threads/condition.h"
#include "opal/util/bit_ops.h" #include "opal/util/bit_ops.h"
#include "opal/class/opal_hash_table.h" #include "opal/class/opal_hash_table.h"
#include "opal/dss/dss.h" #include "opal/dss/dss.h"
#include "opal/runtime/opal.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h" #include "orte/mca/ess/ess.h"
@ -56,6 +60,7 @@ static int xcast(orte_jobid_t job,
static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf); static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int barrier(void); static int barrier(void);
static int modex(opal_list_t *procs); static int modex(opal_list_t *procs);
static int set_proc_attr(const char *attr_name, const void *data, size_t size);
/* Module def */ /* Module def */
orte_grpcomm_base_module_t orte_grpcomm_basic_module = { orte_grpcomm_base_module_t orte_grpcomm_basic_module = {
@ -65,23 +70,41 @@ orte_grpcomm_base_module_t orte_grpcomm_basic_module = {
allgather, allgather,
orte_grpcomm_base_allgather_list, orte_grpcomm_base_allgather_list,
barrier, barrier,
orte_grpcomm_base_set_proc_attr, set_proc_attr,
orte_grpcomm_base_get_proc_attr, orte_grpcomm_base_get_proc_attr,
modex, modex,
orte_grpcomm_base_purge_proc_attrs orte_grpcomm_base_purge_proc_attrs
}; };
static bool profile;
/** /**
* Initialize the module * Initialize the module
*/ */
static int init(void) static int init(void)
{ {
int rc; int rc;
int value;
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_modex_init())) { if (ORTE_SUCCESS != (rc = orte_grpcomm_base_modex_init())) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
/* if we are profiling and I am the HNP, then start the
* profiling receive
*/
mca_base_param_reg_int_name("orte", "grpcomm_recv_on",
"Whether to turn on grpcomm recv",
false, false, (int)false, &value);
profile = OPAL_INT_TO_BOOL(value);
if (profile && orte_process_info.hnp) {
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_comm_start())) {
ORTE_ERROR_LOG(rc);
}
}
return rc; return rc;
} }
@ -91,6 +114,13 @@ static int init(void)
static void finalize(void) static void finalize(void)
{ {
orte_grpcomm_base_modex_finalize(); orte_grpcomm_base_modex_finalize();
/* if we are profiling and I am the HNP, then stop the
* profiling receive
*/
if (profile && orte_process_info.hnp) {
orte_grpcomm_base_comm_stop();
}
} }
/** /**
@ -458,7 +488,7 @@ static int modex(opal_list_t *procs)
orte_std_cntr_t i, num_procs; orte_std_cntr_t i, num_procs;
orte_std_cntr_t cnt; orte_std_cntr_t cnt;
orte_process_name_t proc_name; orte_process_name_t proc_name;
int rc; int rc=ORTE_SUCCESS;
int32_t arch; int32_t arch;
bool modex_reqd = false; bool modex_reqd = false;
@ -466,21 +496,26 @@ static int modex(opal_list_t *procs)
"%s grpcomm:basic: modex entered", "%s grpcomm:basic: modex entered",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* setup the buffer that will actually be sent */ /* if we were given a list of procs to modex with, then this is happening
OBJ_CONSTRUCT(&buf, opal_buffer_t); * as part of a connect/accept operation. In this case, we -must- do the
OBJ_CONSTRUCT(&rbuf, opal_buffer_t); * modex for two reasons:
*
/* put our process name in the buffer so it can be unpacked later */ * (a) the modex could involve procs from different mpiruns. In this case,
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { * there is no way for the two sets of procs to know which node the
ORTE_ERROR_LOG(rc); * other procs are on, so we cannot use the profile_file to determine
goto cleanup; * their contact info
} *
* (b) in a comm_spawn, the parent job does not have a pidmap for the
/* decide if we need to add the architecture to the modex. Check * child job. Thus, it cannot know where the child procs are located,
* first to see if hetero is enabled - if not, then we clearly * and cannot use the profile_file to determine their contact info
* don't need to exchange arch's as they are all identical
*/ */
if (OMPI_ENABLE_HETEROGENEOUS_SUPPORT) { if (NULL != procs || NULL == opal_profile_file || opal_profile) {
modex_reqd = true;
} else if (OMPI_ENABLE_HETEROGENEOUS_SUPPORT) {
/* decide if we need to add the architecture to the modex. Check
* first to see if hetero is enabled - if not, then we clearly
* don't need to exchange arch's as they are all identical
*/
/* Case 1: If different apps in this job were built differently - e.g., some /* Case 1: If different apps in this job were built differently - e.g., some
* are built 32-bit while others are built 64-bit - then we need to modex * are built 32-bit while others are built 64-bit - then we need to modex
* regardless of any other consideration. The user is reqd to tell us via a * regardless of any other consideration. The user is reqd to tell us via a
@ -502,19 +537,27 @@ static int modex(opal_list_t *procs)
} }
if (modex_reqd) { if (modex_reqd) {
/* setup the buffer that will actually be sent */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
OBJ_CONSTRUCT(&rbuf, opal_buffer_t);
/* put our process name in the buffer so it can be unpacked later */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) { if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
}
/* pack the entries we have received */
/* pack the entries we have received */ if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&buf, &modex_reqd))) {
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&buf, &modex_reqd))) { ORTE_ERROR_LOG(rc);
ORTE_ERROR_LOG(rc); goto cleanup;
goto cleanup; }
}
if (modex_reqd) {
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output, OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
"%s grpcomm:basic:modex: executing allgather", "%s grpcomm:basic:modex: executing allgather",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -563,36 +606,47 @@ static int modex(opal_list_t *procs)
goto cleanup; goto cleanup;
} }
if (OMPI_ENABLE_HETEROGENEOUS_SUPPORT) { /* unpack its architecture */
/* are the nodes hetero? */ cnt=1;
if (orte_homogeneous_nodes) { if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &arch, &cnt, OPAL_UINT32))) {
goto unpack_entries; ORTE_ERROR_LOG(rc);
} goto cleanup;
/* unpack its architecture */ }
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&rbuf, &arch, &cnt, OPAL_UINT32))) { /* update the arch in the ESS
ORTE_ERROR_LOG(rc); * RHC: DO NOT UPDATE ARCH IF THE PROC IS NOT IN OUR JOB. THIS IS A TEMPORARY
goto cleanup; * FIX TO COMPENSATE FOR A PROBLEM IN THE CONNECT/ACCEPT CODE WHERE WE EXCHANGE
} * INFO INCLUDING THE ARCH, BUT THEN DO A MODEX THAT ALSO INCLUDES THE ARCH. WE
/* update the arch in the ESS */ * CANNOT UPDATE THE ARCH FOR JOBS OUTSIDE OUR OWN AS THE ESS HAS NO INFO ON
* THOSE PROCS/NODES - AND DOESN'T NEED IT AS THE MPI LAYER HAS ALREADY SET
* ITSELF UP AND DOES NOT NEED ESS SUPPORT FOR PROCS IN THE OTHER JOB
*
* EVENTUALLY, WE WILL SUPPORT THE ESS HAVING INFO ON OTHER JOBS FOR
* FAULT TOLERANCE PURPOSES - BUT NOT RIGHT NOW
*/
if (proc_name.jobid == ORTE_PROC_MY_NAME->jobid) {
if (ORTE_SUCCESS != (rc = orte_ess.update_arch(&proc_name, arch))) { if (ORTE_SUCCESS != (rc = orte_ess.update_arch(&proc_name, arch))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
} }
unpack_entries: OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:basic:modex: adding modex entry for proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc_name)));
/* update the modex database */ /* update the modex database */
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_update_modex_entries(&proc_name, &rbuf))) { if (ORTE_SUCCESS != (rc = orte_grpcomm_base_update_modex_entries(&proc_name, &rbuf))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
} }
cleanup:
OBJ_DESTRUCT(&buf);
OBJ_DESTRUCT(&rbuf);
} }
cleanup:
OBJ_DESTRUCT(&buf);
OBJ_DESTRUCT(&rbuf);
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
"%s grpcomm:basic: modex completed", "%s grpcomm:basic: modex completed",
@ -601,3 +655,170 @@ cleanup:
return rc; return rc;
} }
/* Read exactly nbytes from fd into buf, calling read() again after a short
 * read. Returns 1 when the full amount was read (trivially so for
 * nbytes == 0), 0 when end-of-file is hit before the first byte, and -1 when
 * read() fails or the file ends part-way through.
 */
static int profile_read_all(int fd, void *buf, size_t nbytes)
{
    char *cursor = (char *)buf;
    size_t total = 0;

    while (total < nbytes) {
        ssize_t got = read(fd, cursor + total, nbytes - total);
        if (0 > got) {
            return -1;
        }
        if (0 == got) {
            return (0 == total) ? 0 : -1;
        }
        total += (size_t)got;
    }
    return 1;
}

/* the HNP will -never- execute the following as it is NOT an MPI process
 *
 * Module entry point for setting one of this proc's modex attributes.
 *
 * - When opal_profile is set, the attribute is also packed up as
 *   (node name, attribute name, size, bytes) and sent to the HNP on
 *   ORTE_RML_TAG_GRPCOMM_PROFILE; failures on that path are logged but do
 *   not stop the attribute from being stored locally.
 * - Otherwise the attribute is stored locally and, when opal_profile_file is
 *   set, the file is scanned. Each record is
 *       int32 length + node name, int32 length + attribute name,
 *       int32 size + data bytes
 *   and every record whose attribute name equals attr_name is loaded into
 *   the modex database for each other proc of this job whose host name
 *   matches the record's node name.
 *
 * Every length taken from the file is checked against the local buffer
 * before anything is read into it, and the file descriptor and all
 * temporaries are released on every exit path.
 *
 * @return the result of storing our own attribute when no file is used or
 *         the whole file is processed without error; ORTE_ERR_NOT_FOUND when
 *         the file is missing, unreadable or malformed, or when a proc's
 *         host is unknown; otherwise the error returned by
 *         orte_grpcomm_base_load_modex_data().
 */
static int set_proc_attr(const char *attr_name, const void *data, size_t size)
{
    struct stat buf;
    int rc, ret;
    int fd;
    int32_t num_bytes;
    char *nodename = NULL, *attr = NULL, *prochost;
    char modex_data[8192];
    orte_process_name_t name;
    orte_vpid_t i;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:basic:set_proc_attr for attribute %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name));

    /* if we are doing a profile, pack this up and send it to the HNP */
    if (opal_profile) {
        opal_buffer_t buffer;
        int32_t isize;

        OBJ_CONSTRUCT(&buffer, opal_buffer_t);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &orte_process_info.nodename, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &attr_name, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        isize = size;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &isize, 1, OPAL_INT32))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, data, isize, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_GRPCOMM_PROFILE, 0);
    cleanup:
        OBJ_DESTRUCT(&buffer);
        /* let it fall through so that the job doesn't hang! */
        return orte_grpcomm_base_set_proc_attr(attr_name, data, size);
    }

    /* we always have to set our own attributes in case they are needed for
     * a connect/accept at some later time
     */
    rc = orte_grpcomm_base_set_proc_attr(attr_name, data, size);

    /* if we are not doing a profile, then see if the profile file was
     * provided. if not, then we are done
     */
    if (NULL == opal_profile_file) {
        return rc;
    }

    /* if the file was provided, then we need to check the file to see if
     * info for this particular attribute is available there. But first,
     * the file must be available
     */
    if (0 != stat(opal_profile_file, &buf)) {
        orte_show_help("help-grpcomm-basic.txt", "grpcomm-basic:file-not-found", true, opal_profile_file);
        return ORTE_ERR_NOT_FOUND;
    }

    fd = open(opal_profile_file, O_RDONLY);
    if (fd < 0) {
        orte_show_help("help-grpcomm-basic.txt", "grpcomm-basic:file-cant-open", true, opal_profile_file);
        return ORTE_ERR_NOT_FOUND;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_grpcomm_base_output,
                         "%s grpcomm:basic:set_proc_attr reading %s file for attr %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_profile_file, attr_name));

    /* loop through file until end - each pass consumes one record */
    while (0 < profile_read_all(fd, &num_bytes, sizeof(num_bytes))) {
        OPAL_OUTPUT_VERBOSE((20, orte_grpcomm_base_output,
                             "%s grpcomm:basic:set_proc_attr read %d string length",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_bytes));

        /* this is the number of bytes in the nodename. It must leave room
         * for the terminating NUL supplied by the memset
         */
        memset(modex_data, 0, sizeof(modex_data));
        if (0 > num_bytes || sizeof(modex_data) <= (size_t)num_bytes ||
            0 >= profile_read_all(fd, modex_data, (size_t)num_bytes) ||
            NULL == (nodename = strdup(modex_data))) {
            opal_output(0, "%s: orte:grpcomm:basic: node name not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            rc = ORTE_ERR_NOT_FOUND;
            goto done;
        }
        OPAL_OUTPUT_VERBOSE((20, orte_grpcomm_base_output,
                             "%s grpcomm:basic:set_proc_attr got nodename %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));

        /* get the number of bytes in the attribute name */
        if (0 >= profile_read_all(fd, &num_bytes, sizeof(num_bytes))) {
            opal_output(0, "%s: orte:grpcomm:basic: attribute name size not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            rc = ORTE_ERR_NOT_FOUND;
            goto done;
        }

        /* get the attribute name and save it */
        memset(modex_data, 0, sizeof(modex_data));
        if (0 > num_bytes || sizeof(modex_data) <= (size_t)num_bytes ||
            0 >= profile_read_all(fd, modex_data, (size_t)num_bytes) ||
            NULL == (attr = strdup(modex_data))) {
            opal_output(0, "%s: orte:grpcomm:basic: attribute name not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            rc = ORTE_ERR_NOT_FOUND;
            goto done;
        }
        OPAL_OUTPUT_VERBOSE((20, orte_grpcomm_base_output,
                             "%s grpcomm:basic:set_proc_attr got attribute %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr));

        /* read the number of bytes in the blob */
        if (0 >= profile_read_all(fd, &num_bytes, sizeof(num_bytes))) {
            opal_output(0, "%s: orte:grpcomm:basic: data size not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            rc = ORTE_ERR_NOT_FOUND;
            goto done;
        }

        /* read the bytes so we position ourselves - the blob is raw data,
         * so it may fill the buffer completely
         */
        if (0 > num_bytes || sizeof(modex_data) < (size_t)num_bytes ||
            0 >= profile_read_all(fd, modex_data, (size_t)num_bytes)) {
            opal_output(0, "%s: orte:grpcomm:basic: data not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            rc = ORTE_ERR_NOT_FOUND;
            goto done;
        }

        /* is this from the calling component? */
        if (0 == strcmp(attr, attr_name)) {
            /* lookup all procs on the given node */
            name.jobid = ORTE_PROC_MY_NAME->jobid;
            for (i=0; i < orte_process_info.num_procs; i++) {
                name.vpid = i;
                /* if this is me, just skip it - I loaded my info above */
                if (ORTE_PROC_MY_NAME->vpid == name.vpid) {
                    continue;
                }
                prochost = orte_ess.proc_get_hostname(&name);
                if (NULL == prochost) {
                    /* report error - unknown host */
                    opal_output(0, "%s: orte:grpcomm:basic: host for proc %s not found",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name));
                    rc = ORTE_ERR_NOT_FOUND;
                    goto done;
                }
                OPAL_OUTPUT_VERBOSE((20, orte_grpcomm_base_output,
                                     "%s grpcomm:basic:set_proc_attr checking node %s against %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename, prochost));
                /* NOTE(review): this compares only the first strlen(prochost)
                 * characters, so host "node1" also matches a recorded
                 * "node10" - presumably meant to tolerate domain suffixes;
                 * confirm before tightening
                 */
                if (0 == strncmp(nodename, prochost, strlen(prochost))) {
                    /* on this host - load the data into the modex db */
                    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_load_modex_data(&name, (char*)attr_name, modex_data, num_bytes))) {
                        ORTE_ERROR_LOG(ret);
                        rc = ret;
                        goto done;
                    }
                }
            }
        }

        /* finished with this record */
        free(nodename);
        nodename = NULL;
        free(attr);
        attr = NULL;
    }

done:
    /* single exit for everything after the file was opened; free(NULL)
     * is a no-op
     */
    free(nodename);
    free(attr);
    close(fd);
    return rc;
}

Просмотреть файл

@ -106,6 +106,9 @@ BEGIN_C_DECLS
/* debugger release */ /* debugger release */
#define ORTE_RML_TAG_DEBUGGER_RELEASE 32 #define ORTE_RML_TAG_DEBUGGER_RELEASE 32
/* profile data */
#define ORTE_RML_TAG_GRPCOMM_PROFILE 33
#define ORTE_RML_TAG_MAX 100 #define ORTE_RML_TAG_MAX 100

Просмотреть файл

@ -224,6 +224,13 @@ int orte_daemon(int argc, char *argv[])
*/ */
mca_base_cmd_line_process_args(cmd_line, &environ, &environ); mca_base_cmd_line_process_args(cmd_line, &environ, &environ);
/* make sure that opal_profile is -not- set as we do not care
* what frameworks are opened by the daemons
*/
if (NULL != getenv("OMPI_MCA_opal_profile")) {
putenv("OMPI_MCA_opal_profile=0");
}
/* Ensure that enough of OPAL is setup for us to be able to run */ /* Ensure that enough of OPAL is setup for us to be able to run */
/* /*
* NOTE: (JJH) * NOTE: (JJH)

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix orte_barrier
all: $(PROGS) all: $(PROGS)

28
orte/test/system/orte_barrier.c Обычный файл
Просмотреть файл

@ -0,0 +1,28 @@
/* -*- C -*-
*
* $HEADER$
*
* The most basic of applications
*/
#include <stdio.h>
#include "orte/runtime/runtime.h"
#include "orte/mca/grpcomm/grpcomm.h"
/*
 * Bring up ORTE, run two barriers back to back, and shut down again.
 *
 * Returns 0 only when init, both barriers and finalize all report
 * ORTE_SUCCESS. Failures are reported on stderr. The program returns from
 * main instead of calling exit(), so it needs nothing beyond <stdio.h> and
 * the ORTE headers.
 */
int main(int argc, char* argv[])
{
    int status = 0;

    if (ORTE_SUCCESS != orte_init(ORTE_NON_TOOL)) {
        fprintf(stderr, "Failed orte_init\n");
        return 1;
    }

    /* both barriers are always attempted, as before; a failure is
     * remembered so the exit status reflects it */
    if (ORTE_SUCCESS != orte_grpcomm.barrier()) {
        fprintf(stderr, "Failed first barrier\n");
        status = 1;
    }
    if (ORTE_SUCCESS != orte_grpcomm.barrier()) {
        fprintf(stderr, "Failed second barrier\n");
        status = 1;
    }

    if (ORTE_SUCCESS != orte_finalize()) {
        fprintf(stderr, "Failed orte_finalize\n");
        return 1;
    }
    return status;
}

Просмотреть файл

@ -114,6 +114,7 @@ static char *ompi_server=NULL;
static opal_event_t *abort_exit_event=NULL; static opal_event_t *abort_exit_event=NULL;
static bool forcibly_die = false; static bool forcibly_die = false;
static opal_event_t *timeout_ev=NULL; static opal_event_t *timeout_ev=NULL;
static bool profile_is_set = false;
/* /*
* Globals * Globals
@ -371,6 +372,35 @@ int orterun(int argc, char *argv[])
return rc; return rc;
} }
/*
* Since this process can now handle MCA/GMCA parameters, make sure to
* process them.
*/
mca_base_cmd_line_process_args(&cmd_line, &environ, &environ);
/* make sure that opal_profile is -not- set for us locally as
* we really only want to profile MPI apps. However, if it is
* set, remember it so we can add it to the apps environment later
*/
if (NULL != getenv("OMPI_MCA_opal_profile")) {
putenv("OMPI_MCA_opal_profile=0");
profile_is_set = true;
/* ensure that I know to turn on my profile receive! */
putenv("OMPI_MCA_orte_grpcomm_recv_on=1");
}
/* Ensure that enough of OPAL is setup for us to be able to run */
/*
* NOTE: (JJH)
* We need to allow 'mca_base_cmd_line_process_args()' to process command
* line arguments *before* calling opal_init_util() since the command
* line could contain MCA parameters that affect the way opal_init_util()
* functions. AMCA parameters are one such option normally received on the
* command line that affect the way opal_init_util() behaves.
* It is "safe" to call mca_base_cmd_line_process_args() before
* opal_init_util() since mca_base_cmd_line_process_args() does *not*
* depend upon opal_init_util() functionality.
*/
/* Need to initialize OPAL so that install_dirs are filled in */ /* Need to initialize OPAL so that install_dirs are filled in */
/* /*
* NOTE: (JJH) * NOTE: (JJH)
@ -1628,24 +1658,14 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
free(param); free(param);
} }
} }
/* if profile was set, add it back in */
if (profile_is_set) {
opal_setenv("OMPI_MCA_opal_profile", "1", true, &app->env);
}
/* add the ompi-server, if provided */ /* add the ompi-server, if provided */
if (NULL != ompi_server) { if (NULL != ompi_server) {
bool found_serv = false; opal_setenv("OMPI_MCA_pubsub_orte_server", ompi_server, true, &app->env);
asprintf(&param, "OMPI_MCA_pubsub_orte_server=%s", ompi_server);
/* this shouldn't exist, but if it does... */
for (i=0; i < opal_argv_count(app->env); i++) {
if (0 == strcmp(param, app->env[i])) {
free(app->env[i]);
app->env[i] = strdup(param);
found_serv = true;
break;
}
}
if (!found_serv) {
opal_argv_append_nosize(&app->env, param); /* add it */
}
free(param);
} }
/* Did the user request to export any environment variables? */ /* Did the user request to export any environment variables? */