Complete the fix of the ORTE DVM. We will now use "prun" instead of "orterun -hnp foo" to execute jobs. This provides the feature of automatic discovery of the orte-dvm so you don't need to manually enter URIs or contact file locations. All IO is forwarded to prun.
Still in the "needs to be done" category: * mapping/ranking/binding options aren't correctly supported * if the DVM encounters some errors (e.g., not enough resources for the job), the resulting error is globally set and impacts any subsequent job submission Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
bffcc3bca0
Коммит
3c914a7a97
@ -47,10 +47,8 @@ install-exec-hook:
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT))
|
||||
if OPAL_WANT_PRUN
|
||||
if WANT_INSTALL_HEADERS
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT))
|
||||
endif
|
||||
endif
|
||||
|
||||
uninstall-local:
|
||||
rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \
|
||||
@ -60,9 +58,7 @@ uninstall-local:
|
||||
$(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT)
|
||||
if OPAL_WANT_PRUN
|
||||
if WANT_INSTALL_HEADERS
|
||||
m -f $(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT)
|
||||
endif
|
||||
rm -f $(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT)
|
||||
endif
|
||||
|
||||
endif # OPAL_INSTALL_BINARIES
|
||||
@ -119,11 +115,9 @@ ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-server/orte-server.1 ompi-server.1
|
||||
|
||||
if OPAL_WANT_PRUN
|
||||
if WANT_INSTALL_HEADERS
|
||||
ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1
|
||||
endif
|
||||
endif
|
||||
|
||||
clean-local:
|
||||
rm -f $(man_pages)
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -837,7 +837,7 @@ int opal_dss_pack_value(opal_buffer_t *buffer, const void *src,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE %d", (int)ptr[i]->type);
|
||||
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
}
|
||||
@ -981,4 +981,3 @@ int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -1086,13 +1086,21 @@ int opal_dss_unpack_value(opal_buffer_t *buffer, void *dest,
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case OPAL_PTR:
|
||||
/* just ignore these values */
|
||||
break;
|
||||
case OPAL_NAME:
|
||||
if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.name, &m, OPAL_NAME))) {
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case OPAL_STATUS:
|
||||
if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.status, &m, OPAL_INT))) {
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE");
|
||||
opal_output(0, "UNPACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -84,6 +84,8 @@ static int opal_pmix_base_frame_open(mca_base_open_flag_t flags)
|
||||
rc = mca_base_framework_components_open(&opal_pmix_base_framework, flags);
|
||||
/* ensure the function pointers are NULL */
|
||||
memset(&opal_pmix, 0, sizeof(opal_pmix));
|
||||
/* default to the OPAL event base */
|
||||
opal_pmix_base.evbase = opal_sync_event_base;
|
||||
/* pass across the verbosity */
|
||||
opal_pmix_verbose_output = opal_pmix_base_framework.framework_output;
|
||||
return rc;
|
||||
|
@ -48,16 +48,7 @@ AC_DEFUN([MCA_opal_pmix_ext2x_CONFIG],[
|
||||
[$1
|
||||
# need to set the wrapper flags for static builds
|
||||
pmix_ext2x_WRAPPER_EXTRA_LDFLAGS=$opal_external_pmix_LDFLAGS
|
||||
pmix_ext2x_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS
|
||||
# and the flags for prun
|
||||
OPAL_PMIX_CPPFLAGS="-I$opal_external_pmix_CPPFLAGS"
|
||||
AC_SUBST(OPAL_PMIX_CPPFLAGS)
|
||||
OPAL_PMIX_LDFLAGS=$opal_external_pmix_LDFLAGS
|
||||
AC_SUBST(OPAL_PMIX_LDFLAGS)
|
||||
OPAL_PMIX_LDADD=
|
||||
AC_SUBST(OPAL_PMIX_LDADD)
|
||||
OPAL_PMIX_LIBS=-lpmix
|
||||
AC_SUBST(OPAL_PMIX_LIBS)],
|
||||
pmix_ext2x_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS],
|
||||
[$2])],
|
||||
[$2])
|
||||
|
||||
|
@ -852,6 +852,21 @@ typedef void (*opal_pmix_base_module_query_fn_t)(opal_list_t *queries,
|
||||
typedef void (*opal_pmix_base_log_fn_t)(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* allocation */
|
||||
typedef int (*opal_pmix_base_alloc_fn_t)(opal_pmix_alloc_directive_t directive,
|
||||
opal_list_t *info,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* job control */
|
||||
typedef int (*opal_pmix_base_job_control_fn_t)(opal_list_t *targets,
|
||||
opal_list_t *directives,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* monitoring */
|
||||
typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
||||
opal_list_t *directives,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/*
|
||||
* the standard public API data structure
|
||||
*/
|
||||
@ -883,6 +898,9 @@ typedef struct {
|
||||
opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes;
|
||||
opal_pmix_base_module_query_fn_t query;
|
||||
opal_pmix_base_log_fn_t log;
|
||||
opal_pmix_base_alloc_fn_t allocate;
|
||||
opal_pmix_base_job_control_fn_t job_control;
|
||||
opal_pmix_base_process_monitor_fn_t monitor;
|
||||
/* server APIs */
|
||||
opal_pmix_base_module_server_init_fn_t server_init;
|
||||
opal_pmix_base_module_server_finalize_fn_t server_finalize;
|
||||
|
@ -86,16 +86,7 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[
|
||||
opal_pmix_pmix2x_LDFLAGS=
|
||||
opal_pmix_pmix2x_LIBS="$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/src/libpmix.la"
|
||||
opal_pmix_pmix2x_CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/include -I$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix -I$OPAL_TOP_SRCDIR/$opal_pmix_pmix2x_basedir/pmix/include -I$OPAL_TOP_SRCDIR/$opal_pmix_pmix2x_basedir/pmix"
|
||||
opal_pmix_pmix2x_DEPENDENCIES="$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/src/libpmix.la"
|
||||
# and the flags for prun
|
||||
OPAL_PMIX_CPPFLAGS="$opal_pmix_pmix2x_CPPFLAGS"
|
||||
AC_SUBST(OPAL_PMIX_CPPFLAGS)
|
||||
OPAL_PMIX_LDADD=$opal_pmix_pmix2x_LIBS
|
||||
AC_SUBST(OPAL_PMIX_LDADD)
|
||||
OPAL_PMIX_LIBS=
|
||||
AC_SUBST(OPAL_PMIX_LIBS)
|
||||
OPAL_PMIX_LDFLAGS=
|
||||
AC_SUBST(OPAL_PMIX_LDFLAGS)])
|
||||
opal_pmix_pmix2x_DEPENDENCIES="$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/src/libpmix.la"])
|
||||
|
||||
AC_SUBST([opal_pmix_pmix2x_LIBS])
|
||||
AC_SUBST([opal_pmix_pmix2x_CPPFLAGS])
|
||||
|
@ -30,7 +30,7 @@ greek=
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=gitdcf4faf
|
||||
repo_rev=git2389189
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Sep 13, 2017"
|
||||
date="Sep 14, 2017"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
|
@ -9,10 +9,12 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -61,7 +63,7 @@
|
||||
# type: string (root path to install shell scripts)
|
||||
%{!?shell_scripts_path: %define shell_scripts_path %{_bindir}}
|
||||
# type: string (base name of the shell scripts)
|
||||
%{!?shell_scripts_basename: %define shell_scripts_basename mpivars}
|
||||
%{!?shell_scripts_basename: %define shell_scripts_basename pmixvars}
|
||||
|
||||
# Define this to 1 if you want this RPM to install a modulefile.
|
||||
# type: bool (0/1)
|
||||
@ -78,17 +80,6 @@
|
||||
# type: string (name of modules RPM)
|
||||
%{!?modules_rpm_name: %define modules_rpm_name environment-modules}
|
||||
|
||||
# Should we use the mpi-selector functionality?
|
||||
# type: bool (0/1)
|
||||
%{!?use_mpi_selector: %define use_mpi_selector 0}
|
||||
# The name of the mpi-selector RPM. Can vary from system to system.
|
||||
# type: string (name of mpi-selector RPM)
|
||||
%{!?mpi_selector_rpm_name: %define mpi_selector_rpm_name mpi-selector}
|
||||
# The location of the mpi-selector executable (can be a relative path
|
||||
# name if "mpi-selector" can be found in the path)
|
||||
# type: string (path to mpi-selector exectuable)
|
||||
%{!?mpi_selector: %define mpi_selector mpi-selector}
|
||||
|
||||
# Should we build a debuginfo RPM or not?
|
||||
# type: bool (0/1)
|
||||
%{!?build_debuginfo_rpm: %define build_debuginfo_rpm 0}
|
||||
@ -100,7 +91,7 @@
|
||||
# Should we use the default "check_files" RPM step (i.e., check for
|
||||
# unpackaged files)? It is discouraged to disable this, but some
|
||||
# installers need it (e.g., older versions of OFED, because they
|
||||
# installed lots of other stuff in the BUILD_ROOT before PMIx/SHMEM).
|
||||
# installed lots of other stuff in the BUILD_ROOT before PMIx).
|
||||
# type: bool (0/1)
|
||||
%{!?use_check_files: %define use_check_files 1}
|
||||
|
||||
@ -125,7 +116,7 @@
|
||||
# type: bool (0/1)
|
||||
%{!?disable_auto_requires: %define disable_auto_requires 0}
|
||||
|
||||
# On some platforms, PMIx/SHMEM just flat-out doesn't work with
|
||||
# On some platforms, PMIx just flat-out doesn't work with
|
||||
# -D_FORTIFY_SOURCE (e.g., some users have reported that there are
|
||||
# problems on ioa64 platforms). In this case, just turn it off
|
||||
# (meaning: this specfile will strip out that flag from the
|
||||
@ -152,7 +143,7 @@
|
||||
%define _includedir /opt/%{name}/%{version}/include
|
||||
%define _mandir /opt/%{name}/%{version}/man
|
||||
# Note that the name "pmix" is hard-coded in
|
||||
# opal/mca/installdirs/config for pkgdatadir; there is currently no
|
||||
# src/mca/installdirs/config for pkgdatadir; there is currently no
|
||||
# easy way to have PMIx change this directory name internally. So we
|
||||
# just hard-code that name here as well (regardless of the value of
|
||||
# %{name} or %{_name}).
|
||||
@ -162,6 +153,8 @@
|
||||
# bets are off. So feel free to install it anywhere in your tree. He
|
||||
# suggests $prefix/doc.
|
||||
%define _defaultdocdir /opt/%{name}/%{version}/doc
|
||||
# Also put the modulefile in /opt.
|
||||
%define modulefile_path /opt/%{name}/%{version}/share/pmixmodulefiles
|
||||
%endif
|
||||
|
||||
%if !%{build_debuginfo_rpm}
|
||||
@ -191,10 +184,6 @@
|
||||
%define optflags ""
|
||||
%endif
|
||||
|
||||
%if %{use_mpi_selector}
|
||||
%define install_shell_scripts 1
|
||||
%endif
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# Preamble Section
|
||||
@ -212,7 +201,7 @@ Packager: %{?_packager:%{_packager}}%{!?_packager:%{_vendor}}
|
||||
Vendor: %{?_vendorinfo:%{_vendorinfo}}%{!?_vendorinfo:%{_vendor}}
|
||||
Distribution: %{?_distribution:%{_distribution}}%{!?_distribution:%{_vendor}}
|
||||
Prefix: %{_prefix}
|
||||
Provides: mpi
|
||||
Provides: pmix
|
||||
Provides: pmix = %{version}
|
||||
BuildRoot: /var/tmp/%{name}-%{version}-%{release}-root
|
||||
%if %{disable_auto_requires}
|
||||
@ -221,9 +210,6 @@ AutoReq: no
|
||||
%if %{install_modulefile}
|
||||
Requires: %{modules_rpm_name}
|
||||
%endif
|
||||
%if %{use_mpi_selector}
|
||||
Requires: %{mpi_selector_rpm_name}
|
||||
%endif
|
||||
|
||||
%description
|
||||
The Process Management Interface (PMI) has been used for quite some time as a
|
||||
@ -340,9 +326,8 @@ fi
|
||||
|
||||
CFLAGS="%{?cflags:%{cflags}}%{!?cflags:$RPM_OPT_FLAGS}"
|
||||
CXXFLAGS="%{?cxxflags:%{cxxflags}}%{!?cxxflags:$RPM_OPT_FLAGS}"
|
||||
FFLAGS="%{?f77flags:%{f77flags}}%{!?f7flags:$RPM_OPT_FLAGS}"
|
||||
FCFLAGS="%{?fcflags:%{fcflags}}%{!?fcflags:$RPM_OPT_FLAGS}"
|
||||
export CFLAGS CXXFLAGS F77FLAGS FCFLAGS
|
||||
export CFLAGS CXXFLAGS FCFLAGS
|
||||
|
||||
%configure %{configure_options}
|
||||
%{__make} %{?mflags}
|
||||
@ -369,14 +354,14 @@ cat <<EOF >$RPM_BUILD_ROOT/%{modulefile_path}/%{modulefile_subdir}/%{modulefile_
|
||||
#%Module
|
||||
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# PMIx/SHMEM RPM). Any changes made here will be lost a) if the RPM is
|
||||
# PMIx RPM). Any changes made here will be lost a) if the RPM is
|
||||
# uninstalled, or b) if the RPM is upgraded or uninstalled.
|
||||
|
||||
proc ModulesHelp { } {
|
||||
puts stderr "This module adds PMIx/SHMEM v%{version} to various paths"
|
||||
puts stderr "This module adds PMIx v%{version} to various paths"
|
||||
}
|
||||
|
||||
module-whatis "Sets up PMIx/SHMEM v%{version} in your enviornment"
|
||||
module-whatis "Sets up PMIx v%{version} in your enviornment"
|
||||
|
||||
prepend-path PATH "%{_prefix}/bin/"
|
||||
prepend-path LD_LIBRARY_PATH %{_libdir}
|
||||
@ -391,7 +376,7 @@ EOF
|
||||
%{__mkdir_p} $RPM_BUILD_ROOT/%{shell_scripts_path}
|
||||
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.sh
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# PMIx/SHMEM RPM). Any changes made here will be lost if the RPM is
|
||||
# PMIx RPM). Any changes made here will be lost if the RPM is
|
||||
# uninstalled or upgraded.
|
||||
|
||||
# PATH
|
||||
@ -412,13 +397,10 @@ if test -z "\`echo \$MANPATH | grep %{_mandir}\`"; then
|
||||
export MANPATH
|
||||
fi
|
||||
|
||||
# MPI_ROOT
|
||||
MPI_ROOT=%{_prefix}
|
||||
export MPI_ROOT
|
||||
EOF
|
||||
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.csh
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# PMIx/SHMEM RPM). Any changes made here will be lost if the RPM is
|
||||
# PMIx RPM). Any changes made here will be lost if the RPM is
|
||||
# uninstalled or upgraded.
|
||||
|
||||
# path
|
||||
@ -444,8 +426,6 @@ else
|
||||
setenv MANPATH %{_mandir}:
|
||||
endif
|
||||
|
||||
# MPI_ROOT
|
||||
setenv MPI_ROOT %{_prefix}
|
||||
EOF
|
||||
%endif
|
||||
# End of shell_scripts if
|
||||
@ -465,30 +445,6 @@ rm -rf $RPM_BUILD_DIR/%{name}-%{version}
|
||||
|
||||
test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# Post Install Section
|
||||
#
|
||||
#############################################################################
|
||||
%if %{use_mpi_selector}
|
||||
%post
|
||||
%{mpi_selector} \
|
||||
--register %{name}-%{version} \
|
||||
--source-dir %{shell_scripts_path} \
|
||||
--yes
|
||||
%endif
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# Pre Uninstall Section
|
||||
#
|
||||
#############################################################################
|
||||
%if %{use_mpi_selector}
|
||||
%preun
|
||||
%{mpi_selector} --unregister %{name}-%{version} --yes || \
|
||||
/bin/true > /dev/null 2> /dev/null
|
||||
%endif
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# Files Section
|
||||
@ -504,13 +460,20 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-, root, root, -)
|
||||
%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0)
|
||||
#%{_bindir}/*
|
||||
%{_includedir}/*
|
||||
%{_libdir}/*
|
||||
%{_datadir}
|
||||
%else
|
||||
%{_prefix}
|
||||
# If the sysconfdir is not under the prefix, then list it explicitly.
|
||||
%if !%{sysconfdir_in_prefix}
|
||||
%{_sysconfdir}
|
||||
%endif
|
||||
# If %{install_in_opt}, then we're installing PMIx to
|
||||
# /opt/pmix/<version>. But be sure to also explicitly mention
|
||||
# If the sysconfdir is not under the prefix, then list it explicitly.
|
||||
#%if !%{sysconfdir_in_prefix}
|
||||
#%{_sysconfdir}/*
|
||||
#%endif
|
||||
# If %{install_in_opt}, then we're instaling PMIx to
|
||||
# /opt/pmix<version>. But be sure to also explicitly mention
|
||||
# /opt/pmix so that it can be removed by RPM when everything under
|
||||
# there is also removed.
|
||||
%if %{install_in_opt}
|
||||
@ -527,14 +490,22 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
%endif
|
||||
%doc README INSTALL LICENSE
|
||||
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# Changelog
|
||||
#
|
||||
#############################################################################
|
||||
%changelog
|
||||
* Fri Jun 19 2015 Ralph H. Castain <rhc@open-mpi.org>
|
||||
- Port to PMIx
|
||||
* Tue Sep 12 2017 Ralph Castain <rhc@open-mpi.org>
|
||||
- Port to pmix
|
||||
|
||||
* Tue Mar 28 2017 Jeff Squyres <jsquyres@cisco.com>
|
||||
- Reverting a decision from a prior changelog entry: if
|
||||
install_in_opt==1, then even put the modulefile under /opt.
|
||||
|
||||
* Thu Nov 12 2015 Gilles Gouaillardet <gilles@rist.or.jp>
|
||||
- Revamp packaging when prefix is /usr
|
||||
|
||||
* Tue Jan 20 2015 Bert Wesarg <bert.wesarg@tu-dresden.de>
|
||||
- Remove VampirTrace wrapper from package.
|
||||
@ -545,7 +516,7 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
fields in case %{name} is overridden.
|
||||
|
||||
* Mon Jun 24 2013 Igor Ivanov <Igor.Ivanov@itseez.com>
|
||||
- Add Open SHMEM parallel programming library as part of Open MPI
|
||||
- Add Open parallel programming library as part of PMIx
|
||||
|
||||
* Tue Dec 11 2012 Jeff Squyres <jsquyres@cisco.com>
|
||||
- Re-release 1.6.0-1.6.3 SRPMs (with new SRPM Release numbers) with
|
||||
@ -593,7 +564,7 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
leave_build_root kludge nastyness. W00t!
|
||||
|
||||
* Fri Jan 18 2008 Jeff Squyres <jsquyres@cisco.com>
|
||||
- Remove the hard-coded "pmix" name from two Requires statements
|
||||
- Remove the hard-coded "openmpi" name from two Requires statements
|
||||
and use %{name} instead (FWIW, %{_name} caused rpmbuild to barf).
|
||||
|
||||
* Wed Jan 2 2008 Jeff Squyres <jsquyres@cisco.com>
|
||||
@ -683,4 +654,3 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
* Wed Mar 23 2005 Mezzanine <mezzanine@kainx.org>
|
||||
- Specfile auto-generated by Mezzanine
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
|
@ -510,10 +510,14 @@ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p,
|
||||
memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t));
|
||||
break;
|
||||
case PMIX_PROC:
|
||||
memcpy(&p->data.proc, &src->data.proc, sizeof(pmix_proc_t));
|
||||
PMIX_PROC_CREATE(p->data.proc, 1);
|
||||
if (NULL == p->data.proc) {
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t));
|
||||
break;
|
||||
case PMIX_PROC_RANK:
|
||||
memcpy(&p->data.proc, &src->data.rank, sizeof(pmix_rank_t));
|
||||
memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t));
|
||||
break;
|
||||
case PMIX_BYTE_OBJECT:
|
||||
case PMIX_COMPRESSED_STRING:
|
||||
|
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2006 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
# Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2016 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
|
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2006 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
# Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2016 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
|
@ -99,6 +99,8 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = {
|
||||
.resolve_nodes = pmix2x_resolve_nodes,
|
||||
.query = pmix2x_query,
|
||||
.log = pmix2x_log,
|
||||
.allocate = pmix2x_allocate,
|
||||
.job_control = pmix2x_job_control,
|
||||
/* server APIs */
|
||||
.server_init = pmix2x_server_init,
|
||||
.server_finalize = pmix2x_server_finalize,
|
||||
@ -265,9 +267,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
|
||||
} else {
|
||||
if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(cd);
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return;
|
||||
cd->pname.jobid = OPAL_NAME_INVALID->jobid;
|
||||
}
|
||||
cd->pname.vpid = pmix2x_convert_rank(source->rank);
|
||||
}
|
||||
@ -750,7 +750,7 @@ void pmix2x_value_load(pmix_value_t *v,
|
||||
break;
|
||||
case OPAL_STATUS:
|
||||
v->type = PMIX_STATUS;
|
||||
memcpy(&(v->data.status), &kv->data.status, sizeof(pmix_status_t));
|
||||
v->data.status = pmix2x_convert_opalrc(kv->data.status);
|
||||
break;
|
||||
case OPAL_VPID:
|
||||
v->type = PMIX_PROC_RANK;
|
||||
@ -770,7 +770,7 @@ void pmix2x_value_load(pmix_value_t *v,
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
(void)opal_snprintf_jobid(v->data.proc->nspace, PMIX_MAX_NSLEN, kv->data.name.vpid);
|
||||
(void)opal_snprintf_jobid(v->data.proc->nspace, PMIX_MAX_NSLEN, kv->data.name.jobid);
|
||||
}
|
||||
v->data.proc->rank = pmix2x_convert_opalrank(kv->data.name.vpid);
|
||||
break;
|
||||
@ -925,7 +925,7 @@ int pmix2x_value_unload(opal_value_t *kv,
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
kv->type = OPAL_STATUS;
|
||||
memcpy(&kv->data.status, &(v->data.status), sizeof(opal_status_t));
|
||||
kv->data.status = pmix2x_convert_rc(v->data.status);
|
||||
break;
|
||||
case PMIX_PROC_RANK:
|
||||
kv->type = OPAL_VPID;
|
||||
@ -1185,14 +1185,7 @@ static int notify_event(int status,
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(kv, info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
|
||||
/* little dicey here as we need to convert a status, if
|
||||
* provided, and it will be an int coming down to us */
|
||||
if (0 == strcmp(kv->key, OPAL_PMIX_JOB_TERM_STATUS)) {
|
||||
op->info[n].value.type = PMIX_STATUS;
|
||||
op->info[n].value.data.status = pmix2x_convert_opalrc(kv->data.integer);
|
||||
} else {
|
||||
pmix2x_value_load(&op->info[n].value, kv);
|
||||
}
|
||||
pmix2x_value_load(&op->info[n].value, kv);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
@ -226,6 +226,9 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
pinfo = NULL;
|
||||
ninfo = 0;
|
||||
}
|
||||
/* we are going to get our name from the server, or we were given it by the tool,
|
||||
* so mark as native launch so we don't convert back/forth */
|
||||
mca_pmix_pmix2x_component.native_launch = true;
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
rc = PMIx_tool_init(&my_proc, pinfo, ninfo);
|
||||
@ -245,20 +248,10 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
if (OPAL_JOBID_INVALID == pname.jobid) {
|
||||
/* store our jobid and rank */
|
||||
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
|
||||
/* if we were launched by the OMPI RTE, then
|
||||
* the jobid is in a special format - so get it */
|
||||
mca_pmix_pmix2x_component.native_launch = true;
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
} else {
|
||||
/* we were launched by someone else, so make the
|
||||
* jobid just be the hash of the nspace */
|
||||
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
|
||||
}
|
||||
pname.vpid = pmix2x_convert_rank(my_proc.rank);
|
||||
}
|
||||
/* store our jobid and rank */
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
pname.vpid = pmix2x_convert_rank(my_proc.rank);
|
||||
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
job = OBJ_NEW(opal_pmix2x_jobid_trkr_t);
|
||||
@ -1154,6 +1147,9 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
|
||||
if (NULL != app->env) {
|
||||
papps[n].env = opal_argv_copy(app->env);
|
||||
}
|
||||
if (NULL != app->cwd) {
|
||||
papps[n].cwd = strdup(app->cwd);
|
||||
}
|
||||
papps[n].maxprocs = app->maxprocs;
|
||||
if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) {
|
||||
PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo);
|
||||
|
@ -93,7 +93,7 @@ static int
|
||||
opal_err2str(int errnum, const char **errmsg)
|
||||
{
|
||||
const char *retval;
|
||||
|
||||
opal_output(0, "OPAL ERR2STR %d", errnum);
|
||||
switch (errnum) {
|
||||
case OPAL_SUCCESS:
|
||||
retval = "Success";
|
||||
|
@ -111,7 +111,7 @@ static int rte_init(void)
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto fn_fail;
|
||||
|
@ -65,7 +65,7 @@ ORTE_DECLSPEC int orte_ess_base_app_setup(bool db_restrict_local);
|
||||
ORTE_DECLSPEC int orte_ess_base_app_finalize(void);
|
||||
ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report);
|
||||
|
||||
ORTE_DECLSPEC int orte_ess_base_tool_setup(void);
|
||||
ORTE_DECLSPEC int orte_ess_base_tool_setup(uint8_t flags);
|
||||
ORTE_DECLSPEC int orte_ess_base_tool_finalize(void);
|
||||
|
||||
ORTE_DECLSPEC int orte_ess_base_orted_setup(void);
|
||||
|
@ -38,20 +38,18 @@
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#include "opal/runtime/opal_progress_threads.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/plm/base/base.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
#endif
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -63,13 +61,51 @@
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
|
||||
|
||||
int orte_ess_base_tool_setup(void)
|
||||
static void infocb(int status,
|
||||
opal_list_t *info,
|
||||
void *cbdata,
|
||||
opal_pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
opal_value_t *kv;
|
||||
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
||||
|
||||
if (OPAL_SUCCESS != status) {
|
||||
ORTE_ERROR_LOG(status);
|
||||
} else {
|
||||
kv = (opal_value_t*)opal_list_get_first(info);
|
||||
if (NULL == kv) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
|
||||
} else {
|
||||
if (0 == strcmp(kv->key, OPAL_PMIX_SERVER_URI)) {
|
||||
orte_process_info.my_hnp_uri = strdup(kv->data.string);
|
||||
} else {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NULL != release_fn) {
|
||||
release_fn(release_cbdata);
|
||||
}
|
||||
OPAL_PMIX_WAKEUP_THREAD(lock);
|
||||
}
|
||||
|
||||
int orte_ess_base_tool_setup(uint8_t flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
opal_list_t transports;
|
||||
orte_jobid_t jobid;
|
||||
orte_vpid_t vpid;
|
||||
opal_list_t info;
|
||||
opal_value_t *kv, val;
|
||||
opal_pmix_query_t *q;
|
||||
opal_pmix_lock_t lock;
|
||||
opal_buffer_t *buf;
|
||||
|
||||
/* we need an external progress thread to ensure that things run
|
||||
* async with the PMIx code */
|
||||
orte_event_base = opal_progress_thread_init("tool");
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
@ -84,7 +120,13 @@ int orte_ess_base_tool_setup(void)
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* set the event base */
|
||||
if (NULL == opal_pmix.tool_init) {
|
||||
/* we no longer support non-pmix tools */
|
||||
error = "opal_pmix.tool_init";
|
||||
ret = ORTE_ERR_NOT_SUPPORTED;
|
||||
goto error;
|
||||
}
|
||||
/* set the event base for the pmix component code */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
|
||||
/* we have to define our name here */
|
||||
@ -126,44 +168,68 @@ int orte_ess_base_tool_setup(void)
|
||||
|
||||
/* initialize - PMIx may set our name here if we attach to
|
||||
* a PMIx server */
|
||||
if (NULL != opal_pmix.tool_init) {
|
||||
opal_list_t info;
|
||||
opal_value_t *kv;
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
/* pass our name so the PMIx layer can use it */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
/* pass our name so the PMIx layer can use it */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_NSPACE);
|
||||
orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid);
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(&info, &kv->super);
|
||||
/* ditto for our rank */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_RANK);
|
||||
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
kv->type = OPAL_VPID;
|
||||
opal_list_append(&info, &kv->super);
|
||||
if (0 != flags) {
|
||||
/* instruct the PMIx layer on if/how to connect */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_NSPACE);
|
||||
orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid);
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(&info, &kv->super);
|
||||
/* ditto for our rank */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_RANK);
|
||||
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
kv->type = OPAL_VPID;
|
||||
opal_list_append(&info, &kv->super);
|
||||
/* ORTE tools don't need to connect to a PMIx server as
|
||||
* they will connect via the OOB */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
|
||||
if (0x01 == flags) {
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
|
||||
} else if (0x02 == flags) {
|
||||
kv->key = strdup(OPAL_PMIX_CONNECT_SYSTEM_FIRST);
|
||||
} else if (0x04 == flags) {
|
||||
kv->key = strdup(OPAL_PMIX_CONNECT_TO_SYSTEM);
|
||||
} else {
|
||||
opal_output(0, "UNKNOWN CONNECTION FLAG %0x", flags);
|
||||
error = "unknown connection flags";
|
||||
ret = ORTE_ERR_BAD_PARAM;
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
OBJ_RELEASE(kv);
|
||||
goto error;
|
||||
}
|
||||
kv->data.flag = true;
|
||||
kv->type = OPAL_BOOL;
|
||||
opal_list_append(&info, &kv->super);
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix.init";
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
}
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix.init";
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
|
||||
orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
|
||||
orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
|
||||
orte_process_info.super.proc_arch = opal_local_arch;
|
||||
opal_proc_local_set(&orte_process_info.super);
|
||||
|
||||
if (NULL != opal_pmix.query) {
|
||||
/* query the server for its URI so we can get any IO forwarded to us */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
q = OBJ_NEW(opal_pmix_query_t);
|
||||
opal_argv_append_nosize(&q->keys, OPAL_PMIX_SERVER_URI);
|
||||
opal_list_append(&info, &q->super);
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||
opal_pmix.query(&info, infocb, &lock);
|
||||
OPAL_PMIX_WAIT_THREAD(&lock);
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
}
|
||||
|
||||
/* open and setup the state machine */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -227,12 +293,6 @@ int orte_ess_base_tool_setup(void)
|
||||
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
/* since I am a tool, then all I really want to do is communicate.
|
||||
* So setup communications and be done - finding the HNP
|
||||
* to which I want to communicate and setting up a route for
|
||||
* that link is my responsibility
|
||||
*/
|
||||
|
||||
/* we -may- need to know the name of the head
|
||||
* of our session directory tree, particularly the
|
||||
* tmp base where any other session directories on
|
||||
@ -248,7 +308,52 @@ int orte_ess_base_tool_setup(void)
|
||||
|
||||
/* setup I/O forwarding system - must come after we init routes */
|
||||
if (NULL != orte_process_info.my_hnp_uri) {
|
||||
/* only do this if we were given an HNP */
|
||||
/* extract the name */
|
||||
if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
|
||||
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
|
||||
exit(1);
|
||||
}
|
||||
/* Set the contact info in the RML - this won't actually establish
|
||||
* the connection, but just tells the RML how to reach the HNP
|
||||
* if/when we attempt to send to it
|
||||
*/
|
||||
OBJ_CONSTRUCT(&val, opal_value_t);
|
||||
val.key = OPAL_PMIX_PROC_URI;
|
||||
val.type = OPAL_STRING;
|
||||
val.data.string = orte_process_info.my_hnp_uri;
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_HNP, &val))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
val.key = NULL;
|
||||
val.data.string = NULL;
|
||||
OBJ_DESTRUCT(&val);
|
||||
error = "store HNP URI";
|
||||
goto error;
|
||||
}
|
||||
val.key = NULL;
|
||||
val.data.string = NULL;
|
||||
OBJ_DESTRUCT(&val);
|
||||
/* set the route to be direct */
|
||||
if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
|
||||
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
|
||||
orte_finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* connect to the HNP so we can recv forwarded output */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_HNP,
|
||||
buf, ORTE_RML_TAG_WARMUP_CONNECTION,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "warmup connection";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* set the target hnp as our lifeline so we will terminate if it exits */
|
||||
orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP);
|
||||
|
||||
/* setup the IOF */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_iof_base_open";
|
||||
@ -259,46 +364,8 @@ int orte_ess_base_tool_setup(void)
|
||||
error = "orte_iof_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* if we were given an HNP, then also setup the PLM in case this
|
||||
* tool wants to request that we spawn something for it */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_plm_base_open";
|
||||
goto error;
|
||||
}
|
||||
/* we don't select the plm framework as we only want the
|
||||
* base proxy functions */
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
/*
|
||||
* Setup the SnapC
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_snapc_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_sstore_base_open";
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_snapc_base_select";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_sstore_base_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Tools do not need all the OPAL CR stuff */
|
||||
opal_cr_set_enabled(false);
|
||||
#endif
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
@ -314,11 +381,6 @@ int orte_ess_base_tool_finalize(void)
|
||||
{
|
||||
orte_wait_finalize();
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
mca_base_framework_close(&orte_snapc_base_framework);
|
||||
mca_base_framework_close(&orte_sstore_base_framework);
|
||||
#endif
|
||||
|
||||
orte_rml.close_conduit(orte_mgmt_conduit);
|
||||
|
||||
/* if I am a tool, then all I will have done is
|
||||
|
@ -92,7 +92,7 @@ static int rte_init(void)
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto error;
|
||||
|
@ -87,7 +87,7 @@ static int rte_init(void)
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto error;
|
||||
|
@ -91,7 +91,7 @@ static int rte_init(void)
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto error;
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,6 +32,9 @@ int orte_ess_tool_component_query(mca_base_module_t **module, int *priority);
|
||||
typedef struct {
|
||||
orte_ess_base_component_t super;
|
||||
bool async;
|
||||
bool system_server_first;
|
||||
bool system_server_only;
|
||||
bool do_not_connect;
|
||||
} orte_ess_tool_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_ess_tool_component_t mca_ess_tool_component;
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -76,6 +76,30 @@ static int tool_component_register(void)
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_ess_tool_component.async);
|
||||
|
||||
mca_ess_tool_component.do_not_connect = false;
|
||||
(void) mca_base_component_var_register (c, "do_not_connect",
|
||||
"Do not connect to a PMIx server",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_ess_tool_component.do_not_connect);
|
||||
|
||||
mca_ess_tool_component.system_server_first = false;
|
||||
(void) mca_base_component_var_register (c, "system_server_first",
|
||||
"Look for a system PMIx server first",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_ess_tool_component.system_server_first);
|
||||
|
||||
mca_ess_tool_component.system_server_only = false;
|
||||
(void) mca_base_component_var_register (c, "system_server_only",
|
||||
"Only connect to a system server (and not an mpirun)",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_ess_tool_component.system_server_only);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -111,4 +135,3 @@ orte_ess_tool_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -63,6 +63,7 @@ static int rte_init(void)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
uint8_t flags;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
@ -79,8 +80,18 @@ static int rte_init(void)
|
||||
progress_thread_running = true;
|
||||
}
|
||||
|
||||
/* setup the tool connection flags */
|
||||
flags = 0;
|
||||
if (mca_ess_tool_component.do_not_connect) {
|
||||
flags = 0x01;
|
||||
} else if (mca_ess_tool_component.system_server_first) {
|
||||
flags = 0x02;
|
||||
} else if (mca_ess_tool_component.system_server_only) {
|
||||
flags = 0x04;
|
||||
}
|
||||
|
||||
/* do the standard tool init */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(flags))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto error;
|
||||
|
@ -133,29 +133,29 @@ BEGIN_C_DECLS
|
||||
* behalf of a tool that had the HNP spawn a job. First
|
||||
* argument is the orte_job_t of the spawned job, second
|
||||
* is a pointer to the name of the requesting tool */
|
||||
#define ORTE_IOF_PROXY_PULL(a, b) \
|
||||
do { \
|
||||
opal_buffer_t *buf; \
|
||||
orte_iof_tag_t tag; \
|
||||
orte_process_name_t nm; \
|
||||
\
|
||||
buf = OBJ_NEW(opal_buffer_t); \
|
||||
\
|
||||
/* setup the tag to pull from HNP */ \
|
||||
tag = ORTE_IOF_STDOUTALL | ORTE_IOF_PULL; \
|
||||
opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG); \
|
||||
/* pack the name of the source we want to pull */ \
|
||||
nm.jobid = (a)->jobid; \
|
||||
nm.vpid = ORTE_VPID_WILDCARD; \
|
||||
opal_dss.pack(buf, &nm, 1, ORTE_NAME); \
|
||||
/* pack the name of the tool */ \
|
||||
opal_dss.pack(buf, (b), 1, ORTE_NAME); \
|
||||
\
|
||||
/* send the buffer to the HNP */ \
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit, \
|
||||
ORTE_PROC_MY_HNP, buf, \
|
||||
ORTE_RML_TAG_IOF_HNP, \
|
||||
orte_rml_send_callback, NULL); \
|
||||
#define ORTE_IOF_PROXY_PULL(a, b) \
|
||||
do { \
|
||||
opal_buffer_t *buf; \
|
||||
orte_iof_tag_t tag; \
|
||||
orte_process_name_t nm; \
|
||||
\
|
||||
buf = OBJ_NEW(opal_buffer_t); \
|
||||
\
|
||||
/* setup the tag to pull from HNP */ \
|
||||
tag = ORTE_IOF_STDOUTALL | ORTE_IOF_PULL | ORTE_IOF_EXCLUSIVE; \
|
||||
opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG); \
|
||||
/* pack the name of the source we want to pull */ \
|
||||
nm.jobid = (a)->jobid; \
|
||||
nm.vpid = ORTE_VPID_WILDCARD; \
|
||||
opal_dss.pack(buf, &nm, 1, ORTE_NAME); \
|
||||
/* pack the name of the tool */ \
|
||||
opal_dss.pack(buf, (b), 1, ORTE_NAME); \
|
||||
\
|
||||
/* send the buffer to the HNP */ \
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit, \
|
||||
ORTE_PROC_MY_HNP, buf, \
|
||||
ORTE_RML_TAG_IOF_HNP, \
|
||||
orte_rml_send_callback, NULL); \
|
||||
} while(0);
|
||||
|
||||
/* Initialize the selected module */
|
||||
|
@ -387,6 +387,7 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
||||
orte_vpid_t *vptr;
|
||||
int i, rc;
|
||||
char *serial_number;
|
||||
orte_process_name_t requestor, *rptr;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(caddy);
|
||||
|
||||
@ -425,7 +426,12 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
||||
* indicating that request */
|
||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FWDIO_TO_TOOL, NULL, OPAL_BOOL)) {
|
||||
/* send a message to our IOF containing the requested pull */
|
||||
ORTE_IOF_PROXY_PULL(jdata, &jdata->originator);
|
||||
rptr = &requestor;
|
||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&rptr, OPAL_NAME)) {
|
||||
ORTE_IOF_PROXY_PULL(jdata, rptr);
|
||||
} else {
|
||||
ORTE_IOF_PROXY_PULL(jdata, &jdata->originator);
|
||||
}
|
||||
/* the tool will PUSH its stdin, so nothing we need to do here
|
||||
* about stdin */
|
||||
}
|
||||
|
@ -240,6 +240,9 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
|
||||
} else if (0 == strcmp(info->key, OPAL_PMIX_REQUESTOR_IS_TOOL)) {
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB,
|
||||
ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||
/* request that IO be forwarded to the requesting tool */
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FWDIO_TO_TOOL,
|
||||
ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||
} else if (0 == strcmp(info->key, OPAL_PMIX_STDIN_TGT)) {
|
||||
if (0 == strcmp(info->data.string, "all")) {
|
||||
jdata->stdin_target = ORTE_VPID_WILDCARD;
|
||||
|
@ -676,6 +676,13 @@ static void _query(int sd, short args, void *cbdata)
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
#endif
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_SERVER_URI)) {
|
||||
/* they want our URI */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_SERVER_URI);
|
||||
kv->type = OPAL_STRING;
|
||||
kv->data.string = strdup(orte_process_info.my_hnp_uri);
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -53,9 +53,7 @@ DIST_SUBDIRS += \
|
||||
tools/prun
|
||||
|
||||
if OPAL_WANT_PRUN
|
||||
if WANT_INSTALL_HEADERS
|
||||
SUBDIRS += \
|
||||
tools/prun \
|
||||
tools/orte-dvm
|
||||
endif
|
||||
endif
|
||||
|
@ -502,6 +502,14 @@ static void notify_requestor(int sd, short args, void *cbdata)
|
||||
|
||||
if (notify) {
|
||||
info = OBJ_NEW(opal_list_t);
|
||||
/* ensure this only goes to the job terminated event handler */
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT);
|
||||
val->type = OPAL_BOOL;
|
||||
val->data.flag = true;
|
||||
opal_list_append(info, &val->super);
|
||||
/* tell the server not to cache the event as subsequent jobs
|
||||
* do not need to know about it */
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup(OPAL_PMIX_EVENT_DO_NOT_CACHE);
|
||||
val->type = OPAL_BOOL;
|
||||
@ -510,15 +518,20 @@ static void notify_requestor(int sd, short args, void *cbdata)
|
||||
/* provide the status */
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup(OPAL_PMIX_JOB_TERM_STATUS);
|
||||
val->type = OPAL_INT;
|
||||
val->data.integer = ret;
|
||||
val->type = OPAL_STATUS;
|
||||
val->data.status = ret;
|
||||
opal_list_append(info, &val->super);
|
||||
/* if there was a problem, we need to send the requestor more info about what happened */
|
||||
if (0 < ret) {
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup(OPAL_PMIX_PROCID);
|
||||
val->type = OPAL_NAME;
|
||||
val->data.name = pptr->name;
|
||||
val->data.name.jobid = jdata->jobid;
|
||||
if (NULL != pptr) {
|
||||
val->data.name.vpid = pptr->name.vpid;
|
||||
} else {
|
||||
val->data.name.vpid = ORTE_VPID_WILDCARD;
|
||||
}
|
||||
opal_list_append(info, &val->super);
|
||||
}
|
||||
opal_pmix.notify_event(OPAL_ERR_JOB_TERMINATED, NULL,
|
||||
|
@ -26,10 +26,7 @@
|
||||
# post-processed forms of the CFLAGS in the library targets down
|
||||
# below.
|
||||
|
||||
AM_CPPFLAGS = $(OPAL_PMIX_CPPFLAGS)
|
||||
|
||||
CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS)
|
||||
AM_LDFLAGS = $(OPAL_PMIX_LDFLAGS)
|
||||
|
||||
include $(top_srcdir)/Makefile.ompi-rules
|
||||
|
||||
@ -56,10 +53,7 @@ prun_SOURCES = \
|
||||
|
||||
prun_LDADD = \
|
||||
$(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \
|
||||
$(OPAL_PMIX_LDADD)
|
||||
|
||||
prun_LIBS = $(OPAL_PMIX_LIBS)
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
|
||||
distclean-local:
|
||||
rm -f $(man_pages)
|
||||
|
@ -69,22 +69,23 @@
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/fd.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#endif
|
||||
|
||||
#include "opal/version.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/runtime/opal_info_support.h"
|
||||
#include "opal/runtime/opal_progress_threads.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
|
||||
/* ensure I can behave like a daemon */
|
||||
#include "prun.h"
|
||||
#include <include/pmix.h>
|
||||
#include <include/pmix_tool.h>
|
||||
|
||||
/**
|
||||
* Global struct for caching orte command line options.
|
||||
@ -143,7 +144,7 @@ typedef struct orte_cmd_options_t orte_cmd_options_t;
|
||||
static orte_cmd_options_t orte_cmd_options = {0};
|
||||
static opal_cmd_line_t *orte_cmd_line = NULL;
|
||||
static opal_list_t job_info;
|
||||
static opal_pmix_lock_t globallock;
|
||||
static volatile bool active = false;
|
||||
|
||||
static int create_app(int argc, char* argv[],
|
||||
opal_list_t *jdata,
|
||||
@ -476,10 +477,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
};
|
||||
|
||||
|
||||
static void infocb(pmix_status_t status,
|
||||
pmix_info_t *info, size_t ninfo,
|
||||
static void infocb(int status,
|
||||
opal_list_t *info,
|
||||
void *cbdata,
|
||||
pmix_release_cbfunc_t release_fn,
|
||||
opal_pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
||||
@ -491,35 +492,42 @@ static void infocb(pmix_status_t status,
|
||||
OPAL_PMIX_WAKEUP_THREAD(lock);
|
||||
}
|
||||
|
||||
static void regcbfunc(pmix_status_t status, size_t ref, void *cbdata)
|
||||
static void regcbfunc(int status, size_t ref, void *cbdata)
|
||||
{
|
||||
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
||||
OPAL_ACQUIRE_OBJECT(lock);
|
||||
OPAL_PMIX_WAKEUP_THREAD(lock);
|
||||
}
|
||||
|
||||
static void evhandler(size_t evhdlr_registration_id,
|
||||
pmix_status_t status,
|
||||
const pmix_proc_t *source,
|
||||
pmix_info_t info[], size_t ninfo,
|
||||
pmix_info_t *results, size_t nresults,
|
||||
pmix_event_notification_cbfunc_fn_t cbfunc,
|
||||
static void release(int sd, short args, void *cbdata)
|
||||
{
|
||||
active = false;
|
||||
}
|
||||
|
||||
static bool fired = false;
|
||||
static void evhandler(int status,
|
||||
const opal_process_name_t *source,
|
||||
opal_list_t *info, opal_list_t *results,
|
||||
opal_pmix_notification_complete_fn_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
size_t n;
|
||||
opal_value_t *val;
|
||||
|
||||
if (NULL != info) {
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strncmp(info[n].key, PMIX_JOB_TERM_STATUS, PMIX_MAX_KEYLEN)) {
|
||||
opal_output(0, "JOB COMPLETED WITH STATUS %s", PMIx_Error_string(info[n].value.data.status));
|
||||
OPAL_LIST_FOREACH(val, info, opal_value_t) {
|
||||
if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) {
|
||||
opal_output(0, "JOB COMPLETED WITH STATUS %d",
|
||||
val->data.integer);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata);
|
||||
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
|
||||
}
|
||||
if (!fired) {
|
||||
fired = true;
|
||||
ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, ORTE_PROC_STATE_TERMINATED);
|
||||
}
|
||||
OPAL_ACQUIRE_OBJECT(&globallock);
|
||||
OPAL_PMIX_WAKEUP_THREAD(&globallock);
|
||||
}
|
||||
|
||||
|
||||
@ -530,14 +538,9 @@ int prun(int argc, char *argv[])
|
||||
opal_pmix_lock_t lock;
|
||||
opal_list_t apps;
|
||||
opal_value_t *val;
|
||||
opal_pmix_app_t *app;
|
||||
pmix_status_t code;
|
||||
char nspace[PMIX_MAX_NSLEN+1];
|
||||
pmix_info_t info;
|
||||
pmix_proc_t myproc;
|
||||
size_t asz, jsz;
|
||||
pmix_app_t *papps = NULL;
|
||||
pmix_info_t *pinfo = NULL;
|
||||
opal_list_t info;
|
||||
opal_jobid_t jobid;
|
||||
struct timespec tp = {0, 100000};
|
||||
|
||||
/* init the globals */
|
||||
memset(&orte_cmd_options, 0, sizeof(orte_cmd_options));
|
||||
@ -644,106 +647,85 @@ int prun(int argc, char *argv[])
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* use the system connection first, if available */
|
||||
PMIX_INFO_LOAD(&info, OPAL_PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL);
|
||||
/* init as a tool */
|
||||
if (OPAL_SUCCESS != PMIx_tool_init(&myproc, &info, 1)) {
|
||||
fprintf(stderr, "Unable to init as tool\n");
|
||||
exit(1);
|
||||
/* tell the ess/tool component that we want to connect to a system-level
|
||||
* PMIx server */
|
||||
opal_setenv("OMPI_MCA_ess_tool_system_server_only", "1", true, &environ);
|
||||
|
||||
/* now initialize ORTE */
|
||||
if (OPAL_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
PMIX_INFO_DESTRUCT(&info);
|
||||
|
||||
/* if the user just wants us to terminate a DVM, then do so */
|
||||
if (orte_cmd_options.terminate_dvm) {
|
||||
PMIX_INFO_LOAD(&info, OPAL_PMIX_JOB_CTRL_TERMINATE, NULL, PMIX_BOOL);
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup(OPAL_PMIX_JOB_CTRL_TERMINATE);
|
||||
val->type = OPAL_BOOL;
|
||||
val->data.flag = true;
|
||||
opal_list_append(&info, &val->super);
|
||||
|
||||
fprintf(stderr, "TERMINATING DVM...");
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||
rc = PMIx_Job_control_nb(NULL, 0, &info, 1, infocb, (void*)&lock);
|
||||
rc = opal_pmix.job_control(NULL, &info, infocb, (void*)&lock);
|
||||
OPAL_PMIX_WAIT_THREAD(&lock);
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||
PMIX_INFO_DESTRUCT(&info);
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
fprintf(stderr, "DONE\n");
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
orte_state.add_proc_state(ORTE_PROC_STATE_TERMINATED, release, ORTE_SYS_PRI);
|
||||
|
||||
/* get here if they want to run an application, so let's parse
|
||||
* the cmd line to get it */
|
||||
|
||||
if (OPAL_SUCCESS != parse_locals(&apps, argc, argv)) {
|
||||
opal_output(0, "[%s:%d] SOMETHING WRONG", __FILE__, __LINE__);
|
||||
if (OPAL_SUCCESS != (rc = parse_locals(&apps, argc, argv))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OPAL_LIST_DESTRUCT(&apps);
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
/* bozo check */
|
||||
if (0 == (asz = opal_list_get_size(&apps))) {
|
||||
opal_output(0, "[%s:%d] SOMETHING WRONG", __FILE__, __LINE__);
|
||||
if (0 == opal_list_get_size(&apps)) {
|
||||
opal_output(0, "No application specified!");
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
/* init flag */
|
||||
active = true;
|
||||
|
||||
/* register for job terminations so we get notified when
|
||||
* our job completes */
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||
code = PMIX_ERR_JOB_TERMINATED;
|
||||
PMIx_Register_event_handler(&code, 1, NULL, 0, evhandler, regcbfunc, &lock);
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->key = strdup("foo");
|
||||
val->type = OPAL_INT;
|
||||
val->data.integer = OPAL_ERR_JOB_TERMINATED;
|
||||
opal_list_append(&info, &val->super);
|
||||
opal_pmix.register_evhandler(&info, NULL, evhandler, regcbfunc, &lock);
|
||||
OPAL_PMIX_WAIT_THREAD(&lock);
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
|
||||
/* convert the job info and apps to PMIx arrays */
|
||||
if (0 < (jsz = opal_list_get_size(&job_info))) {
|
||||
PMIX_INFO_CREATE(pinfo, jsz);
|
||||
i=0;
|
||||
OPAL_LIST_FOREACH(val, &job_info, opal_value_t) {
|
||||
(void)strncpy(pinfo[i].key, val->key, PMIX_MAX_KEYLEN);
|
||||
/* we only have bool and string types here */
|
||||
if (OPAL_BOOL == val->type) {
|
||||
pinfo[i].value.type = PMIX_BOOL;
|
||||
pinfo[i].value.data.flag = val->data.flag;
|
||||
} else if (OPAL_STRING == val->type) {
|
||||
pinfo[i].value.type = PMIX_STRING;
|
||||
pinfo[i].value.data.string = strdup(val->data.string);
|
||||
} else {
|
||||
opal_output(0, "UNSUPPORTED TYPE %d", val->type);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&job_info);
|
||||
|
||||
PMIX_APP_CREATE(papps, asz);
|
||||
i=0;
|
||||
OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) {
|
||||
papps[i].cmd = strdup(app->cmd);
|
||||
papps[i].argv = opal_argv_copy(app->argv);
|
||||
papps[i].env = opal_argv_copy(app->env);
|
||||
if (NULL != app->cwd) {
|
||||
papps[i].cwd = strdup(app->cwd);
|
||||
}
|
||||
papps[i].maxprocs = app->maxprocs;
|
||||
++i;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&apps);
|
||||
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&globallock);
|
||||
if (PMIX_SUCCESS != PMIx_Spawn(pinfo, jsz, papps, asz, nspace)) {
|
||||
opal_output(0, "[%s:%d] SOMETHING WRONG", __FILE__, __LINE__);
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&globallock);
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &jobid))) {
|
||||
opal_output(0, "Job failed to spawn: %s", opal_strerror(rc));
|
||||
goto DONE;
|
||||
}
|
||||
opal_output(0, "JOB %s EXECUTING", nspace);
|
||||
OPAL_PMIX_WAIT_THREAD(&globallock);
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&globallock);
|
||||
if (NULL != pinfo) {
|
||||
PMIX_INFO_FREE(pinfo, jsz);
|
||||
}
|
||||
if (NULL != papps) {
|
||||
PMIX_APP_FREE(papps, asz);
|
||||
OPAL_LIST_DESTRUCT(&job_info);
|
||||
OPAL_LIST_DESTRUCT(&apps);
|
||||
|
||||
opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(jobid));
|
||||
|
||||
while (active) {
|
||||
nanosleep(&tp, NULL);
|
||||
}
|
||||
|
||||
DONE:
|
||||
DONE:
|
||||
/* cleanup and leave */
|
||||
PMIx_tool_finalize();
|
||||
opal_finalize();
|
||||
orte_finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user