1
1

Merge pull request #4206 from rhc54/topic/prun

Add a new launcher "prun" for starting applications against the ORTE DVM.
Этот коммит содержится в:
Ralph Castain 2017-09-13 06:55:30 -07:00 коммит произвёл GitHub
родитель d41069795f bbd83fd4c0
Коммит df4bd83fcb
14 изменённых файлов: 3002 добавлений и 53 удалений

2
.gitignore поставляемый
Просмотреть файл

@ -520,6 +520,8 @@ orte/tools/orted/orted
orte/tools/orted/orted.1
orte/tools/orterun/orterun
orte/tools/orterun/orterun.1
orte/tools/prun/prun
orte/tools/prun/prun.1
orte/tools/wrappers/ortecc-wrapper-data.txt
orte/tools/wrappers/ortec++-wrapper-data.txt
orte/tools/wrappers/ortecc.1

Просмотреть файл

@ -13,7 +13,7 @@
# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2016 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -240,9 +240,11 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
AC_MSG_ERROR([Cannot continue])])
AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)])
opal_prun_happy=no
AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"],
[AC_MSG_RESULT([no])
opal_external_pmix_happy=no],
opal_external_pmix_happy=no
opal_prun_happy=yes],
[AC_MSG_RESULT([yes])
# check for external pmix lib
@ -295,7 +297,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=2x
opal_external_pmix_version_found=1],
opal_external_pmix_version_found=1
opal_prun_happy=yes],
[AC_MSG_RESULT([not found])])])
AS_IF([test "$opal_external_pmix_version_found" = "0"],
@ -326,5 +329,6 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
opal_external_pmix_LIBS=-lpmix
opal_external_pmix_happy=yes])
AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = "yes"])
OPAL_VAR_SCOPE_POP
])

Просмотреть файл

@ -6,7 +6,7 @@
# Corporation. All rights reserved.
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -34,5 +34,6 @@ AC_DEFUN([ORTE_CONFIG_FILES],[
orte/tools/orte-info/Makefile
orte/tools/orte-server/Makefile
orte/tools/orte-dvm/Makefile
orte/tools/prun/Makefile
])
])

Просмотреть файл

@ -88,7 +88,7 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[
opal_pmix_pmix2x_CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/include -I$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix -I$OPAL_TOP_SRCDIR/$opal_pmix_pmix2x_basedir/pmix/include -I$OPAL_TOP_SRCDIR/$opal_pmix_pmix2x_basedir/pmix"
opal_pmix_pmix2x_DEPENDENCIES="$OPAL_TOP_BUILDDIR/$opal_pmix_pmix2x_basedir/pmix/src/libpmix.la"
# and the flags for prun
OPAL_PMIX_CPPFLAGS="-I$opal_pmix_pmix2x_CPPFLAGS"
OPAL_PMIX_CPPFLAGS="$opal_pmix_pmix2x_CPPFLAGS"
AC_SUBST(OPAL_PMIX_CPPFLAGS)
OPAL_PMIX_LDADD=$opal_pmix_pmix2x_LIBS
AC_SUBST(OPAL_PMIX_LDADD)

Просмотреть файл

@ -1240,6 +1240,7 @@ static void process_cbfunc(int sd, short args, void *cbdata)
pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cd->cbdata;
pmix_nspace_t *nptr;
pmix_rank_info_t *info;
pmix_peer_t *peer;
int rc;
uint32_t u32;
pmix_info_t ginfo;
@ -1294,10 +1295,24 @@ static void process_cbfunc(int sd, short args, void *cbdata)
/* add this nspace to our pool */
nptr = PMIX_NEW(pmix_nspace_t);
if (NULL == nptr) {
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
nptr->nspace = strdup(cd->proc.nspace);
pmix_list_append(&pmix_server_globals.nspaces, &nptr->super);
/* add this tool rank to the nspace */
info = PMIX_NEW(pmix_rank_info_t);
if (NULL == info) {
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
info->pname.nspace = strdup(cd->proc.nspace);
info->pname.rank = 0;
/* need to include the uid/gid for validation */
@ -1306,7 +1321,14 @@ static void process_cbfunc(int sd, short args, void *cbdata)
pmix_list_append(&nptr->ranks, &info->super);
/* setup a peer object for this tool */
pmix_peer_t *peer = PMIX_NEW(pmix_peer_t);
peer = PMIX_NEW(pmix_peer_t);
if (NULL == peer) {
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* mark the peer proc type */
peer->proc_type = PMIX_PROC_TOOL | pnd->proc_type;
/* add in the nspace pointer */
@ -1382,11 +1404,11 @@ static void process_cbfunc(int sd, short args, void *cbdata)
}
/* start the events for this tool */
pmix_event_assign(&peer->recv_event, pmix_globals.evbase, pnd->sd,
pmix_event_assign(&peer->recv_event, pmix_globals.evbase, peer->sd,
EV_READ|EV_PERSIST, pmix_ptl_base_recv_handler, peer);
pmix_event_add(&peer->recv_event, NULL);
peer->recv_ev_active = true;
pmix_event_assign(&peer->send_event, pmix_globals.evbase, pnd->sd,
pmix_event_assign(&peer->send_event, pmix_globals.evbase, peer->sd,
EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, peer);
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"pmix:server tool %s:%d has connected on socket %d",
@ -1410,6 +1432,10 @@ static void cnct_cbfunc(pmix_status_t status,
/* need to thread-shift this into our context */
cd = PMIX_NEW(pmix_setup_caddy_t);
if (NULL == cd) {
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
return;
}
cd->status = status;
(void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN);
cd->cbdata = cbdata;

Просмотреть файл

@ -256,6 +256,12 @@ OPAL_MODULE_DECLSPEC int pmix2x_disconnectnb(opal_list_t *procs,
OPAL_MODULE_DECLSPEC int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid,
opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist);
OPAL_MODULE_DECLSPEC int pmix2x_allocate(opal_pmix_alloc_directive_t directive,
opal_list_t *info,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix2x_job_control(opal_list_t *targets,
opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/**** TOOL FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int pmix2x_tool_init(opal_list_t *info);

Просмотреть файл

@ -670,6 +670,9 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p,
if (NULL != apps[n].env) {
app->env = opal_argv_copy(apps[n].env);
}
if (NULL != apps[n].cwd) {
app->cwd = strdup(apps[n].cwd);
}
app->maxprocs = apps[n].maxprocs;
for (k=0; k < apps[n].ninfo; k++) {
oinfo = OBJ_NEW(opal_value_t);

Просмотреть файл

@ -13,7 +13,7 @@
# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -50,5 +50,10 @@ DIST_SUBDIRS += \
tools/orte-info \
tools/orte-migrate \
tools/orte-server \
tools/orte-dvm
tools/orte-dvm \
tools/prun
if OPAL_WANT_PRUN
SUBDIRS += \
tools/prun
endif

Просмотреть файл

@ -100,7 +100,6 @@ static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
static struct {
bool help;
bool version;
char *report_uri;
char *prefix;
bool run_as_root;
bool set_sid;
@ -116,10 +115,6 @@ static opal_cmd_line_init_t cmd_line_init[] = {
&myglobals.version, OPAL_CMD_LINE_TYPE_BOOL,
"Print version and exit" },
{ NULL, '\0', "report-uri", "report-uri", 1,
&myglobals.report_uri, OPAL_CMD_LINE_TYPE_STRING,
"Printout URI on stdout [-], stderr [+], or a file [anything else]" },
{ NULL, '\0', "prefix", "prefix", 1,
&myglobals.prefix, OPAL_CMD_LINE_TYPE_STRING,
"Prefix to be used to look for ORTE executables" },
@ -183,7 +178,6 @@ int main(int argc, char *argv[])
char *param, *value;
orte_job_t *jdata=NULL;
orte_app_context_t *app;
char *uri, *ptr;
/* Setup and parse the command line */
memset(&myglobals, 0, sizeof(myglobals));
@ -285,6 +279,11 @@ int main(int argc, char *argv[])
exit(0);
}
/* we should act as system-level PMIx server */
opal_setenv("OMPI_MCA_pmix_system_server", "1", true, &environ);
/* and as session-level PMIx server */
opal_setenv("OMPI_MCA_pmix_session_server", "1", true, &environ);
/* Setup MCA params */
orte_register_params();
@ -324,43 +323,7 @@ int main(int argc, char *argv[])
*/
opal_finalize();
/* check for request to report uri */
orte_oob_base_get_addr(&uri);
if (NULL != myglobals.report_uri) {
FILE *fp;
if (0 == strcmp(myglobals.report_uri, "-")) {
/* if '-', then output to stdout */
printf("VMURI: %s\n", uri);
} else if (0 == strcmp(myglobals.report_uri, "+")) {
/* if '+', output to stderr */
fprintf(stderr, "VMURI: %s\n", uri);
} else if (0 == strncasecmp(myglobals.report_uri, "file:", strlen("file:"))) {
ptr = strchr(myglobals.report_uri, ':');
++ptr;
fp = fopen(ptr, "w");
if (NULL == fp) {
orte_show_help("help-orterun.txt", "orterun:write_file", false,
orte_basename, "pid", ptr);
exit(0);
}
fprintf(fp, "%s\n", uri);
fclose(fp);
} else {
fp = fopen(myglobals.report_uri, "w");
if (NULL == fp) {
orte_show_help("help-orterun.txt", "orterun:write_file", false,
orte_basename, "pid", myglobals.report_uri);
exit(0);
}
fprintf(fp, "%s\n", uri);
fclose(fp);
}
free(uri);
} else {
printf("VMURI: %s\n", uri);
}
/* get the daemon job object - was created by ess/hnp component */
/* get the daemon job object - was created by ess/hnp component */
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
orte_show_help("help-orterun.txt", "bad-job-object", true,
orte_basename);
@ -526,6 +489,8 @@ static void notify_requestor(int sd, short args, void *cbdata)
orte_daemon_cmd_flag_t command;
orte_grpcomm_signature_t *sig;
opal_output(0, "NOTIFY JOB COMPLETE");
/* notify the requestor */
reply = OBJ_NEW(opal_buffer_t);

65
orte/tools/prun/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,65 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is not quite in the Automake spirit, but we have to do it.
# Since the totalview portion of the library must be built with -g, we
# must eliminate the CFLAGS that are passed in here by default (which
# may already have debugging and/or optimization flags). We use
# post-processed forms of the CFLAGS in the library targets down
# below.
# Preprocessor flags for the PMIx headers; OPAL_PMIX_CPPFLAGS is
# substituted by the pmix2x component's configure logic.
AM_CPPFLAGS = $(OPAL_PMIX_CPPFLAGS)
# Replace the default CFLAGS with the optimization-stripped set plus the
# debugger flags (see the explanation above).
CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS)
# NOTE(review): only OPAL_PMIX_CPPFLAGS and OPAL_PMIX_LDADD are visibly
# AC_SUBST'ed by the pmix2x configure code; confirm that
# OPAL_PMIX_LDFLAGS is defined somewhere, otherwise this expands to nothing.
AM_LDFLAGS = $(OPAL_PMIX_LDFLAGS)
include $(top_srcdir)/Makefile.ompi-rules
# The man page is built from its .1in template; only the template is
# shipped in the distribution tarball.
man_pages = prun.1
EXTRA_DIST = $(man_pages:.1=.1in)
# Only build and install the tool and its man page when binaries are
# being installed.
if OPAL_INSTALL_BINARIES
bin_PROGRAMS = prun
nodist_man_MANS = $(man_pages)
# Ensure that the man pages are rebuilt if the opal_config.h file
# changes; a "good enough" way to know if configure was run again (and
# therefore the release date or version may have changed)
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
endif # OPAL_INSTALL_BINARIES
# Sources that make up the prun executable.
prun_SOURCES = \
main.c \
prun.c \
prun.h
# Link against the ORTE and OPAL libraries plus whatever PMIx link
# items configure selected.
prun_LDADD = \
$(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \
$(OPAL_PMIX_LDADD)
# NOTE(review): "prun_LIBS" does not appear to be one of Automake's
# per-program variables (prun_LDADD is the link list), so this
# assignment presumably has no effect -- confirm and drop or fold into
# prun_LDADD.
prun_LIBS = $(OPAL_PMIX_LIBS)
# Remove the generated man page on "make distclean".
distclean-local:
rm -f $(man_pages)

33
orte/tools/prun/main.c Обычный файл
Просмотреть файл

@ -0,0 +1,33 @@
/***************************************************************************
* *
* Open MPI: Open Source High Performance Computing *
* *
* http://www.open-mpi.org/ *
* *
***************************************************************************/
#include "prun.h"
/*
 * Program entry point for the prun tool.
 *
 * Forwards the command line untouched to prun() and uses whatever
 * prun() returns as the return value of main().
 */
int main(int argc, char *argv[])
{
    const int status = prun(argc, argv);

    return status;
}
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

1597
orte/tools/prun/prun.1in Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1205
orte/tools/prun/prun.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

37
orte/tools/prun/prun.h Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef PRUN_H
#define PRUN_H
#include "orte_config.h"
BEGIN_C_DECLS
/**
 * Main body of prun functionality.
 *
 * The tool's main() passes its own arguments straight through to this
 * function and returns the result unchanged.
 *
 * @param argc  Number of entries in argv, as received by main().
 * @param argv  Command-line argument vector, as received by main().
 *
 * @return Status value that main() returns as-is.
 */
int prun(int argc, char *argv[]);
END_C_DECLS
#endif /* PRUN_H */