
Implement the MPI_Info part of MPI-3 Ticket 313. Add an MPI_Info object MPI_INFO_GET_ENV that contains a number of run-time-related pieces of info. This includes all of the keys required by the ticket, plus a few that specifically address recent user questions:

"num_app_ctx" - the number of app_contexts in the job
"first_rank" - the MPI rank of the first process in each app_context
"np" - the number of procs in each app_context

Still need clarification on the MPI_Init portion of the ticket. Specifically, does the ticket call for returning an error if someone calls MPI_Init more than once in a program? We set a flag to tell us that we have been initialized, but currently never check it.
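
If the answer turns out to be "yes", the check would presumably amount to a few lines at the top of the init path (a hypothetical sketch only; the flag corresponds to the existing ompi_mpi_initialized global, and returning MPI_ERR_OTHER is an assumption, not something this commit implements):

    int MPI_Init(int *argc, char ***argv)
    {
        /* hypothetical: reject a second initialization instead of
         * silently setting the flag again */
        if (ompi_mpi_initialized) {
            return MPI_ERR_OTHER;   /* error class is a guess */
        }
        /* ... the normal one-time initialization path runs here ... */
        return MPI_SUCCESS;
    }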

This commit was SVN r27005.
This commit is contained in:
Ralph Castain 2012-08-12 01:28:23 +00:00
parent ad4cdd1a64
commit cb48fd52d4
16 changed files with 212 additions and 11 deletions

View file

@@ -642,6 +642,11 @@ enum {
#define MPI_FILE_NULL OMPI_PREDEFINED_GLOBAL(MPI_File, ompi_mpi_file_null)
#endif
/*
* GET_ENV handle
*/
#define MPI_INFO_GET_ENV OMPI_PREDEFINED_GLOBAL(MPI_Info, ompi_mpi_info_get_env)
#define MPI_STATUS_IGNORE ((MPI_Status *) 0)
#define MPI_STATUSES_IGNORE ((MPI_Status *) 0)
@@ -885,6 +890,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_win_t ompi_mpi_win_null;
OMPI_DECLSPEC extern struct ompi_predefined_file_t ompi_mpi_file_null;
OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_null;
OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_get_env;
OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUS_IGNORE;
OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE;

View file

@@ -12,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -20,6 +22,7 @@
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#ifdef HAVE_STRING_H
#include <string.h>
@@ -28,12 +31,20 @@
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <limits.h>
#include <ctype.h>
#ifdef HAVE_SYS_UTSNAME_H
#include <sys/utsname.h>
#endif
#include "opal/util/argv.h"
#include "opal/util/opal_getcwd.h"
#include "opal/util/output.h"
#include "opal/util/strncpy.h"
#include "ompi/constants.h"
#include "ompi/info/info.h"
#include "ompi/runtime/params.h"
@@ -43,6 +54,7 @@
*/
ompi_predefined_info_t ompi_mpi_info_null;
ompi_predefined_info_t *ompi_mpi_info_null_addr = &ompi_mpi_info_null;
ompi_predefined_info_t ompi_mpi_info_get_env;
/*
@@ -78,10 +90,14 @@ opal_pointer_array_t ompi_info_f_to_c_table;
/*
* This function is called during ompi_init and initializes the
* fortran to C translation table.
* fortran to C translation table. It also fills in the values
* for the MPI_INFO_GET_ENV object
*/
int ompi_info_init(void)
int ompi_info_init(int argc, char **argv)
{
char tmp[MPI_MAX_INFO_KEY];
char *cptr;
/* initialize table */
OBJ_CONSTRUCT(&ompi_info_f_to_c_table, opal_pointer_array_t);
@@ -95,6 +111,62 @@ int ompi_info_init(void)
OBJ_CONSTRUCT(&ompi_mpi_info_null.info, ompi_info_t);
ompi_mpi_info_null.info.i_f_to_c_index = 0;
/* Create MPI_INFO_GET_ENV */
OBJ_CONSTRUCT(&ompi_mpi_info_get_env.info, ompi_info_t);
ompi_mpi_info_get_env.info.i_f_to_c_index = 1;
/* fill the get_env info object */
/* command for this app_context */
ompi_info_set(&ompi_mpi_info_get_env.info, "command", argv[0]);
/* space-separated list of argv for this command */
if (1 < argc) {
cptr = opal_argv_join(&argv[1], ' ');
ompi_info_set(&ompi_mpi_info_get_env.info, "argv", cptr);
free(cptr);
} else {
ompi_info_set(&ompi_mpi_info_get_env.info, "argv", "N/A");
}
/* max procs for the entire job */
if (NULL == (cptr = getenv("OMPI_MCA_orte_ess_num_procs"))) {
cptr = "1";
}
ompi_info_set(&ompi_mpi_info_get_env.info, "maxprocs", cptr);
ompi_info_set(&ompi_mpi_info_get_env.info, "soft", "N/A");
/* local host name */
gethostname(tmp, MPI_MAX_INFO_KEY);
ompi_info_set(&ompi_mpi_info_get_env.info, "host", tmp);
/* architecture name */
if (NULL == (cptr = getenv("OMPI_MCA_orte_cpu_type"))) {
#ifdef HAVE_SYS_UTSNAME_H
{
struct utsname sysname;
uname(&sysname);
cptr = sysname.machine;
}
#else
cptr = "unknown";
#endif
}
ompi_info_set(&ompi_mpi_info_get_env.info, "arch", cptr);
/* working directory of this process */
opal_getcwd(tmp, MPI_MAX_INFO_KEY);
ompi_info_set(&ompi_mpi_info_get_env.info, "wdir", tmp);
/* the number of app_contexts in this job */
if (NULL == (cptr = getenv("OMPI_NUM_APP_CTX"))) {
cptr = "1";
}
ompi_info_set(&ompi_mpi_info_get_env.info, "num_app_ctx", cptr);
/* space-separated list of first MPI rank of each app_context */
if (NULL == (cptr = getenv("OMPI_FIRST_RANKS"))) {
cptr = "0";
}
ompi_info_set(&ompi_mpi_info_get_env.info, "first_rank", cptr);
/* space-separated list of num procs for each app_context */
if (NULL == (cptr = getenv("OMPI_APP_CTX_NUM_PROCS"))) {
cptr = "1";
}
ompi_info_set(&ompi_mpi_info_get_env.info, "np", cptr);
/* All done */
return OMPI_SUCCESS;
@@ -354,11 +426,15 @@ int ompi_info_finalize(void)
OBJ_DESTRUCT(&ompi_mpi_info_null.info);
opal_pointer_array_set_item(&ompi_info_f_to_c_table, 0, NULL);
/* ditto for MPI_INFO_GET_ENV */
OBJ_DESTRUCT(&ompi_mpi_info_get_env.info);
opal_pointer_array_set_item(&ompi_info_f_to_c_table, 1, NULL);
/* Go through the f2c table and see if anything is left. Free them
all. */
max = opal_pointer_array_get_size(&ompi_info_f_to_c_table);
for (i = 0; i < max; ++i) {
for (i = 2; i < max; ++i) {
info = (ompi_info_t *)opal_pointer_array_get_item(&ompi_info_f_to_c_table, i);
/* If the info was freed but still exists because the user

View file

@@ -12,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -103,6 +105,11 @@ OMPI_DECLSPEC extern ompi_predefined_info_t ompi_mpi_info_null;
*/
OMPI_DECLSPEC extern ompi_predefined_info_t *ompi_mpi_info_null_addr;
/**
* Global instance for MPI_INFO_GET_ENV
*/
OMPI_DECLSPEC extern ompi_predefined_info_t ompi_mpi_info_get_env;
/**
* \internal
* Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros
@@ -119,7 +126,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_info_entry_t);
* This function is invoked during ompi_mpi_init() and sets up
* MPI_Info handling.
*/
int ompi_info_init(void);
int ompi_info_init(int argc, char **argv);
/**
* This functions is called during ompi_mpi_finalize() and shuts

View file

@@ -638,7 +638,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
/* initialize info */
if (OMPI_SUCCESS != (ret = ompi_info_init())) {
if (OMPI_SUCCESS != (ret = ompi_info_init(argc, argv))) {
error = "ompi_info_init() failed";
goto error;
}

View file

@@ -24,6 +24,7 @@
#include <string.h>
#endif
#include "opal/util/basename.h"
#include "opal/util/opal_getcwd.h"
#include "opal/constants.h"
@@ -38,6 +39,7 @@ int opal_getcwd(char *buf, size_t size)
char cwd[OPAL_PATH_MAX];
char *pwd = getenv("PWD");
struct stat a, b;
char *shortened;
/* Bozo checks (e.g., if someone accidentally passed -1 to the
unsigned "size" param) */
@@ -86,6 +88,15 @@ int opal_getcwd(char *buf, size_t size)
give. Ensure the user's buffer is long enough. If it is, copy
in the value and be done. */
if (strlen(pwd) > size) {
/* if it isn't big enough, give them as much
* of the basename as possible
*/
shortened = opal_basename(pwd);
strncpy(buf, shortened, size);
free(shortened);
/* ensure it is null terminated */
buf[size-1] = '\0';
/* indicate that it isn't the full path */
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
strncpy(buf, pwd, size);
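
A caller can detect the basename fallback through the new return code. An illustrative sketch (not part of this commit; the deliberately small buffer is arbitrary):

    #include "opal/util/opal_getcwd.h"
    #include "opal/constants.h"

    static void show_cwd(void)
    {
        char cwd[32];   /* small on purpose, to trigger the fallback */
        if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == opal_getcwd(cwd, sizeof(cwd))) {
            /* cwd now holds at most the basename of the working
             * directory rather than the full path */
        }
    }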

View file

@@ -682,6 +682,8 @@ static int rte_finalize(void)
/* Remove the epipe handler */
opal_event_signal_del(&epipe_handler);
#ifndef __WINDOWS__
/* remove the term handler */
opal_event_del(&term_handler);
/** Remove the USR signal handlers */
opal_event_signal_del(&sigusr1_handler);
opal_event_signal_del(&sigusr2_handler);

View file

@@ -83,7 +83,7 @@ static int rte_init(void)
int ret, i, j;
char *error = NULL, *localj;
int32_t jobfam, stepid;
char *envar;
char *envar, *ev1, *ev2;
uint64_t unique_key[2];
char *cs_env, *string_key;
char *pmi_id=NULL;
@@ -195,6 +195,13 @@ static int rte_init(void)
goto error;
}
orte_process_info.num_procs = i;
/* push into the environ for pickup in MPI layer for
* MPI-3 required info key
*/
asprintf(&ev1, "OMPI_MCA_orte_ess_num_procs=%d", i);
putenv(ev1);
asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", i);
putenv(ev2);
/* setup transport keys in case the MPI layer needs them -
* we can use the jobfam and stepid as unique keys

View file

@@ -1051,6 +1051,9 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
orte_job_t *jobdat;
orte_jobid_t job = caddy->job;
orte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local;
char *num_app_ctx = NULL;
char **nps, *npstring;
char **firstranks, *firstrankstring;
/* establish our baseline working directory - we will be potentially
* bouncing around as we execute various apps, but we will always return
@@ -1095,9 +1098,18 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
}
#endif
/* Now we preload any files that are needed. This is done on a per
* app context basis
/* MPI-3 requires we provide some further info to the procs,
* so we pass them as envars to avoid introducing further
* ORTE calls in the MPI layer
*/
asprintf(&num_app_ctx, "%lu", (unsigned long)jobdat->num_apps);
/* Now we preload any files that are needed. This is done on a per
* app context basis, so let's take the opportunity to build
* some common envars we need to pass for MPI-3 compatibility
*/
nps = NULL;
firstranks = NULL;
for (j=0; j < jobdat->apps->size; j++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) {
continue;
@@ -1109,7 +1121,13 @@
/* JJH: Do not fail here, instead try to execute without the preloaded options*/
}
}
opal_argv_append_nosize(&nps, ORTE_VPID_PRINT(app->num_procs));
opal_argv_append_nosize(&firstranks, ORTE_VPID_PRINT(app->first_rank));
}
npstring = opal_argv_join(nps, ' ');
firstrankstring = opal_argv_join(firstranks, ' ');
opal_argv_free(nps);
opal_argv_free(firstranks);
#if OPAL_ENABLE_FT_CR == 1
for (j=0; j < jobdat->apps->size; j++) {
@@ -1227,6 +1245,11 @@
goto GETOUT;
}
/* add the MPI-3 envars */
opal_setenv("OMPI_NUM_APP_CTX", num_app_ctx, true, &app->env);
opal_setenv("OMPI_FIRST_RANKS", firstrankstring, true, &app->env);
opal_setenv("OMPI_APP_CTX_NUM_PROCS", npstring, true, &app->env);
/* okay, now let's launch all the local procs for this app using the provided fork_local fn */
for (proc_rank = 0, idx=0; idx < orte_local_children->size; idx++) {
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
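
As an illustration (hypothetical values), a two-app_context job launched as "mpirun -np 4 a.out : -np 8 b.out" would hand each local proc:

    OMPI_NUM_APP_CTX=2
    OMPI_FIRST_RANKS=0 4
    OMPI_APP_CTX_NUM_PROCS=4 8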

View file

@@ -142,6 +142,9 @@ static int rank_span(orte_job_t *jdata,
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_span: assigning vpid %s", ORTE_VPID_PRINT(vpid));
proc->name.vpid = vpid++;
if (0 == cnt) {
app->first_rank = proc->name.vpid;
}
cnt++;
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
@@ -246,6 +249,9 @@ static int rank_fill(orte_job_t *jdata,
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_fill: assigning vpid %s", ORTE_VPID_PRINT(vpid));
proc->name.vpid = vpid++;
if (0 == cnt) {
app->first_rank = proc->name.vpid;
}
cnt++;
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
@@ -367,6 +373,9 @@ static int rank_by(orte_job_t *jdata,
continue;
}
proc->name.vpid = vpid++;
if (0 == cnt) {
app->first_rank = proc->name.vpid;
}
cnt++;
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));

View file

@@ -523,6 +523,13 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the first rank for this app */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(app_context[i]->first_rank)), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of entries in the argv array */
count = opal_argv_count(app_context[i]->argv);
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {

View file

@@ -537,10 +537,11 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "\n%sData for app_context: index %lu\tapp: %s\n%s\tNum procs: %lu\tRecovery: %s\tMax Restarts: %d",
asprintf(&tmp, "\n%sData for app_context: index %lu\tapp: %s\n%s\tNum procs: %lu\tFirstRank: %s\tRecovery: %s\tMax Restarts: %d",
pfx2, (unsigned long)src->idx,
(NULL == src->app) ? "NULL" : src->app,
pfx2, (unsigned long)src->num_procs,
ORTE_VPID_PRINT(src->first_rank),
(src->recovery_defined) ? "DEFINED" : "DEFAULT",
src->max_restarts);

View file

@@ -574,6 +574,14 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
return rc;
}
/* get the first rank for this app */
max_n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &(app_context[i]->first_rank),
&max_n, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* get the number of argv strings that were packed */
max_n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, &max_n, ORTE_STD_CNTR))) {

View file

@@ -571,6 +571,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
app_context->idx=0;
app_context->app=NULL;
app_context->num_procs=0;
app_context->first_rank = 0;
app_context->argv=NULL;
app_context->env=NULL;
app_context->cwd=NULL;

View file

@@ -245,6 +245,8 @@ typedef struct {
char *app;
/** Number of copies of this process that are to be launched */
orte_std_cntr_t num_procs;
/** First MPI rank of this app_context in the job */
orte_vpid_t first_rank;
/** Standard argv-style array, including a final NULL pointer */
char **argv;
/** Standard environ-style array, including a final NULL pointer */

View file

@@ -1,4 +1,4 @@
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave_spawn slave cell_spawn reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave_spawn slave cell_spawn reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info
all: $(PROGS)

orte/test/mpi/mpi_info.c Normal file
View file

@@ -0,0 +1,41 @@
/* -*- C -*-
*
* $HEADER$
*
* The most basic of MPI applications
*/
#include <stdio.h>
#include "mpi.h"
int main(int argc, char* argv[])
{
char value[MPI_MAX_INFO_KEY];
int flag;
char *keys[] = {
"command",
"argv",
"maxprocs",
"soft",
"host",
"arch",
"wdir",
"num_app_ctx",
"first_rank",
"np"
};
int i, nk;
MPI_Init(&argc, &argv);
nk = sizeof(keys) / sizeof(char*);
for (i=0; i < nk; i++) {
MPI_Info_get(MPI_INFO_GET_ENV, keys[i], MPI_MAX_INFO_KEY,
value, &flag);
fprintf(stderr, "%s: %s\n", keys[i], value);
}
MPI_Finalize();
return 0;
}
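
Running this test under mpirun (e.g., "mpirun -np 2 ./mpi_info") should print one line per key on every rank, exercising each of the keys listed in the commit message above.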