
Merge pull request #7535 from rhc54/topic/rte

Cleanup singleton detection and data retrieval
Ralph Castain authored on 2020-03-16 15:49:20 -07:00; committed by GitHub
Parents: 9ffee9859f 6b4fb509e9
Commit: ddc19559af
GPG key ID: 4AEE18F83AFDEB23 (no key found matching this signature)
9 changed files with 289 additions and 265 deletions

View file

@@ -13,6 +13,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -87,6 +88,7 @@
#include "ompi/errhandler/errcode.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/runtime/ompi_rte.h"
/*
* Private functions
@@ -103,8 +105,6 @@ static int set_f(int keyval, MPI_Fint value);
int ompi_attr_create_predefined(void)
{
int ret;
char *univ_size;
int usize;
/* Create all the keyvals */
@@ -138,14 +138,8 @@ int ompi_attr_create_predefined(void)
return ret;
}
/* If the universe size is set, then use it. Otherwise default
* to the size of MPI_COMM_WORLD */
univ_size = getenv("OMPI_UNIVERSE_SIZE");
if (NULL == univ_size || (usize = strtol(univ_size, NULL, 0)) <= 0) {
ret = set_f(MPI_UNIVERSE_SIZE, ompi_comm_size(MPI_COMM_WORLD));
} else {
ret = set_f(MPI_UNIVERSE_SIZE, usize);
}
/* set the universe size */
ret = set_f(MPI_UNIVERSE_SIZE, ompi_process_info.univ_size);
if (OMPI_SUCCESS != ret) {
return ret;
}
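
[Aside, not part of the diff: the MPI_UNIVERSE_SIZE attribute set above from ompi_process_info.univ_size is what applications read through the standard attribute interface. A minimal sketch using only standard MPI calls; the program itself is illustrative and not taken from the commit.]

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int flag;
    int *usize;   /* MPI returns a pointer to the attribute value */
    MPI_Init(&argc, &argv);
    MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE, &usize, &flag);
    if (flag) {
        printf("universe size = %d\n", *usize);
    }
    MPI_Finalize();
    return 0;
}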

View file

@@ -19,6 +19,7 @@
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2019 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -46,11 +47,13 @@
#include "opal/util/argv.h"
#include "opal/util/opal_getcwd.h"
#include "opal/util/output.h"
#include "opal/util/printf.h"
#include "opal/util/info.h"
#include "ompi/info/info.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "ompi/runtime/ompi_rte.h"
/*
* Global variables
@@ -85,8 +88,7 @@ opal_pointer_array_t ompi_info_f_to_c_table = {{0}};
*/
int ompi_mpiinfo_init(void)
{
const char *val;
char *cptr;
char *cptr, **tmp;
/* initialize table */
@@ -107,32 +109,33 @@ int ompi_mpiinfo_init(void)
/* fill the env info object */
/* command for this app_context */
if (NULL != (cptr = getenv("OMPI_COMMAND"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "command", cptr);
}
if (NULL != ompi_process_info.command) {
tmp = opal_argv_split(ompi_process_info.command, ' ');
opal_info_set(&ompi_mpi_info_env.info.super, "command", tmp[0]);
/* space-separated list of argv for this command */
if (NULL != (cptr = getenv("OMPI_ARGV"))) {
/* space-separated list of argv for this command */
if (1 < opal_argv_count(tmp)) {
cptr = opal_argv_join(&tmp[1], ' ');
} else {
cptr = strdup(tmp[0]);
}
opal_argv_free(tmp);
opal_info_set(&ompi_mpi_info_env.info.super, "argv", cptr);
free(cptr);
}
/* max procs for the entire job */
if (NULL != (cptr = getenv("OMPI_MCA_num_procs"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "maxprocs", cptr);
/* Open MPI does not support the "soft" option, so set it to maxprocs */
opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr);
}
opal_asprintf(&cptr, "%u", ompi_process_info.num_procs);
opal_info_set(&ompi_mpi_info_env.info.super, "maxprocs", cptr);
/* Open MPI does not support the "soft" option, so set it to maxprocs */
opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr);
free(cptr);
/* local host name */
val = opal_gethostname();
opal_info_set(&ompi_mpi_info_env.info.super, "host", val);
opal_info_set(&ompi_mpi_info_env.info.super, "host", ompi_process_info.nodename);
/* architecture name */
if (NULL != (cptr = getenv("OMPI_MCA_cpu_type"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "arch", cptr);
}
#ifdef HAVE_SYS_UTSNAME_H
else {
{
struct utsname sysname;
uname(&sysname);
cptr = sysname.machine;
@@ -140,12 +143,9 @@ int ompi_mpiinfo_init(void)
}
#endif
/* initial working dir of this process - only set when
* run by mpiexec as we otherwise have no reliable way
* of determining the value
*/
if (NULL != (cptr = getenv("OMPI_MCA_initial_wdir"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "wdir", cptr);
/* initial working dir of this process, if provided */
if (NULL != ompi_process_info.initial_wdir) {
opal_info_set(&ompi_mpi_info_env.info.super, "wdir", ompi_process_info.initial_wdir);
}
/* provide the REQUESTED thread level - may be different
@@ -172,25 +172,25 @@
/**** now some OMPI-specific values that other MPIs may not provide ****/
/* the number of app_contexts in this job */
if (NULL != (cptr = getenv("OMPI_NUM_APP_CTX"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_num_apps", cptr);
}
opal_asprintf(&cptr, "%u", ompi_process_info.num_apps);
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_num_apps", cptr);
free(cptr);
/* space-separated list of first MPI rank of each app_context */
if (NULL != (cptr = getenv("OMPI_FIRST_RANKS"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_first_rank", cptr);
if (NULL != ompi_process_info.app_ldrs) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_first_rank", ompi_process_info.app_ldrs);
}
/* space-separated list of num procs for each app_context */
if (NULL != (cptr = getenv("OMPI_APP_CTX_NUM_PROCS"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_np", cptr);
if (NULL != ompi_process_info.app_sizes) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_np", ompi_process_info.app_sizes);
}
/* location of the directory containing any prepositioned files
* the user may have requested
*/
if (NULL != (cptr = getenv("OMPI_FILE_LOCATION"))) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_positioned_file_dir", cptr);
if (NULL != ompi_process_info.proc_session_dir) {
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_positioned_file_dir", ompi_process_info.proc_session_dir);
}
/* All done */
@@ -334,9 +334,9 @@ static void info_constructor(ompi_info_t *info)
info);
info->i_freed = false;
/*
/*
* If the user doesn't want us to ever free it, then add an extra
* RETAIN here
* RETAIN here
*/
if (ompi_debug_no_free_handles) {
OBJ_RETAIN(&(info->super));
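
[Aside, not part of the diff: the MPI_INFO_ENV keys populated in this file ("command", "argv", "maxprocs", "host", "wdir", ...) are visible to applications through the standard info interface. A minimal sketch using only standard MPI calls, shown here for the "command" key:]

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    char value[256];
    int flag;
    MPI_Init(&argc, &argv);
    /* query one of the keys that ompi_mpiinfo_init() fills in */
    MPI_Info_get(MPI_INFO_ENV, "command", sizeof(value) - 1, value, &flag);
    if (flag) {
        printf("command = %s\n", value);
    }
    MPI_Finalize();
    return 0;
}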

View file

@@ -504,23 +504,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
OMPI_TIMING_NEXT("initialization");
/* if we were not externally started, then we need to setup
* some envars so the MPI_INFO_ENV can get the cmd name
* and argv (but only if the user supplied a non-NULL argv!), and
* the requested thread level
*/
if (NULL == getenv("OMPI_COMMAND") && NULL != argv && NULL != argv[0]) {
opal_setenv("OMPI_COMMAND", argv[0], true, &environ);
}
if (NULL == getenv("OMPI_ARGV") && 1 < argc) {
char *tmp;
tmp = opal_argv_join(&argv[1], ' ');
opal_setenv("OMPI_ARGV", tmp, true, &environ);
free(tmp);
}
/* Setup RTE */
if (OMPI_SUCCESS != (ret = ompi_rte_init(NULL, NULL))) {
if (OMPI_SUCCESS != (ret = ompi_rte_init(&argc, &argv))) {
error = "ompi_mpi_init: ompi_rte_init failed";
goto error;
}

View file

@@ -67,10 +67,6 @@ bool ompi_singleton = false;
static pmix_proc_t myprocid;
static bool added_transport_keys = false;
static bool added_num_procs = false;
static bool added_app_ctx = false;
static char* pre_condition_transports_print(uint64_t *unique_key);
static int _setup_top_session_dir(char **sdir);
static int _setup_job_session_dir(char **sdir);
static int _setup_proc_session_dir(char **sdir);
@@ -504,13 +500,12 @@ int ompi_rte_init(int *pargc, char ***pargv)
int u32, *u32ptr;
uint16_t u16, *u16ptr;
char **peers=NULL;
char *envar, *ev1, *ev2;
char *ev1;
char *val;
size_t i;
uint64_t unique_key[2];
char *string_key;
pmix_value_t pval;
pmix_status_t rc;
char **tmp;
u32ptr = &u32;
u16ptr = &u16;
@@ -537,15 +532,17 @@ int ompi_rte_init(int *pargc, char ***pargv)
/* initialize the selected module */
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&myprocid, NULL, 0)))) {
/* we cannot run - this could be due to being direct launched
* without the required PMI support being built, so print
* out a help message indicating it */
opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret));
return OPAL_ERR_SILENT;
}
/* if our nspace starts with "singleton", then we are a singleton */
if (0 == strncmp(myprocid.nspace, "singleton", strlen("singleton"))) {
ompi_singleton = true;
/* if we get PMIX_ERR_UNREACH indicating that we cannot reach the
* server, then we assume we are operating as a singleton */
if (PMIX_ERR_UNREACH == ret) {
ompi_singleton = true;
} else {
/* we cannot run - this could be due to being direct launched
* without the required PMI support being built, so print
* out a help message indicating it */
opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret));
return OPAL_ERR_SILENT;
}
}
/* setup the process name fields - also registers the new nspace */
@@ -567,23 +564,35 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
opal_process_info.nodename = ev1; // ev1 is an allocated string
}
ompi_process_info.nodename = opal_process_info.nodename;
pmix_process_info.nodename = opal_process_info.nodename;
/* get our local rank from PMI */
/* get our local rank from PMIx */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_RANK,
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS != rc) {
ret = opal_pmix_convert_status(rc);
error = "local rank";
goto error;
if (ompi_singleton) {
/* just assume 0 */
u16 = 0;
} else {
ret = opal_pmix_convert_status(rc);
error = "local rank";
goto error;
}
}
pmix_process_info.my_local_rank = u16;
/* get our node rank from PMI */
/* get our node rank from PMIx */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NODE_RANK,
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS != rc) {
u16 = 0;
if (ompi_singleton) {
/* just assume 0 */
u16 = 0;
} else {
ret = opal_pmix_convert_status(rc);
error = "node rank";
goto error;
}
}
pmix_process_info.my_node_rank = u16;
@@ -593,27 +602,43 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_SIZE,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS != rc) {
ret = opal_pmix_convert_status(rc);
error = "job size";
goto error;
if (ompi_singleton) {
/* just assume 1 */
u32 = 1;
} else {
ret = opal_pmix_convert_status(rc);
error = "job size";
goto error;
}
}
pmix_process_info.num_procs = u32;
/* push into the environ for pickup in MPI layer for
* MPI-3 required info key
*/
if (NULL == getenv(OPAL_MCA_PREFIX"opal_ess_num_procs")) {
opal_asprintf(&ev1, OPAL_MCA_PREFIX"opal_ess_num_procs=%d", pmix_process_info.num_procs);
putenv(ev1);
added_num_procs = true;
/* get universe size */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_UNIV_SIZE,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS != rc) {
if (ompi_singleton) {
/* just assume 1 */
u32 = 1;
} else {
/* default to job size */
u32 = pmix_process_info.num_procs;
}
}
if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
opal_asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", pmix_process_info.num_procs);
putenv(ev2);
added_app_ctx = true;
pmix_process_info.univ_size = u32;
/* get number of app contexts */
pname.jobid = pmix_process_info.my_name.jobid;
pname.vpid = OPAL_VPID_WILDCARD;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_NUM_APPS,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.num_apps = u32;
} else {
pmix_process_info.num_apps = 1;
}
/* get our app number from PMI - ok if not found */
/* get our app number from PMIx - ok if not found */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APPNUM,
&pmix_process_info.my_name, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
@@ -622,8 +647,48 @@ int ompi_rte_init(int *pargc, char ***pargv)
pmix_process_info.app_num = 0;
}
/* if more than one app context, get the number of procs and first rank of each */
if (1 == pmix_process_info.num_apps) {
pmix_process_info.app_ldrs = strdup("0");
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
} else {
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_APP_SIZES", &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS != rc) {
/* assume it is just us */
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
} else {
pmix_process_info.app_sizes = val;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_FIRST_RANKS", &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS != rc) {
/* assume it is just us */
pmix_process_info.app_ldrs = strdup("0");
} else {
pmix_process_info.app_ldrs = val;
}
}
/* get our command - defaults to our appnum */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APP_ARGV,
&pname, (char**)&ev1, PMIX_STRING);
if (PMIX_SUCCESS == rc) {
pmix_process_info.command = ev1; // ev1 is an allocated string
} else if (NULL != pargv) {
tmp = *pargv;
if (NULL != tmp) {
pmix_process_info.command = opal_argv_join(tmp, ' ');
}
}
/* get our reincarnation number */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_REINCARNATION,
&OPAL_PROC_MY_NAME, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.reincarnation = u32;
}
/* get the number of local peers - required for wireup of
* shared memory BTL */
* shared memory BTL, defaults to local node */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_SIZE,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
@@ -634,24 +699,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
goto error;
}
/* setup transport keys in case the MPI layer needs them -
* we can use the jobfam and stepid as unique keys
* because they are unique values assigned by the RM
*/
if (NULL == getenv(OPAL_MCA_PREFIX"opal_precondition_transports")) {
unique_key[0] = (pmix_process_info.my_name.jobid & 0xff00) >> 16;
unique_key[1] = pmix_process_info.my_name.jobid & 0x00ff;
if (NULL == (string_key = pre_condition_transports_print(unique_key))) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
return OPAL_ERR_OUT_OF_RESOURCE;
}
opal_asprintf(&envar, OPAL_MCA_PREFIX"opal_precondition_transports=%s", string_key);
putenv(envar);
added_transport_keys = true;
/* cannot free the envar as that messes up our environ */
free(string_key);
}
/* retrieve temp directories info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_TMPDIR, &pname, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
@@ -692,14 +739,24 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
}
/* get our initial working directory - defaults to getting the value
* for our app */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_WDIR, &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
pmix_process_info.initial_wdir = val;
val = NULL;
}
/* identify our location */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
&pmix_process_info.my_name, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
pmix_process_info.cpuset = val;
pmix_proc_is_bound = true;
} else {
pmix_process_info.cpuset = NULL;
pmix_proc_is_bound = false;
}
/* get our local peers */
@@ -710,7 +767,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
error = "num local peers";
goto error;
}
/* retrieve the local peers */
/* retrieve the local peers - defaults to local node */
OPAL_MODEX_RECV_VALUE(rc, PMIX_LOCAL_PEERS,
&pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
@@ -762,11 +819,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
opal_argv_free(peers);
}
/* poor attempt to detect we are bound */
if (NULL != getenv("SLURM_CPU_BIND_TYPE")) {
pmix_proc_is_bound = true;
}
/* set the remaining opal_process_info fields. Note that
* the OPAL layer will have initialized these to NULL, and
* anyone between us would not have strdup'd the string, so
@@ -816,19 +868,6 @@ static bool check_file(const char *root, const char *path)
int ompi_rte_finalize(void)
{
/* remove the envars that we pushed into environ
* so we leave that structure intact
*/
if (added_transport_keys) {
unsetenv(OPAL_MCA_PREFIX"opal_precondition_transports");
}
if (added_num_procs) {
unsetenv(OPAL_MCA_PREFIX"opal_ess_num_procs");
}
if (added_app_ctx) {
unsetenv("OMPI_APP_CTX_NUM_PROCS");
}
/* shutdown pmix */
PMIx_Finalize(NULL, 0);
@@ -837,10 +876,43 @@ int ompi_rte_finalize(void)
opal_os_dirpath_destroy(pmix_process_info.job_session_dir,
false, check_file);
free(pmix_process_info.job_session_dir);
pmix_process_info.job_session_dir = NULL;
}
free (pmix_process_info.cpuset);
pmix_process_info.cpuset = NULL;
if (NULL != pmix_process_info.top_session_dir) {
free(pmix_process_info.top_session_dir);
pmix_process_info.top_session_dir = NULL;
}
if (NULL != pmix_process_info.proc_session_dir) {
free(pmix_process_info.proc_session_dir);
pmix_process_info.proc_session_dir = NULL;
}
if (NULL != pmix_process_info.app_sizes) {
free(pmix_process_info.app_sizes);
pmix_process_info.app_sizes = NULL;
}
if (NULL != pmix_process_info.app_ldrs) {
free(pmix_process_info.app_ldrs);
pmix_process_info.app_ldrs = NULL;
}
if (NULL != pmix_process_info.cpuset) {
free(pmix_process_info.cpuset);
pmix_process_info.cpuset = NULL;
}
if (NULL != pmix_process_info.command) {
free(pmix_process_info.command);
pmix_process_info.command = NULL;
}
if (NULL != pmix_process_info.initial_wdir) {
free(pmix_process_info.initial_wdir);
pmix_process_info.initial_wdir = NULL;
}
/* cleanup our internal nspace hack */
opal_pmix_finalize_nspace_tracker();
@@ -941,72 +1013,6 @@ void ompi_rte_wait_for_debugger(void)
PMIx_Deregister_event_handler(handler, NULL, NULL);
}
static char* pre_condition_transports_print(uint64_t *unique_key)
{
unsigned int *int_ptr;
size_t i, j, string_key_len, written_len;
char *string_key = NULL, *format = NULL;
/* string is two 64 bit numbers printed in hex with a dash between
* and zero padding.
*/
string_key_len = (sizeof(uint64_t) * 2) * 2 + strlen("-") + 1;
string_key = (char*) malloc(string_key_len);
if (NULL == string_key) {
return NULL;
}
string_key[0] = '\0';
written_len = 0;
/* get a format string based on the length of an unsigned int. We
* want to have zero padding for sizeof(unsigned int) * 2
* characters -- when printing as a hex number, each byte is
* represented by 2 hex characters. Format will contain something
* that looks like %08lx, where the number 8 might be a different
* number if the system has a different sized long (8 would be for
* sizeof(int) == 4)).
*/
opal_asprintf(&format, "%%0%dx", (int)(sizeof(unsigned int)) * 2);
/* print the first number */
int_ptr = (unsigned int*) &unique_key[0];
for (i = 0 ; i < sizeof(uint64_t) / sizeof(unsigned int) ; ++i) {
if (0 == int_ptr[i]) {
/* inject some energy */
for (j=0; j < sizeof(unsigned int); j++) {
int_ptr[i] |= j << j;
}
}
snprintf(string_key + written_len,
string_key_len - written_len,
format, int_ptr[i]);
written_len = strlen(string_key);
}
/* print the middle dash */
snprintf(string_key + written_len, string_key_len - written_len, "-");
written_len = strlen(string_key);
/* print the second number */
int_ptr = (unsigned int*) &unique_key[1];
for (i = 0 ; i < sizeof(uint64_t) / sizeof(unsigned int) ; ++i) {
if (0 == int_ptr[i]) {
/* inject some energy */
for (j=0; j < sizeof(unsigned int); j++) {
int_ptr[i] |= j << j;
}
}
snprintf(string_key + written_len,
string_key_len - written_len,
format, int_ptr[i]);
written_len = strlen(string_key);
}
free(format);
return string_key;
}
static int _setup_top_session_dir(char **sdir)
{
char *tmpdir;
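
[Aside, illustration only and not part of the patch: the OPAL_MODEX_RECV_VALUE_OPTIONAL calls in this file boil down to PMIx_Get against the job's nspace, using PMIX_RANK_WILDCARD for job-level keys such as PMIX_JOB_SIZE, with a local default when the key or the server is unavailable. A minimal standalone sketch of that underlying pattern, assuming a PMIx v3-or-later client library:]

#include <stdio.h>
#include <pmix.h>

int main(void)
{
    pmix_proc_t myproc, wildcard;
    pmix_value_t *val = NULL;
    uint32_t job_size = 1;              /* default, e.g. for a singleton */
    pmix_status_t rc = PMIx_Init(&myproc, NULL, 0);

    if (PMIX_SUCCESS != rc) {
        /* PMIX_ERR_UNREACH here is the condition the patch treats as "singleton" */
        fprintf(stderr, "PMIx_Init failed: %s\n", PMIx_Error_string(rc));
        return 1;
    }
    /* job-level data is posted against the wildcard rank of our nspace */
    PMIX_LOAD_PROCID(&wildcard, myproc.nspace, PMIX_RANK_WILDCARD);
    rc = PMIx_Get(&wildcard, PMIX_JOB_SIZE, NULL, 0, &val);
    if (PMIX_SUCCESS == rc && NULL != val) {
        job_size = val->data.uint32;
        PMIX_VALUE_RELEASE(val);
    }
    printf("job size = %u\n", (unsigned)job_size);
    PMIx_Finalize(NULL, 0);
    return 0;
}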

View file

@@ -243,7 +243,6 @@ typedef uint16_t ompi_local_rank_t;
typedef struct {
opal_process_name_t my_name;
char *my_hnp_uri;
char *nodename;
pid_t pid;
char *top_session_dir;
@@ -254,7 +253,14 @@ typedef struct {
int32_t num_local_peers;
uint32_t num_procs;
uint32_t app_num;
uint32_t univ_size;
char *app_sizes;
char *app_ldrs;
char *cpuset;
char *command;
uint32_t num_apps;
char *initial_wdir;
uint32_t reincarnation;
} pmix_process_info_t;
OMPI_DECLSPEC extern pmix_process_info_t pmix_process_info;
#define ompi_process_info pmix_process_info

View file

@@ -19,7 +19,7 @@
* reserved.
* Copyright (c) 2020 Google, LLC. All rights reserved.
*
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* Copyright (c) 2019-2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -125,7 +125,7 @@ struct mca_btl_vader_component_t {
char *my_segment; /**< this rank's base pointer */
size_t segment_size; /**< size of my_segment */
int32_t num_smp_procs; /**< current number of smp procs on this host */
int32_t local_rank; /**< current rank index at add_procs() time */
opal_atomic_int32_t local_rank; /**< current rank index at add_procs() time */
opal_free_list_t vader_frags_eager; /**< free list of vader send frags */
opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */
opal_free_list_t vader_frags_user; /**< free list of small inline frags */

View file

@@ -405,44 +405,6 @@ typedef struct {
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - string key (char*)
* p - pointer to the opal_process_name_t of the proc that posted
* the data (opal_process_name_t*)
* d - pointer to a location wherein the data object
* it to be returned (char**)
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \
do { \
pmix_proc_t _proc; \
pmix_value_t *_kv = NULL; \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), (s))); \
*(d) = NULL; \
*(sz) = 0; \
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
(r) = PMIx_Get(&(_proc), (s), NULL, 0, &(_kv)); \
if (NULL == _kv) { \
(r) = PMIX_ERR_NOT_FOUND; \
} else if (PMIX_SUCCESS == (r)) { \
*(d) = (uint8_t*)_kv->data.bo.bytes; \
*(sz) = _kv->data.bo.size; \
_kv->data.bo.bytes = NULL; /* protect the data */ \
} \
if (NULL != _kv) { \
PMIX_VALUE_RELEASE(_kv); \
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
@@ -484,6 +446,44 @@ typedef struct {
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - string key (char*)
* p - pointer to the opal_process_name_t of the proc that posted
* the data (opal_process_name_t*)
* d - pointer to a location wherein the data object
* it to be returned (char**)
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \
do { \
pmix_proc_t _proc; \
pmix_value_t *_kv = NULL; \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), (s))); \
*(d) = NULL; \
*(sz) = 0; \
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
(r) = PMIx_Get(&(_proc), (s), NULL, 0, &(_kv)); \
if (NULL == _kv) { \
(r) = PMIX_ERR_NOT_FOUND; \
} else if (PMIX_SUCCESS == (r)) { \
*(d) = (uint8_t*)_kv->data.bo.bytes; \
*(sz) = _kv->data.bo.size; \
_kv->data.bo.bytes = NULL; /* protect the data */ \
} \
if (NULL != _kv) { \
PMIX_VALUE_RELEASE(_kv); \
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
@@ -497,24 +497,25 @@ typedef struct {
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV(r, s, p, d, sz) \
do { \
char *_key; \
_key = mca_base_component_to_string((s)); \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), _key)); \
if (NULL == _key) { \
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
} else { \
OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \
free(_key); \
} \
#define OPAL_MODEX_RECV_OPTIONAL(r, s, p, d, sz) \
do { \
char *_key; \
_key = mca_base_component_to_string((s)); \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), _key)); \
if (NULL == _key) { \
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
} else { \
OPAL_MODEX_RECV_STRING_OPTIONAL((r), _key, (p), (d), (sz)); \
free(_key); \
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
@@ -547,6 +548,38 @@ typedef struct {
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - the MCA component that posted the data (mca_base_component_t*)
* p - pointer to the opal_process_name_t of the proc that posted
* the data (opal_process_name_t*)
* d - pointer to a location wherein the data object
* it to be returned (char**)
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV(r, s, p, d, sz) \
do { \
char *_key; \
_key = mca_base_component_to_string((s)); \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), _key)); \
if (NULL == _key) { \
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
} else { \
OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \
free(_key); \
} \
} while(0);
#define PMIX_ERROR_LOG(r) \
opal_output(0, "[%s:%d] PMIx Error: %s", __FILE__, __LINE__, PMIx_Error_string((r)))
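
[Aside, illustration only and not part of the patch: a typical call site for the "optional" flavor of these macros follows the pattern already used in ompi_rte_init() above. The fragment below mirrors that usage and is meant to live inside OMPI's runtime sources; the header that defines the OPAL_MODEX_* macros is not named in this diff, so its include is an assumption.]

#include "ompi/runtime/ompi_rte.h"   /* pmix_process_info; macro header path assumed */

static void fetch_initial_wdir(void)
{
    opal_process_name_t pname;
    pmix_status_t rc;
    char *val = NULL;

    /* job-level query: wildcard vpid within our own jobid */
    pname.jobid = pmix_process_info.my_name.jobid;
    pname.vpid = OPAL_VPID_WILDCARD;

    /* optional lookup: a missing key is skipped rather than reported as an error */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_WDIR, &pname, &val, PMIX_STRING);
    if (PMIX_SUCCESS == rc && NULL != val) {
        pmix_process_info.initial_wdir = val;   /* receiver owns the returned string */
    }
}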

@@ -1 +1 @@
Subproject commit 8b0a8360e7bb11e0ab48698eefb2715e549d4b1e
Subproject commit 305150c29f06d8e780b630f0b5992877005ca1dd

prrte

@@ -1 +1 @@
Subproject commit bf8d0192740f01cd7c86bda9c887fe3d7064585d
Subproject commit 21ccf39445358ab10c6ced09fa1e80f0047c98c2