1
1
openmpi/orte/util/pre_condition_transports.c
Ralph Castain 9613b3176c Effectively revert the orte_output system and return to direct use of opal_output at all levels. Retain the orte_show_help subsystem to allow aggregation of show_help messages at the HNP.
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach.

I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive.

This commit was SVN r18619.
2008-06-09 14:53:58 +00:00

166 строки
5.1 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/opal_environ.h"
#include "opal/util/argv.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/util/pre_condition_transports.h"
/* some network transports require a little bit of information to
* "pre-condition" them - i.e., to setup their individual transport
* connections so they can generate their endpoint addresses. This
* function provides a means for doing so. The resulting info is placed
* into the app_context's env array so it will automatically be pushed
* into the environment of every MPI process when launched.
*/
/*
 * Fallback key generator: fill a two-element key from the C library's
 * rand(), seeded with the current time.
 *
 * @param unique_key  Array of (at least) two uint64_t values; elements
 *                    [0] and [1] are overwritten.
 *
 * Each element receives a single rand() result, so only values in the
 * range [0, RAND_MAX] are produced. The generator is re-seeded from
 * time(NULL) on every call.
 */
static inline void orte_pre_condition_transports_use_rand(uint64_t* unique_key) {
    srand((unsigned int)time(NULL));
    /* The key has exactly two elements: valid indices are 0 and 1. */
    unique_key[0] = (uint64_t)rand();
    unique_key[1] = (uint64_t)rand();
}
/*
 * Generate a random 128-bit key, render it as a hex string of the form
 * "<64 bits>-<64 bits>", and store that string in the env array of every
 * app_context of the given job under the environment-variable name
 * returned by mca_base_param_environ_variable("orte_precondition_transports").
 *
 * @param jdata  Job whose app contexts (jdata->apps->addr, jdata->num_apps
 *               entries) receive the key in their env array.
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_OUT_OF_RESOURCE if the
 *         environment-variable name could not be obtained.
 */
int orte_pre_condition_transports(orte_job_t *jdata)
{
    /* The key is generated as two 64-bit values but printed in
     * unsigned-int sized pieces. A union gives both views of the same
     * storage without casting between incompatible pointer types.
     */
    union {
        uint64_t     u64[2];
        unsigned int words[(2 * sizeof(uint64_t)) / sizeof(unsigned int)];
    } key = { { 0, 0 } };
    const size_t words_per_u64 = sizeof(uint64_t) / sizeof(unsigned int);
    /* when printing as a hex number, each byte is represented by 2 hex
     * characters, so an unsigned int is zero-padded to
     * sizeof(unsigned int) * 2 characters (8 when sizeof(int) == 4).
     */
    const int hex_width = (int)(sizeof(unsigned int)) * 2;
    /* two 64 bit numbers printed in hex with a dash between and zero
     * padding; sizeof("-") accounts for the dash and the terminating NUL.
     */
    char string_key[(sizeof(uint64_t) * 2) * 2 + sizeof("-")];
    size_t i, written_len;
    char *cs_env;
    orte_std_cntr_t n;
    orte_app_context_t **apps;
#if !defined(__WINDOWS__)
    int fd_rand;
    ssize_t bytes_read;

    /* Prefer /dev/urandom; if it cannot be opened or does not deliver
     * the full key, fall back to the rand()-based generator.
     */
    fd_rand = open("/dev/urandom", O_RDONLY);
    if (-1 == fd_rand) {
        orte_pre_condition_transports_use_rand(key.u64);
    } else {
        bytes_read = read(fd_rand, key.u64, sizeof(key.u64));
        /* close on every path so a short or failed read does not leak
         * the descriptor
         */
        close(fd_rand);
        if (bytes_read != (ssize_t)sizeof(key.u64)) {
            orte_pre_condition_transports_use_rand(key.u64);
        }
    }
#else
    {
        unsigned int random_value;
        rand_s( &random_value );
        key.u64[0] = (uint64_t)random_value;
        rand_s( &random_value );
        key.u64[1] = (uint64_t)random_value;
    }
#endif  /* !defined(__WINDOWS__) */

    string_key[0] = '\0';
    written_len = 0;

    /* print the words of the first number, the middle dash, then the
     * words of the second number, each word zero-padded to hex_width
     */
    for (i = 0 ; i < 2 * words_per_u64 ; ++i) {
        if (i == words_per_u64) {
            snprintf(string_key + written_len, sizeof(string_key) - written_len, "-");
            written_len = strlen(string_key);
        }
        snprintf(string_key + written_len,
                 sizeof(string_key) - written_len,
                 "%0*x", hex_width, key.words[i]);
        written_len = strlen(string_key);
    }

    if (NULL == (cs_env = mca_base_param_environ_variable("orte_precondition_transports",NULL,NULL))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* place the key into every app_context's env array */
    apps = (orte_app_context_t**)jdata->apps->addr;
    for (n=0; n < jdata->num_apps; n++) {
        opal_setenv(cs_env, string_key, true, &apps[n]->env);
    }

    free(cs_env);

    return ORTE_SUCCESS;
}