9613b3176c
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach. I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive. This commit was SVN r18619.
505 строки
15 KiB
C
505 строки
15 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
#include "orte_config.h"
|
|
#include "orte/types.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "orte/util/show_help.h"
|
|
#include "opal/util/printf.h"
|
|
#include "opal/threads/tsd.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
#include "orte/util/name_fns.h"
|
|
|
|
#define ORTE_PRINT_NAME_ARGS_MAX_SIZE 50
|
|
#define ORTE_PRINT_NAME_ARG_NUM_BUFS 16
|
|
|
|
#define ORTE_SCHEMA_DELIMITER_CHAR '.'
|
|
#define ORTE_SCHEMA_DELIMITER_STRING "."
|
|
#define ORTE_SCHEMA_WILDCARD_CHAR '*'
|
|
#define ORTE_SCHEMA_WILDCARD_STRING "*"
|
|
#define ORTE_SCHEMA_INVALID_CHAR '$'
|
|
#define ORTE_SCHEMA_INVALID_STRING "$"
|
|
|
|
/* constructor - used to initialize namelist instance */
|
|
static void orte_namelist_construct(orte_namelist_t* list)
|
|
{
|
|
list->name.jobid = ORTE_JOBID_INVALID;
|
|
list->name.vpid = ORTE_VPID_INVALID;
|
|
}
|
|
|
|
/* destructor - used to free any resources held by instance */
|
|
static void orte_namelist_destructor(orte_namelist_t* list)
|
|
{
|
|
}
|
|
|
|
/* define instance of opal_class_t */
|
|
OBJ_CLASS_INSTANCE(orte_namelist_t, /* type name */
|
|
opal_list_item_t, /* parent "class" name */
|
|
orte_namelist_construct, /* constructor */
|
|
orte_namelist_destructor); /* destructor */
|
|
|
|
static bool fns_init=false;
|
|
|
|
static opal_tsd_key_t print_args_tsd_key;
|
|
char* orte_print_args_null = "NULL";
|
|
typedef struct {
|
|
char *buffers[ORTE_PRINT_NAME_ARG_NUM_BUFS];
|
|
int cntr;
|
|
} orte_print_args_buffers_t;
|
|
|
|
static void
|
|
buffer_cleanup(void *value)
|
|
{
|
|
int i;
|
|
orte_print_args_buffers_t *ptr;
|
|
|
|
if (NULL != value) {
|
|
ptr = (orte_print_args_buffers_t*)value;
|
|
for (i=0; i < ORTE_PRINT_NAME_ARG_NUM_BUFS; i++) {
|
|
free(ptr->buffers[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static orte_print_args_buffers_t*
|
|
get_print_name_buffer(void)
|
|
{
|
|
orte_print_args_buffers_t *ptr;
|
|
int ret, i;
|
|
|
|
ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr);
|
|
if (OPAL_SUCCESS != ret) return NULL;
|
|
|
|
if (NULL == ptr) {
|
|
ptr = (orte_print_args_buffers_t*)malloc(sizeof(orte_print_args_buffers_t));
|
|
for (i=0; i < ORTE_PRINT_NAME_ARG_NUM_BUFS; i++) {
|
|
ptr->buffers[i] = (char *) malloc((ORTE_PRINT_NAME_ARGS_MAX_SIZE+1) * sizeof(char));
|
|
}
|
|
ptr->cntr = 0;
|
|
ret = opal_tsd_setspecific(print_args_tsd_key, (void*)ptr);
|
|
}
|
|
|
|
return (orte_print_args_buffers_t*) ptr;
|
|
}
|
|
|
|
char* orte_util_print_name_args(const orte_process_name_t *name)
|
|
{
|
|
orte_print_args_buffers_t *ptr;
|
|
char *job, *vpid;
|
|
|
|
/* protect against NULL names */
|
|
if (NULL == name) {
|
|
/* get the next buffer */
|
|
ptr = get_print_name_buffer();
|
|
if (NULL == ptr) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return orte_print_args_null;
|
|
}
|
|
/* cycle around the ring */
|
|
if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
|
|
ptr->cntr = 0;
|
|
}
|
|
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]");
|
|
return ptr->buffers[ptr->cntr-1];
|
|
}
|
|
|
|
/* get the jobid and vpid strings first - this will protect us from
|
|
* stepping on each other's buffer. This also guarantees
|
|
* that the print_args function has been initialized, so
|
|
* we don't need to duplicate that here
|
|
*/
|
|
job = orte_util_print_jobids(name->jobid);
|
|
vpid = orte_util_print_vpids(name->vpid);
|
|
|
|
/* get the next buffer */
|
|
ptr = get_print_name_buffer();
|
|
|
|
if (NULL == ptr) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return orte_print_args_null;
|
|
}
|
|
|
|
/* cycle around the ring */
|
|
if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
|
|
ptr->cntr = 0;
|
|
}
|
|
|
|
snprintf(ptr->buffers[ptr->cntr++],
|
|
ORTE_PRINT_NAME_ARGS_MAX_SIZE,
|
|
"[%s,%s]", job, vpid);
|
|
|
|
return ptr->buffers[ptr->cntr-1];
|
|
}
|
|
|
|
char* orte_util_print_jobids(const orte_jobid_t job)
|
|
{
|
|
orte_print_args_buffers_t *ptr;
|
|
int rc;
|
|
unsigned long tmp1, tmp2;
|
|
|
|
if (!fns_init) {
|
|
/* setup the print_args function */
|
|
if (ORTE_SUCCESS != (rc = opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return NULL;
|
|
}
|
|
fns_init = true;
|
|
}
|
|
|
|
ptr = get_print_name_buffer();
|
|
|
|
if (NULL == ptr) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return orte_print_args_null;
|
|
}
|
|
|
|
/* cycle around the ring */
|
|
if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
|
|
ptr->cntr = 0;
|
|
}
|
|
|
|
if (ORTE_JOBID_INVALID == job) {
|
|
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[INVALID]");
|
|
} else if (ORTE_JOBID_WILDCARD == job) {
|
|
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[WILDCARD]");
|
|
} else {
|
|
tmp1 = ((unsigned long)job & 0xffff0000) >> 16;
|
|
tmp2 = (unsigned long)job & 0x0000ffff;
|
|
snprintf(ptr->buffers[ptr->cntr++],
|
|
ORTE_PRINT_NAME_ARGS_MAX_SIZE,
|
|
"[%lu,%lu]", tmp1, tmp2);
|
|
}
|
|
return ptr->buffers[ptr->cntr-1];
|
|
}
|
|
|
|
char* orte_util_print_vpids(const orte_vpid_t vpid)
|
|
{
|
|
orte_print_args_buffers_t *ptr;
|
|
int rc;
|
|
|
|
if (!fns_init) {
|
|
/* setup the print_args function */
|
|
if (ORTE_SUCCESS != (rc = opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return NULL;
|
|
}
|
|
fns_init = true;
|
|
}
|
|
|
|
ptr = get_print_name_buffer();
|
|
|
|
if (NULL == ptr) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return orte_print_args_null;
|
|
}
|
|
|
|
/* cycle around the ring */
|
|
if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
|
|
ptr->cntr = 0;
|
|
}
|
|
|
|
if (ORTE_VPID_INVALID == vpid) {
|
|
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "INVALID");
|
|
} else if (ORTE_VPID_WILDCARD == vpid) {
|
|
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD");
|
|
} else {
|
|
snprintf(ptr->buffers[ptr->cntr++],
|
|
ORTE_PRINT_NAME_ARGS_MAX_SIZE,
|
|
"%ld", (long)vpid);
|
|
}
|
|
return ptr->buffers[ptr->cntr-1];
|
|
}
|
|
|
|
|
|
|
|
/*** STRING FUNCTIONS ***/
|
|
int orte_util_convert_jobid_to_string(char **jobid_string, const orte_jobid_t jobid)
|
|
{
|
|
/* check for wildcard value - handle appropriately */
|
|
if (ORTE_JOBID_WILDCARD == jobid) {
|
|
*jobid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING);
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
if (0 > asprintf(jobid_string, "%ld", (long) jobid)) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
int orte_util_convert_string_to_jobid(orte_jobid_t *jobid, const char* jobidstring)
|
|
{
|
|
if (NULL == jobidstring) { /* got an error */
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
*jobid = ORTE_JOBID_INVALID;
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
/** check for wildcard character - handle appropriately */
|
|
if (0 == strcmp(ORTE_SCHEMA_WILDCARD_STRING, jobidstring)) {
|
|
*jobid = ORTE_JOBID_WILDCARD;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/* check for invalid value */
|
|
if (0 == strcmp(ORTE_SCHEMA_INVALID_STRING, jobidstring)) {
|
|
*jobid = ORTE_JOBID_INVALID;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
*jobid = strtoul(jobidstring, NULL, 10);
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
int orte_util_convert_vpid_to_string(char **vpid_string, const orte_vpid_t vpid)
|
|
{
|
|
/* check for wildcard value - handle appropriately */
|
|
if (ORTE_VPID_WILDCARD == vpid) {
|
|
*vpid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING);
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/* check for invalid value - handle appropriately */
|
|
if (ORTE_VPID_INVALID == vpid) {
|
|
*vpid_string = strdup(ORTE_SCHEMA_INVALID_STRING);
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
if (0 > asprintf(vpid_string, "%ld", (long) vpid)) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
int orte_util_convert_string_to_vpid(orte_vpid_t *vpid, const char* vpidstring)
|
|
{
|
|
if (NULL == vpidstring) { /* got an error */
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
*vpid = ORTE_VPID_INVALID;
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
/** check for wildcard character - handle appropriately */
|
|
if (0 == strcmp(ORTE_SCHEMA_WILDCARD_STRING, vpidstring)) {
|
|
*vpid = ORTE_VPID_WILDCARD;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/* check for invalid value */
|
|
if (0 == strcmp(ORTE_SCHEMA_INVALID_STRING, vpidstring)) {
|
|
*vpid = ORTE_VPID_INVALID;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
*vpid = strtol(vpidstring, NULL, 10);
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
int orte_util_convert_string_to_process_name(orte_process_name_t *name,
|
|
const char* name_string)
|
|
{
|
|
char *temp, *token;
|
|
orte_jobid_t job;
|
|
orte_vpid_t vpid;
|
|
int return_code=ORTE_SUCCESS;
|
|
|
|
/* set default */
|
|
name->jobid = ORTE_JOBID_INVALID;
|
|
name->vpid = ORTE_VPID_INVALID;
|
|
|
|
/* check for NULL string - error */
|
|
if (NULL == name_string) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
temp = strdup(name_string); /** copy input string as the strtok process is destructive */
|
|
token = strtok(temp, ORTE_SCHEMA_DELIMITER_STRING); /** get first field -> jobid */
|
|
|
|
/* check for error */
|
|
if (NULL == token) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
/* check for WILDCARD character - assign
|
|
* value accordingly, if found
|
|
*/
|
|
if (0 == strcmp(token, ORTE_SCHEMA_WILDCARD_STRING)) {
|
|
job = ORTE_JOBID_WILDCARD;
|
|
} else if (0 == strcmp(token, ORTE_SCHEMA_INVALID_STRING)) {
|
|
job = ORTE_JOBID_INVALID;
|
|
} else {
|
|
job = strtoul(token, NULL, 10);
|
|
}
|
|
|
|
token = strtok(NULL, ORTE_SCHEMA_DELIMITER_STRING); /** get next field -> vpid */
|
|
|
|
/* check for error */
|
|
if (NULL == token) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
/* check for WILDCARD character - assign
|
|
* value accordingly, if found
|
|
*/
|
|
if (0 == strcmp(token, ORTE_SCHEMA_WILDCARD_STRING)) {
|
|
vpid = ORTE_VPID_WILDCARD;
|
|
} else if (0 == strcmp(token, ORTE_SCHEMA_INVALID_STRING)) {
|
|
vpid = ORTE_VPID_INVALID;
|
|
} else {
|
|
vpid = strtoul(token, NULL, 10);
|
|
}
|
|
|
|
name->jobid = job;
|
|
name->vpid = vpid;
|
|
|
|
free(temp);
|
|
|
|
return return_code;
|
|
}
|
|
|
|
int orte_util_convert_process_name_to_string(char **name_string,
|
|
const orte_process_name_t* name)
|
|
{
|
|
char *tmp;
|
|
|
|
if (NULL == name) { /* got an error */
|
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
return ORTE_ERR_BAD_PARAM;
|
|
}
|
|
|
|
/* check for wildcard and invalid values - where encountered, insert the
|
|
* corresponding string so we can correctly parse the name string when
|
|
* it is passed back to us later
|
|
*/
|
|
if (ORTE_JOBID_WILDCARD == name->jobid) {
|
|
asprintf(&tmp, "%s", ORTE_SCHEMA_WILDCARD_STRING);
|
|
} else if (ORTE_JOBID_INVALID == name->jobid) {
|
|
asprintf(&tmp, "%s", ORTE_SCHEMA_INVALID_STRING);
|
|
} else {
|
|
asprintf(&tmp, "%lu", (unsigned long)name->jobid);
|
|
}
|
|
|
|
if (ORTE_VPID_WILDCARD == name->vpid) {
|
|
asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_WILDCARD_STRING);
|
|
} else if (ORTE_VPID_INVALID == name->vpid) {
|
|
asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_INVALID_STRING);
|
|
} else {
|
|
asprintf(name_string, "%s%c%lu", tmp, ORTE_SCHEMA_DELIMITER_CHAR, (unsigned long)name->vpid);
|
|
}
|
|
free(tmp);
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
/**** CREATE PROCESS NAME ****/
|
|
int orte_util_create_process_name(orte_process_name_t **name,
|
|
orte_jobid_t job,
|
|
orte_vpid_t vpid)
|
|
{
|
|
*name = NULL;
|
|
|
|
*name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
|
if (NULL == *name) { /* got an error */
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
(*name)->jobid = job;
|
|
(*name)->vpid = vpid;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/**** COMPARE NAME FIELDS ****/
|
|
int orte_util_compare_name_fields(orte_ns_cmp_bitmask_t fields,
|
|
const orte_process_name_t* name1,
|
|
const orte_process_name_t* name2)
|
|
{
|
|
/* handle the NULL pointer case */
|
|
if (NULL == name1 && NULL == name2) {
|
|
return OPAL_EQUAL;
|
|
} else if (NULL == name1) {
|
|
return OPAL_VALUE2_GREATER;
|
|
} else if (NULL == name2) {
|
|
return OPAL_VALUE1_GREATER;
|
|
}
|
|
|
|
/* in this comparison function, we check for exact equalities.
|
|
* In the case of wildcards, we check to ensure that the fields
|
|
* actually match those values - thus, a "wildcard" in this
|
|
* function does not actually stand for a wildcard value, but
|
|
* rather a specific value
|
|
*/
|
|
|
|
/* check job id */
|
|
|
|
if (ORTE_NS_CMP_JOBID & fields) {
|
|
if (name1->jobid < name2->jobid) {
|
|
return OPAL_VALUE2_GREATER;
|
|
} else if (name1->jobid > name2->jobid) {
|
|
return OPAL_VALUE1_GREATER;
|
|
}
|
|
}
|
|
|
|
/* get here if jobid's are equal, or not being checked
|
|
* now check vpid
|
|
*/
|
|
|
|
if (ORTE_NS_CMP_VPID & fields) {
|
|
if (name1->vpid < name2->vpid) {
|
|
return OPAL_VALUE2_GREATER;
|
|
} else if (name1->vpid > name2->vpid) {
|
|
return OPAL_VALUE1_GREATER;
|
|
}
|
|
}
|
|
|
|
/* only way to get here is if all fields are being checked and are equal,
|
|
* or jobid not checked, but vpid equal,
|
|
* only vpid being checked, and equal
|
|
* return that fact
|
|
*/
|
|
return OPAL_EQUAL;
|
|
}
|
|
|
|
|
|
uint64_t orte_util_hash_name(const orte_process_name_t * name) {
|
|
uint64_t hash;
|
|
|
|
hash = name->jobid;
|
|
hash <<= sizeof(name->jobid) * 8;
|
|
hash += name->vpid;
|
|
|
|
return hash;
|
|
}
|