784b9b9f5b
the BATCH_PARTITION_ID anymore, use the ras-alps-command.sh script to figure out the job ID to query from ALPS. Gracefully report errors, update the help file, and parse the sysconfig file. This commit was SVN r21772.
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2008      UT-Battelle, LLC.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <alps/apInfo.h>
|
|
|
|
#include "opal/mca/installdirs/installdirs.h"
|
|
#include "opal/util/output.h"
|
|
#include "orte/util/show_help.h"
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
#include "ras_alps.h"
|
|
#include "orte/mca/ras/base/ras_private.h"
|
|
|
|
|
|
/*
 * Local functions
 */
static int orte_ras_alps_allocate(opal_list_t *nodes);
static int orte_ras_alps_finalize(void);
static char *ras_alps_getline(FILE *fp);
static int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                           unsigned int *uMe);

/*
 * Global variable
 */
orte_ras_base_module_t orte_ras_alps_module = {
    orte_ras_alps_allocate,
    orte_ras_alps_finalize
};
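
/* Note (an assumption for illustration; only the field names above are taken
 * from this file): the RAS base framework drives a module through these two
 * entry points, roughly as
 *
 *     opal_list_t nodes;
 *     OBJ_CONSTRUCT(&nodes, opal_list_t);
 *     orte_ras_alps_module.allocate(&nodes);
 *     ...
 *     orte_ras_alps_module.finalize();
 */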

/**
 * Discover available (pre-allocated) nodes.  Allocate the
 * requested number of nodes/process slots to the job.
 */
static int orte_ras_alps_allocate(opal_list_t *nodes)
{
    /* Pathname of the ALPS system configuration file; we parse it below to
     * find the directory that holds the scheduler's appinfo file. */
    const char alps_sysconfig[] = "/etc/sysconfig/alps";
    unsigned int alps_res_id;
    int ret;
    FILE *fp;
    char *alps_batch_id;
    char *endptr;
    char *str = NULL;            /* NULL until an appinfo path has been built */
    char *alps_config_str;

    alps_batch_id = getenv("OMPI_ALPS_RESID");
    if (NULL == alps_batch_id) {
        orte_show_help("help-ras-alps.txt", "alps-env-var-not-found", 1,
                       "OMPI_ALPS_RESID");
        return ORTE_ERR_NOT_FOUND;
    }
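
    /* Per the commit message, OMPI_ALPS_RESID is expected to hold the numeric
     * ALPS reservation ID (e.g. OMPI_ALPS_RESID=1234), typically exported
     * into the job's environment by the accompanying ras-alps-command.sh
     * script.  The example value here is illustrative only. */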

    alps_res_id = (unsigned int)strtol(alps_batch_id, &endptr, 10);
    if (alps_batch_id[0] == '\0' || endptr[0] != '\0') {  /* not a bare decimal number */
        orte_show_help("help-ras-alps.txt", "alps-env-var-invalid", 1,
                       alps_batch_id);
        return ORTE_ERR_NOT_FOUND;
    }

    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: Using ALPS configuration file: \"%s\"",
                        alps_sysconfig);

    fp = fopen(alps_sysconfig, "r");
    if (NULL == fp) {
        ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    while ((alps_config_str = ras_alps_getline(fp))) {
        char *cpq;
        char *cpr;

        cpq = strchr(alps_config_str, '#');   /* Find comment marker (any '#') */
        cpr = strchr(alps_config_str, '=');   /* Find variable assignment      */
        if (!cpr ||                           /* Skip if not a definition      */
            (cpq && cpq < cpr)) {             /* Skip if commented out         */
            free(alps_config_str);
            continue;
        }
        for (cpr--;                           /* Trim trailing whitespace */
             (*cpr == ' ' || *cpr == '\t'); cpr--);
        for (cpq = alps_config_str;           /* Trim leading whitespace  */
             (*cpq == ' ' || *cpq == '\t'); cpq++);

        /* Filter to the one variable we need. */
        if (strncmp(cpq, "ALPS_SHARED_DIR_PATH", 20)) {
            free(alps_config_str);
            continue;
        }

        if (!(cpq = strchr(cpr, '"'))) {      /* Can't find pathname start */
            free(alps_config_str);
            fclose(fp);
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if (!(cpr = strchr(++cpq, '"'))) {    /* Can't find pathname end   */
            free(alps_config_str);
            fclose(fp);
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        *cpr = '\0';
        if (strlen(cpq)+8 > PATH_MAX) {       /* Bad configuration; +8 for "/appinfo" */
            free(alps_config_str);
            fclose(fp);
            errno = ENAMETOOLONG;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if (0 > asprintf(&str, "%s/appinfo", cpq)) {
            free(alps_config_str);
            fclose(fp);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        free(alps_config_str);
        break;
    }
    fclose(fp);

    /* If the file ended without an ALPS_SHARED_DIR_PATH definition, there is
     * no appinfo path to read. */
    if (NULL == str) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: Located ALPS scheduler file: \"%s\"",
                        str);

    /* Parse the ALPS scheduler information file (appinfo) for the node list. */
    if (ORTE_SUCCESS != (ret = orte_ras_alps_read_appinfo_file(nodes, str, &alps_res_id))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

 cleanup:
    free(str);

    /* All done */
    if (ORTE_SUCCESS == ret) {
        opal_output_verbose(1, orte_ras_base.ras_output,
                            "ras:alps:allocate: success");
    } else {
        opal_output_verbose(1, orte_ras_base.ras_output,
                            "ras:alps:allocate: failure (base_allocate_nodes=%d)", ret);
    }
    return ret;
}
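
/*
 * Illustrative example (the path is assumed, not taken from a real system):
 * given a line such as
 *
 *     ALPS_SHARED_DIR_PATH="/var/spool/alps"
 *
 * in /etc/sysconfig/alps, the loop above extracts the quoted directory and
 * builds the scheduler file name "/var/spool/alps/appinfo", which is then
 * handed to orte_ras_alps_read_appinfo_file().
 */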

#define RAS_BASE_FILE_MAX_LINE_LENGTH (PATH_MAX*2)

/* Read one line from fp into a freshly allocated buffer, stripping the
 * trailing newline if present.  Returns NULL at EOF or on read error; a line
 * longer than the buffer is returned in pieces across successive calls. */
static char *ras_alps_getline(FILE *fp)
{
    char *ret, *buff = NULL;
    char input[RAS_BASE_FILE_MAX_LINE_LENGTH+1];
    size_t len;

    ret = fgets(input, RAS_BASE_FILE_MAX_LINE_LENGTH, fp);
    if (NULL != ret) {
        len = strlen(input);
        if (len > 0 && '\n' == input[len-1]) {
            input[len-1] = '\0';   /* remove newline */
        }
        buff = strdup(input);
    }

    return buff;
}
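
/* Usage sketch (illustrative only): the returned buffer belongs to the
 * caller and must be free()d, as the sysconfig loop above does:
 *
 *     while ((line = ras_alps_getline(fp))) {
 *         ... inspect line ...
 *         free(line);
 *     }
 */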

static int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                           unsigned int *uMe)
{
    int iq;
    int ix;
    int iFd;                        /* file descriptor for appinfo       */
    int iTrips;                     /* counter for appinfo read attempts */
    int max_appinfo_read_attempts;
    struct stat ssBuf;              /* stat buffer                       */
    size_t szLen;                   /* size of appinfo (file)            */
    off_t oNow;                     /* current appinfo data offset       */
    off_t oInfo=sizeof(appInfoHdr_t);
    off_t oDet=sizeof(appInfo_t);
    off_t oSlots;
    off_t oEntry;
    int32_t sNodes=0;
    char *cpBuf;
    char *hostname;
    orte_node_t *node = NULL;
    appInfoHdr_t *apHdr;            /* ALPS header structure             */
    appInfo_t *apInfo;              /* ALPS table info structure         */
    cmdDetail_t *apDet;             /* ALPS command details              */
    placeList_t *apSlots;           /* ALPS node-specific info           */

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    while (!oNow) {                 /* Until appinfo read is complete */
        iTrips++;                   /* Increment trip count */

        iFd = open(filename, O_RDONLY);
        if (iFd == -1) {            /* If the file is absent, ALPS is down */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);   /* Increasing delays, .05 s/try */

            /* Fail only when the number of attempts has been exhausted. */
            if (iTrips <= max_appinfo_read_attempts) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if (fstat(iFd, &ssBuf) == -1) {  /* If stat fails, access denied */
            close(iFd);
            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen = ssBuf.st_size;      /* Get buffer size */
        cpBuf = malloc(szLen+1);    /* Allocate buffer */
        if (NULL == cpBuf) {
            close(iFd);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Repeated attempts to read appinfo, with an increasing delay between
         * successive attempts to allow scheduler I/O a chance to complete. */
        if ((oNow = read(iFd, cpBuf, szLen)) != (off_t)szLen) {
            /* This is where apstat fails; record it and try again. */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes",
                                (long)oNow);

            free(cpBuf);            /* Free (old) buffer */
            close(iFd);             /* Close (old) descriptor */
            oNow = 0;               /* Reset byte count */
            usleep(iTrips*50000);   /* Increasing delays, .05 s/try */

            /* Fail only when the number of attempts has been exhausted. */
            if (iTrips <= max_appinfo_read_attempts) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);

    /* Now that we have the scheduler information, we just have to parse it
     * for the data that we seek. */
    oNow = 0;
    apHdr = (appInfoHdr_t *)cpBuf;

    /* The header (apHdr) tells us how many entries are in the file:
     *
     *     apHdr->apNum
     */
    for (iq = 0; iq < apHdr->apNum; iq++) {  /* Parse all entries in file */

        /* Just at this level, a lot of information is available:
         *
         *     apInfo->apid      ... ALPS job ID
         *     apInfo->resId     ... ALPS reservation ID
         *     apInfo->numCmds   ... number of executables
         *     apInfo->numPlaces ... number of PEs
         */
        apInfo = (appInfo_t *)(cpBuf+oNow+oInfo);

        /* Calculate the dependent offsets. */
        oSlots = sizeof(cmdDetail_t)*apInfo->numCmds;
        oEntry = sizeof(placeList_t)*apInfo->numPlaces;

        /* We can also extract details of the commands currently running on
         * the nodes:
         *
         *     apDet[].fixedPerNode ... PEs per node
         *     apDet[].nodeCnt      ... number of nodes in use
         *     apDet[].memory       ... MB/PE memory limit
         *     apDet[].cmd          ... command being run
         */
        apDet = (cmdDetail_t *)(cpBuf+oNow+oInfo+oDet);

        /* Finally, we get to the actual node-specific information:
         *
         *     apSlots[ix].cmdIx    ... index of apDet[].cmd
         *     apSlots[ix].nid      ... NodeID (NID)
         *     apSlots[ix].procMask ... mask for processors; needs a 16-bit shift
         */
        apSlots = (placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);

        oNow += (oDet+oSlots+oEntry);         /* Target next entry */

        if (apInfo->resId != *uMe) continue;  /* Filter to our reservation ID */

        for (ix = 0; ix < apInfo->numPlaces; ix++) {
            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            if (0 > asprintf(&hostname, "%d", apSlots[ix].nid)) {
                free(cpBuf);
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /* If this matches the previous node name, just bump the slot count. */
            if (NULL != node && !strcmp(node->name, hostname)) {
                free(hostname);     /* free hostname since it isn't needed */
                ++node->slots;
            } else {                /* must be new, so add it to the list */
                opal_output_verbose(1, orte_ras_base.ras_output,
                                    "ras:alps:read_appinfo: added NID %d to list",
                                    apSlots[ix].nid);

                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                node->launch_id = sNodes;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                opal_list_append(nodes, &node->super);
                sNodes++;           /* Increment the node count */
            }
        }
        break;                      /* Extended details ignored */
    }
    free(cpBuf);                    /* Free the buffer */

    return ORTE_SUCCESS;
}
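
/*
 * For reference, the on-disk appinfo layout implied by the offset arithmetic
 * above (reconstructed from this file's own pointer math; any further field
 * details are not guaranteed):
 *
 *     appInfoHdr_t                 header; apNum entries follow
 *     for each entry:
 *         appInfo_t                apid, resId, numCmds, numPlaces
 *         cmdDetail_t[numCmds]     per-command details
 *         placeList_t[numPlaces]   per-PE placements (cmdIx, nid, procMask)
 */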

/*
 * There's really nothing to do here.
 */
static int orte_ras_alps_finalize(void)
{
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:finalize: success (nothing to do)");
    return ORTE_SUCCESS;
}