1
1

Good ol' Cray changed the way node/cpu allocation is handled in their latest release of ALPS, and so our allocator is broken. Adjust for the revised method, but preserve the older method for those Cray users who have not updated their systems.

cmr:v1.7

This commit was SVN r27911.
Этот коммит содержится в:
Ralph Castain 2013-01-25 21:53:31 +00:00
родитель f6b4db0b79
Коммит 6eaf601ae6

Просмотреть файл

@ -34,6 +34,10 @@
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#include <alps/apInfo.h>
typedef int (*parser_fn_t)(char **val_if_found, FILE *fp,
@ -88,6 +92,9 @@ parser_ini(char **val_if_found, FILE *fp, const char *var_name)
{
char *alps_config_str = NULL;
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: parser_ini");
/* invalid argument */
if (NULL == val_if_found) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
@ -157,6 +164,9 @@ parser_separated_columns(char **val_if_found, FILE *fp, const char *var_name)
int var_len = strlen(var_name);
int i;
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: parser_separated_columns");
/* invalid argument */
if (NULL == val_if_found) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
@ -374,14 +384,20 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
orte_node_t *node = NULL, *n2;
appInfoHdr_t *apHdr; /* ALPS header structure */
appInfo_t *apInfo; /* ALPS table info structure */
cmdDetail_t *apDet; /* ALPS command details */
#if ALPS_APPINFO_VERSION==0
placeList_t *apSlots; /* ALPS node specific info */
#else
placeNodeList_t *apNodes;
#endif
bool added;
opal_list_item_t *item;
orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
oNow=0;
iTrips=0;
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: begin processing appinfo file");
while(!oNow) { /* Until appinfo read is complete */
iTrips++; /* Increment trip count */
@ -415,7 +431,7 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
/* This is where apstat fails; we will record it and try again. */
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: ALPS information read failure: %ld bytes", oNow);
"ras:alps:allocate: ALPS information read failure: %ld bytes", (long int)oNow);
free(cpBuf); /* Free (old) buffer */
close(iFd); /* Close (old) descriptor */
@ -429,12 +445,17 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
}
}
close(iFd);
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: file %s read", filename);
/* Now that we have the scheduler information, we just have to parse it for *
* the data that we seek. */
oNow=0;
apHdr=(appInfoHdr_t *)cpBuf;
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: %d entries in file", apHdr->apNum);
/* Header info (apHdr) tells us how many entries are in the file: *
* *
* apHdr->apNum */
@ -451,15 +472,13 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
/* Calculate the dependent offsets. */
oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;
oEntry=sizeof(placeList_t)*apInfo->numPlaces;
/* Also, we can extract details of commands currently running on nodes: *
* *
* apDet[].fixedPerNode ... PEs per node *
* apDet[].nodeCnt ... number of nodes in use *
* apDet[].memory ... MB/PE memory limit *
* apDet[].cmd ... command being run */
apDet=(cmdDetail_t *)(cpBuf+oNow+oInfo+oDet);
opal_output_verbose(1, orte_ras_base.ras_output,
"ras:alps:allocate: read data for resId %u - myId %u",
apInfo->resId, *uMe);
#if ALPS_APPINFO_VERSION==0
/* Finally, we get to the actual node-specific information: *
* *
@ -467,11 +486,16 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
* apSlots[ix].nid ... NodeID (NID) *
* apSlots[ix].procMask ... mask for processors... need 16-bit shift */
apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
oEntry=sizeof(placeList_t)*apInfo->numPlaces;
oNow+=(oDet+oSlots+oEntry); /* Target next slot */
if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */
/* in this early version of alps, there is one entry for each PE in the
* allocation - so cycle across the numPlaces entries, assigning a slot
* for each time a node is named
*/
for( ix=0; ix<apInfo->numPlaces; ix++ ) {
opal_output_verbose(5, orte_ras_base.ras_output,
@ -521,6 +545,56 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
sNodes++; /* Increment the node count */
}
}
#else
/* in newer versions of alps, there is one entry for each node in the
* allocation, and that struct directly carries the number of PEs
* allocated on that node to this job.
*/
apNodes=(placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
oEntry=sizeof(placeNodeList_t)*apInfo->numPlaces;
oNow+=(oDet+oSlots+oEntry); /* Target next entry */
if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */
for( ix=0; ix<apInfo->numPlaces; ix++ ) {
opal_output_verbose(5, orte_ras_base.ras_output,
"ras:alps:read_appinfo(modern): processing NID %d with %d slots",
apNodes[ix].nid, apNodes[ix].numPEs);
asprintf( &hostname, "%d", apNodes[ix].nid );
if (NULL == hostname) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
node = OBJ_NEW(orte_node_t);
node->name = hostname;
node->launch_id = apNodes[ix].nid;
node->slots_inuse = 0;
node->slots_max = 0;
node->slots = apNodes[ix].numPEs;
/* need to order these node ids so the regex generator
* can properly function
*/
added = false;
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
n2 = (orte_node_t*)item;
if (node->launch_id < n2->launch_id) {
/* insert the new node before this one */
opal_list_insert_pos(nodes, item, &node->super);
added = true;
break;
}
}
if (!added) {
/* add it to the end */
opal_list_append(nodes, &node->super);
}
sNodes++; /* Increment the node count */
}
#endif
break; /* Extended details ignored */
}
free(cpBuf); /* Free the buffer */