From 9b7ab92de9b81f637f4e1f90e1bc9ab4a9d7be2a Mon Sep 17 00:00:00 2001 From: Rainer Keller Date: Thu, 2 Apr 2009 17:46:08 +0000 Subject: [PATCH] - Per mail and diff from Ken Matney: Also allow multiple retries when opening the appinfo file, to give ALPS time to supply it. This commit was SVN r20932. --- orte/mca/ras/alps/ras_alps_module.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c index cf1cbd85c0..faf050d7f6 100644 --- a/orte/mca/ras/alps/ras_alps_module.c +++ b/orte/mca/ras/alps/ras_alps_module.c @@ -227,10 +227,16 @@ int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, unsigned oNow=0; iTrips=0; while(!oNow) { /* Until appinfo read is complete */ + iTrips++; /* Increment trip count */ iFd=open( filename, O_RDONLY ); if( iFd==-1 ) { /* If file absent, ALPS is down */ + opal_output_verbose(1, orte_ras_base.ras_output, + "ras:alps:allocate: ALPS information open failure"); + usleep(iTrips*50000); /* Increasing delays, .05 s/try */ +/* Fail only when number of attempts have been exhausted. */ + if( iTrips <= max_appinfo_read_attempts ) continue; ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); return ORTE_ERR_FILE_OPEN_FAILURE; } @@ -242,7 +248,6 @@ int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, unsigned szLen=ssBuf.st_size; /* Get buffer size */ cpBuf=malloc(szLen+1); /* Allocate buffer */ - iTrips++; /* Increment trip count */ /* Repeated attempts to read appinfo, with an increasing delay between * * successive attempts to allow scheduler I/O a chance to complete. */