1
1
Ralph Castain 9902748108 ***** THIS INCLUDES A SMALL CHANGE IN THE MPI-RTE INTERFACE *****
Fix two problems that surfaced when using direct launch under SLURM:

1. locally store our own data because some BTLs want to retrieve 
   it during add_procs rather than use what they have internally

2. clean up MPI_Abort so it correctly passes the error status all
   the way down to the actual exit. When someone implemented the
   "abort_peers" API, they left out the error status. So we lost
   it at that point and *always* exited with a status of 1. This 
   forces a change to the API to include the status.

cmr:v1.7.3:reviewer=jsquyres:subject=Fix MPI_Abort and modex_recv for direct launch

This commit was SVN r29405.
2013-10-08 18:37:59 +00:00

57 строки
1.2 KiB
C

/* -*- C -*-
*
* $HEADER$
*
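* A program that just spins, with vpid 3 aborting (or vpid 0 when the job has
* three or fewer processes) - provides a mechanism for testing abnormal program
* termination. The abort status defaults to 3 and may be given as argv[1].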
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
/**
 * Entry point: initialize ORTE, announce this process, then spin until the
 * designated process aborts the job.
 *
 * argv[1], when present and a valid base-10 integer, is used as the abort
 * status; otherwise the status defaults to 3. The aborting process is vpid 3,
 * or vpid 0 when the job has three or fewer processes.
 *
 * Returns the orte_init error code if initialization fails. The trailing
 * "return 0" is never reached because the spin loop does not exit.
 */
int main(int argc, char* argv[])
{
    int i, rc;
    int abort_status = 3;   /* default status handed to the error manager */
    volatile double pi;     /* volatile so the busy-work store is not discarded */
    pid_t pid;
    char hostname[500];

    if (0 > (rc = orte_init(&argc, &argv, ORTE_PROC_NON_MPI))) {
        fprintf(stderr, "orte_abort: couldn't init orte - error code %d\n", rc);
        return rc;
    }

    pid = getpid();

    /* gethostname may fail, and on truncation it is unspecified whether the
     * result is NUL-terminated, so guard both cases before printing with %s.
     */
    if (0 != gethostname(hostname, sizeof(hostname))) {
        snprintf(hostname, sizeof(hostname), "%s", "unknown");
    }
    hostname[sizeof(hostname) - 1] = '\0';

    if (1 < argc) {
        char *end = NULL;
        long requested = strtol(argv[1], &end, 10);

        /* Reject empty or partially numeric input instead of silently
         * aborting with whatever prefix strtol managed to parse.
         */
        if (end == argv[1] || '\0' != *end) {
            fprintf(stderr, "orte_abort: invalid abort status \"%s\" - using %d\n",
                    argv[1], abort_status);
        } else {
            abort_status = (int)requested;
        }
    }

    printf("orte_abort: Name %s  Host: %s  Pid %ld\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           hostname, (long)pid);
    fflush(stdout);

    i = 0;
    while (1) {
        i++;
        pi = i / 3.14159256;   /* busy work only; the value is never consumed */
        if (i > 10000) i = 0;
        /* Abort from vpid 3, or from vpid 0 when the job is too small to
         * have a vpid 3, once the counter reaches 9995.
         */
        if ((ORTE_PROC_MY_NAME->vpid == 3 ||
             (orte_process_info.num_procs <= 3 && ORTE_PROC_MY_NAME->vpid == 0))
            && i == 9995) {
            orte_errmgr.abort(abort_status, NULL);
        }
    }

    return 0;
}