From ed0f42fa494633c0787df9b9cc9451a97aef8e13 Mon Sep 17 00:00:00 2001
From: Ralph Castain <rhc@open-mpi.org>
Date: Fri, 2 Apr 2010 07:08:34 +0000
Subject: [PATCH] Fix a bug courtesy of Jeff - since check_job_complete removes
 the child object and releases it, save the pointer to the next item on the
 list before working with the current one

This commit was SVN r22924.
---
 orte/mca/odls/base/odls_base_default_fns.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c
index 9ff62cabbe..62781688a0 100644
--- a/orte/mca/odls/base/odls_base_default_fns.c
+++ b/orte/mca/odls/base/odls_base_default_fns.c
@@ -1212,7 +1212,7 @@ static int pack_state_for_proc(opal_buffer_t *alert, bool include_startup_info,
 static int pack_state_update(opal_buffer_t *alert, bool include_startup_info, orte_odls_job_t *jobdat)
 {
     int rc;
-    opal_list_item_t *item;
+    opal_list_item_t *item, *next;
     orte_odls_child_t *child;
     orte_vpid_t null=ORTE_VPID_INVALID;
     
@@ -1237,8 +1237,9 @@ static int pack_state_update(opal_buffer_t *alert, bool include_startup_info, or
     }
     for (item = opal_list_get_first(&orte_local_children);
          item != opal_list_get_end(&orte_local_children);
-         item = opal_list_get_next(item)) {
+         item = next) {
         child = (orte_odls_child_t*)item;
+        next = opal_list_get_next(item);
         /* if this child is part of the job... */
         if (child->name->jobid == jobdat->jobid) {
             if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, include_startup_info, child))) {
@@ -2603,7 +2604,6 @@ static void check_proc_complete(orte_odls_child_t *child)
                  item != opal_list_get_end(&orte_local_children);
                  item = next) {
                 child = (orte_odls_child_t*)item;
-                
                 next = opal_list_get_next(item);
                 
                 if (jdat->jobid == child->name->jobid) {
@@ -2902,7 +2902,7 @@ MOVEON:
 void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata)
 {
     orte_odls_child_t *child;
-    opal_list_item_t *item;
+    opal_list_item_t *item, *next;
     int rc;
     opal_buffer_t cmdbuf;
     orte_daemon_cmd_flag_t command;
@@ -2923,8 +2923,9 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata)
     /* find this child */
     for (item = opal_list_get_first(&orte_local_children);
          item != opal_list_get_end(&orte_local_children);
-         item = opal_list_get_next(item)) {
+         item = next) {
         child = (orte_odls_child_t*)item;
+        next = opal_list_get_next(item);
         
         if (pid == child->pid) { /* found it */
             /* this is an independent entry point from the event library. To avoid
@@ -2976,7 +2977,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
                                             orte_odls_base_child_died_fn_t child_died)
 {
     orte_odls_child_t *child;
-    opal_list_item_t *item;
+    opal_list_item_t *item, *next;
     int rc = ORTE_SUCCESS;
     opal_list_t procs_killed;
     orte_proc_t *proc, proctmp;
@@ -3020,8 +3021,9 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
         }
         for (item = opal_list_get_first(&orte_local_children);
              item != opal_list_get_end(&orte_local_children);
-             item = opal_list_get_next(item)) {
+             item = next) {
             child = (orte_odls_child_t*)item;
+            next = opal_list_get_next(item);
             
             OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
                                  "%s odls:kill_local_proc checking child process %s",
@@ -3167,7 +3169,7 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
 {
     int rc;
     orte_odls_child_t *child;
-    opal_list_item_t *item;
+    opal_list_item_t *item, *next;
     opal_pstats_t stats, *statsptr;
     int j;
     
@@ -3179,8 +3181,9 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
     /* find this child */
     for (item = opal_list_get_first(&orte_local_children);
          item != opal_list_get_end(&orte_local_children);
-         item = opal_list_get_next(item)) {
+         item = next) {
         child = (orte_odls_child_t*)item;
+        next = opal_list_get_next(item);
         
         if (proc->jobid == child->name->jobid &&
             (proc->vpid == child->name->vpid ||