From ccc8aa5784f69632aa6f56c58241ff088884d091 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Tue, 31 Mar 2009 15:41:55 +0000 Subject: [PATCH] Fix a segfault caused by making copies of the pointer to an array that is reallocated in the meantime. The base pointer can change its address while the copy still tries to access pages that are not ours anymore. As a safeguard, good coding style should never directly access opal_pointer_array_t->addr or opal_value_array_t->bytes_array. I found another instance of the same bug somewhere else and will commit a separate patch for it. This commit fixes ticket #1858 and resolves the user-reported case at http://www.open-mpi.org/community/lists/devel/2009/03/5731.php. Aurelien This commit was SVN r20903. --- orte/mca/grpcomm/base/grpcomm_base_modex.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/orte/mca/grpcomm/base/grpcomm_base_modex.c b/orte/mca/grpcomm/base/grpcomm_base_modex.c index a3dae08d86..3260403c15 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_modex.c +++ b/orte/mca/grpcomm/base/grpcomm_base_modex.c @@ -56,7 +56,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db) int rc=ORTE_SUCCESS; int32_t arch; bool modex_reqd; - orte_nid_t *nid, **nids; + orte_nid_t *nid; orte_local_rank_t local_rank; orte_node_rank_t node_rank; orte_jmap_t *jmap; @@ -130,8 +130,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db) "%s grpcomm:base:full:modex: processing modex info", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* process the results */ - nids = (orte_nid_t**)orte_nidmap.addr; + /* extract the number of procs that put data in the buffer */ cnt=1; @@ -197,10 +196,8 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db) /* UPDATE THE NIDMAP/PIDMAP TO SUPPORT DYNAMIC OPERATIONS */ /* find this proc's node in the nidmap */ - nid = NULL; - for (n=0; n < orte_nidmap.size && NULL != nids[n]; n++) { - if (0 == strcmp(hostname, nids[n]->name)) { - nid = nids[n]; + for
(n=0; NULL != (nid = opal_pointer_array_get_item(&orte_nidmap, n)); n++) { + if (0 == strcmp(hostname, nid->name)) { /* update the arch in case it differs * from what was reported by the daemon */