1
1

If the RTE fails to deliver the daemon information,

gracefully fall back to a non-reordered communicator.
Optimize the loops building the process hierarchy.
Этот коммит содержится в:
George Bosilca 2016-08-11 13:04:27 -04:00
родитель 23886754f0
Коммит 8d0baf140f

Просмотреть файл

@ -44,9 +44,9 @@
while(0); while(0);
#define FALLBACK() \ #define FALLBACK() \
do { free(nodes_roots); \ do { free(nodes_roots); \
free(local_procs); \ free(local_procs); \
hwloc_bitmap_free(set); \ if( NULL != set) hwloc_bitmap_free(set); \
goto fallback; } \ goto fallback; } \
while(0); while(0);
@ -181,19 +181,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
num_procs_in_node++; num_procs_in_node++;
} }
/* Get the ranks of the local procs in comm_old */ vpids = (int *)malloc(size * sizeof(int));
colors = (int *)malloc(size * sizeof(int));
local_procs = (int *)malloc(num_procs_in_node * sizeof(int)); local_procs = (int *)malloc(num_procs_in_node * sizeof(int));
for(i = idx = 0 ; i < size ; i++){ for(i = idx = 0 ; i < size ; i++){
proc = ompi_group_peer_lookup(comm_old->c_local_group, i); proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
if (( i == rank ) || if (( i == rank ) ||
(OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags))) (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags))) {
local_procs[idx++] = i; local_procs[idx++] = i;
} }
vpids = (int *)malloc(size * sizeof(int));
colors = (int *)malloc(size * sizeof(int));
for(i = 0; i < size ; i++) {
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
pval = &val; pval = &val;
OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32); OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32);
if( OPAL_SUCCESS != err ) { if( OPAL_SUCCESS != err ) {
@ -220,22 +217,30 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
} }
#endif #endif
/* clean-up dupes in the array */ /* clean-up dupes in the array */
for(i = 0; i < size ; i++) for(i = 0; i < size; i++) {
if ( -1 == vpids[i] ) if( -1 == vpids[i] )
continue; continue;
else
for(j = i+1 ; j < size ; j++) num_nodes++; /* update the number of nodes */
if( vpids[j] != -1 )
if( vpids[i] == vpids[j] ) for(j = i+1; j < size; j++)
vpids[j] = -1; if( vpids[j] != -1 )
/* compute number of nodes */ if( vpids[i] == vpids[j] )
for(i = 0; i < size ; i++) vpids[j] = -1;
if( vpids[i] != -1 ) }
num_nodes++; if( 0 == num_nodes ) {
/* No useful info has been retrieved from the runtime. Fallback
* and create a duplicate of the original communicator */
free(vpids);
free(colors);
free(local_procs);
err = OMPI_SUCCESS; /* return with success */
goto fallback;
}
/* compute local roots ranks in comm_old */ /* compute local roots ranks in comm_old */
/* Only the global root needs to do this */ /* Only the global root needs to do this */
if(0 == rank) { if(0 == rank) {
nodes_roots = (int *)calloc(num_nodes,sizeof(int)); nodes_roots = (int *)calloc(num_nodes, sizeof(int));
for(i = idx = 0; i < size ; i++) for(i = idx = 0; i < size ; i++)
if( vpids[i] != -1 ) if( vpids[i] != -1 )
nodes_roots[idx++] = i; nodes_roots[idx++] = i;