ompi/dpm: improve scalability of ompi_dpm_mark_dyncomm
This commit removes the use of ompi_group_peer_lookup in the ompi_dpm_mark_dyncomm function. The function now uses ompi_group_get_proc_name which does not allocate an ompi_proc_t if one does not already exist. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
202c6a38e4
Коммит
ed005f2a61
@ -13,7 +13,7 @@
|
|||||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
|
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
|
||||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||||
@ -1293,6 +1293,22 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
|
|||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
|
/**
 * Report whether a group contains a process from a different job.
 *
 * Uses ompi_group_get_proc_name() so that only the process name is
 * consulted; per the commit description this avoids allocating an
 * ompi_proc_t for peers that do not already have one.
 *
 * @param group      group to scan; a NULL group is treated as empty
 * @param thisjobid  reference jobid to compare every member against
 *
 * @return true if at least one member's jobid differs from thisjobid,
 *         false otherwise (including for a NULL or empty group)
 */
static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
{
    int size = group ? ompi_group_size (group) : 0;

    /* Start at rank 0: this helper is also used on the remote group,
     * whose rank 0 is NOT the process thisjobid was taken from.
     * Starting at 1 would miss a remote group whose only foreign
     * member is its first one (e.g. a single spawned child). For the
     * local group the extra comparison is a harmless self-check. */
    for (int i = 0 ; i < size ; ++i) {
        opal_process_name_t name = ompi_group_get_proc_name (group, i);

        if (thisjobid != ((ompi_process_name_t *) &name)->jobid) {
            /* at least one is different */
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
/* All we want to do in this function is determine if the number of
|
/* All we want to do in this function is determine if the number of
|
||||||
* jobids in the local and/or remote group is > 1. This tells us to
|
* jobids in the local and/or remote group is > 1. This tells us to
|
||||||
* set the disconnect flag. We don't actually care what the true
|
* set the disconnect flag. We don't actually care what the true
|
||||||
@ -1300,56 +1316,30 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
|
|||||||
*/
|
*/
|
||||||
void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
{
    ompi_jobid_t thisjobid;

    /* special case for MPI_COMM_NULL */
    if (MPI_COMM_NULL == comm) {
        return;
    }

    /* The jobid of the first process in the local group is the
     * reference every other process is compared against.  Only the
     * process name is fetched here; no ompi_proc_t lookup is made. */
    thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid;

    /* Check the local group first; only if it is homogeneous look at
     * the remote group (relevant for inter-communicators) for a
     * process whose jobid differs from thisjobid. */
    if (!ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid) &&
        !ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid)) {
        /* every process shares one jobid: nothing to mark */
        return;
    }

    /* a different jobid was found: count the communicator as dynamic
     * and set the disconnect flag */
    ompi_comm_num_dyncomm++;
    OMPI_COMM_SET_DYNAMIC(comm);
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user