Set the locality for remote procs even after a comm_spawn. Ensure we store our own local cpuset upon launch so that it will be shared during comm_join.
This provides full locality, i.e., not just node-level, but all the way down to whatever common binding level exists between the procs.
cmr=v1.7.5:reviewer=jsquyres
This commit was SVN r31106.
Этот коммит содержится в:
родитель
5efd961149
Коммит
554da83865
@ -548,6 +548,11 @@ static int connect_accept(ompi_communicator_t *comm, int root,
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
|
||||
"%s dpm:orte:connect_accept new procs added",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* set the locality of the new procs */
|
||||
for (j=0; j < new_proc_len; j++) {
|
||||
ompi_proc_set_locality(new_proc_list[j]);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_RELEASE(nrbuf);
|
||||
|
@ -127,7 +127,7 @@ int ompi_proc_init(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int ompi_proc_set_locality(ompi_proc_t *proc)
|
||||
int ompi_proc_set_locality(ompi_proc_t *proc)
|
||||
{
|
||||
opal_hwloc_locality_t *hwlocale, locality;
|
||||
ompi_vpid_t vpid, *vptr;
|
||||
|
@ -315,6 +315,12 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_proc_refresh(void);
|
||||
|
||||
|
||||
/**
|
||||
* Set the locality of the given proc relative to the calling process
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_proc_set_locality(ompi_proc_t *proc);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OMPI_PROC_PROC_H */
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -29,6 +30,7 @@
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/db/db.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -288,6 +290,15 @@ int orte_ess_base_proc_binding(void)
|
||||
}
|
||||
}
|
||||
hwloc_bitmap_free(mycpus);
|
||||
/* store our cpuset for exchange with non-peers
|
||||
* so that other procs in a comm_spawn can know it
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = opal_db.store((opal_identifier_t*)ORTE_PROC_MY_NAME,
|
||||
OPAL_SCOPE_NON_PEER,
|
||||
OPAL_DB_CPUSET, orte_process_info.cpuset, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user