Merge pull request #8199 from rhc54/topic/locality
Fix confusion between cpuset and locality
Этот коммит содержится в:
Коммит
d489030925
@ -355,7 +355,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
if (0 < opal_list_get_size(&ilist)) {
|
||||
uint32_t *peer_ranks = NULL;
|
||||
int prn, nprn = 0;
|
||||
char *val, *mycpuset;
|
||||
char *val;
|
||||
uint16_t u16;
|
||||
opal_process_name_t wildcard_rank;
|
||||
/* convert the list of new procs to a proc_t array */
|
||||
@ -380,16 +380,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
opal_argv_free(peers);
|
||||
}
|
||||
|
||||
/* get my locality string */
|
||||
val = NULL;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
OMPI_PROC_MY_NAME, &val, PMIX_STRING);
|
||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||
mycpuset = val;
|
||||
} else {
|
||||
mycpuset = NULL;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
|
||||
proc = cd->p;
|
||||
@ -406,8 +396,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
val = NULL;
|
||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
|
||||
&proc->super.proc_name, &val, OPAL_STRING);
|
||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
|
||||
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
|
||||
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
|
||||
free(val);
|
||||
} else {
|
||||
/* all we can say is that it shares our node */
|
||||
@ -425,9 +415,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (NULL != mycpuset) {
|
||||
free(mycpuset);
|
||||
}
|
||||
if (NULL != peer_ranks) {
|
||||
free(peer_ranks);
|
||||
}
|
||||
|
@ -764,7 +764,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
|
||||
/* identify our location */
|
||||
val = NULL;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_CPUSET,
|
||||
&opal_process_info.my_name, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
opal_process_info.cpuset = val;
|
||||
@ -774,6 +774,15 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
opal_process_info.cpuset = NULL;
|
||||
opal_process_info.proc_is_bound = false;
|
||||
}
|
||||
val = NULL;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
&opal_process_info.my_name, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
opal_process_info.locality = val;
|
||||
val = NULL; // protect the string
|
||||
} else {
|
||||
opal_process_info.locality = NULL;
|
||||
}
|
||||
|
||||
/* retrieve the local peers - defaults to local node */
|
||||
val = NULL;
|
||||
@ -811,7 +820,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
&pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
|
||||
u16 = opal_hwloc_compute_relative_locality(opal_process_info.locality, val);
|
||||
free(val);
|
||||
} else {
|
||||
/* all we can say is that it shares our node */
|
||||
@ -826,9 +835,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
ret = opal_pmix_convert_status(rc);
|
||||
error = "local store of locality";
|
||||
opal_argv_free(peers);
|
||||
if (NULL != opal_process_info.cpuset) {
|
||||
free(opal_process_info.cpuset);
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2020 Triad National Security, LLC. All rights
|
||||
@ -345,7 +345,7 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
|
||||
}
|
||||
|
||||
// compute relative locality
|
||||
relative_locality = opal_hwloc_compute_relative_locality(process_info->cpuset, locality_string);
|
||||
relative_locality = opal_hwloc_compute_relative_locality(process_info->locality, locality_string);
|
||||
free(locality_string);
|
||||
|
||||
if (relative_locality & OPAL_PROC_ON_SOCKET) {
|
||||
|
@ -41,6 +41,7 @@ opal_process_info_t opal_process_info = {
|
||||
.my_local_rank = 0, /* I'm the only process around here */
|
||||
.my_node_rank = 0,
|
||||
.cpuset = NULL,
|
||||
.locality = NULL,
|
||||
.pid = 0,
|
||||
.num_procs = 0,
|
||||
.app_num = 0,
|
||||
|
@ -115,6 +115,7 @@ typedef struct opal_process_info_t {
|
||||
uint16_t my_local_rank; /**< local rank on this node within my job */
|
||||
uint16_t my_node_rank;
|
||||
char *cpuset; /**< String-representation of bitmap where we are bound */
|
||||
char *locality; /**< String-representation of process locality */
|
||||
pid_t pid;
|
||||
uint32_t num_procs;
|
||||
uint32_t app_num;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user