Merge pull request #8199 from rhc54/topic/locality
Fix confusion between cpuset and locality
This commit is contained in:
Commit
d489030925
@ -355,7 +355,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
if (0 < opal_list_get_size(&ilist)) {
|
if (0 < opal_list_get_size(&ilist)) {
|
||||||
uint32_t *peer_ranks = NULL;
|
uint32_t *peer_ranks = NULL;
|
||||||
int prn, nprn = 0;
|
int prn, nprn = 0;
|
||||||
char *val, *mycpuset;
|
char *val;
|
||||||
uint16_t u16;
|
uint16_t u16;
|
||||||
opal_process_name_t wildcard_rank;
|
opal_process_name_t wildcard_rank;
|
||||||
/* convert the list of new procs to a proc_t array */
|
/* convert the list of new procs to a proc_t array */
|
||||||
@ -380,16 +380,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
opal_argv_free(peers);
|
opal_argv_free(peers);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get my locality string */
|
|
||||||
val = NULL;
|
|
||||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
|
||||||
OMPI_PROC_MY_NAME, &val, PMIX_STRING);
|
|
||||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
|
||||||
mycpuset = val;
|
|
||||||
} else {
|
|
||||||
mycpuset = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
|
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
|
||||||
proc = cd->p;
|
proc = cd->p;
|
||||||
@ -406,8 +396,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
val = NULL;
|
val = NULL;
|
||||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
|
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
|
||||||
&proc->super.proc_name, &val, OPAL_STRING);
|
&proc->super.proc_name, &val, OPAL_STRING);
|
||||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
|
||||||
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
|
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
|
||||||
free(val);
|
free(val);
|
||||||
} else {
|
} else {
|
||||||
/* all we can say is that it shares our node */
|
/* all we can say is that it shares our node */
|
||||||
@ -425,9 +415,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
}
|
}
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
if (NULL != mycpuset) {
|
|
||||||
free(mycpuset);
|
|
||||||
}
|
|
||||||
if (NULL != peer_ranks) {
|
if (NULL != peer_ranks) {
|
||||||
free(peer_ranks);
|
free(peer_ranks);
|
||||||
}
|
}
|
||||||
|
@ -764,7 +764,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
|||||||
|
|
||||||
/* identify our location */
|
/* identify our location */
|
||||||
val = NULL;
|
val = NULL;
|
||||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_CPUSET,
|
||||||
&opal_process_info.my_name, &val, PMIX_STRING);
|
&opal_process_info.my_name, &val, PMIX_STRING);
|
||||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||||
opal_process_info.cpuset = val;
|
opal_process_info.cpuset = val;
|
||||||
@ -774,6 +774,15 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
|||||||
opal_process_info.cpuset = NULL;
|
opal_process_info.cpuset = NULL;
|
||||||
opal_process_info.proc_is_bound = false;
|
opal_process_info.proc_is_bound = false;
|
||||||
}
|
}
|
||||||
|
val = NULL;
|
||||||
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||||
|
&opal_process_info.my_name, &val, PMIX_STRING);
|
||||||
|
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||||
|
opal_process_info.locality = val;
|
||||||
|
val = NULL; // protect the string
|
||||||
|
} else {
|
||||||
|
opal_process_info.locality = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* retrieve the local peers - defaults to local node */
|
/* retrieve the local peers - defaults to local node */
|
||||||
val = NULL;
|
val = NULL;
|
||||||
@ -811,7 +820,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
|||||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||||
&pname, &val, PMIX_STRING);
|
&pname, &val, PMIX_STRING);
|
||||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||||
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
|
u16 = opal_hwloc_compute_relative_locality(opal_process_info.locality, val);
|
||||||
free(val);
|
free(val);
|
||||||
} else {
|
} else {
|
||||||
/* all we can say is that it shares our node */
|
/* all we can say is that it shares our node */
|
||||||
@ -826,9 +835,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
|||||||
ret = opal_pmix_convert_status(rc);
|
ret = opal_pmix_convert_status(rc);
|
||||||
error = "local store of locality";
|
error = "local store of locality";
|
||||||
opal_argv_free(peers);
|
opal_argv_free(peers);
|
||||||
if (NULL != opal_process_info.cpuset) {
|
|
||||||
free(opal_process_info.cpuset);
|
|
||||||
}
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2020 Triad National Security, LLC. All rights
|
* Copyright (c) 2020 Triad National Security, LLC. All rights
|
||||||
@ -345,7 +345,7 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// compute relative locality
|
// compute relative locality
|
||||||
relative_locality = opal_hwloc_compute_relative_locality(process_info->cpuset, locality_string);
|
relative_locality = opal_hwloc_compute_relative_locality(process_info->locality, locality_string);
|
||||||
free(locality_string);
|
free(locality_string);
|
||||||
|
|
||||||
if (relative_locality & OPAL_PROC_ON_SOCKET) {
|
if (relative_locality & OPAL_PROC_ON_SOCKET) {
|
||||||
|
@ -41,6 +41,7 @@ opal_process_info_t opal_process_info = {
|
|||||||
.my_local_rank = 0, /* I'm the only process around here */
|
.my_local_rank = 0, /* I'm the only process around here */
|
||||||
.my_node_rank = 0,
|
.my_node_rank = 0,
|
||||||
.cpuset = NULL,
|
.cpuset = NULL,
|
||||||
|
.locality = NULL,
|
||||||
.pid = 0,
|
.pid = 0,
|
||||||
.num_procs = 0,
|
.num_procs = 0,
|
||||||
.app_num = 0,
|
.app_num = 0,
|
||||||
|
@ -115,6 +115,7 @@ typedef struct opal_process_info_t {
|
|||||||
uint16_t my_local_rank; /**< local rank on this node within my job */
|
uint16_t my_local_rank; /**< local rank on this node within my job */
|
||||||
uint16_t my_node_rank;
|
uint16_t my_node_rank;
|
||||||
char *cpuset; /**< String-representation of bitmap where we are bound */
|
char *cpuset; /**< String-representation of bitmap where we are bound */
|
||||||
|
char *locality; /**< String-representation of process locality */
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
uint32_t num_procs;
|
uint32_t num_procs;
|
||||||
uint32_t app_num;
|
uint32_t app_num;
|
||||||
|
Loading…
x
Reference in new issue
Block a user