Merge pull request #7652 from rhc54/topic/het
Cleanup heterogeneous builds
Этот коммит содержится в:
Коммит
91be01beb2
@ -102,7 +102,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
|
||||
opal_prrte_prefix_arg=
|
||||
fi
|
||||
|
||||
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
||||
opal_prrte_args="--prefix=$prefix --with-proxy-version-string=$OPAL_VERSION --with-proxy-package-name=\"Open MPI\" --with-proxy-bugreport=\"https://www.open-mpi.org/community/help/\" $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
||||
AS_IF([test "$enable_debug" = "yes"],
|
||||
[opal_prrte_args="--enable-debug $opal_prrte_args"
|
||||
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
||||
|
@ -6,7 +6,7 @@ enable_mem_debug=yes
|
||||
enable_mem_profile=no
|
||||
enable_debug_symbols=yes
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=no
|
||||
enable_heterogeneous=yes
|
||||
enable_picky=yes
|
||||
enable_debug=yes
|
||||
enable_shared=yes
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||
@ -346,15 +346,12 @@ ompi_mtl_psm2_component_close(void)
|
||||
static int
|
||||
get_local_rank(int *out_rank)
|
||||
{
|
||||
ompi_node_rank_t my_node_rank;
|
||||
|
||||
*out_rank = 0;
|
||||
|
||||
if (OMPI_NODE_RANK_INVALID == (my_node_rank =
|
||||
ompi_process_info.my_node_rank)) {
|
||||
if (OMPI_NODE_RANK_INVALID == ompi_process_info.my_node_rank) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
*out_rank = (int)my_node_rank;
|
||||
*out_rank = (int)ompi_process_info.my_node_rank;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -144,19 +144,24 @@ int ompi_proc_complete_init_single (ompi_proc_t *proc)
|
||||
{
|
||||
uint32_t *ui32ptr;
|
||||
int ret;
|
||||
ui32ptr = &(proc->super.proc_arch);
|
||||
OPAL_MODEX_RECV_VALUE(ret, PMIX_ARCH, &proc->super.proc_name,
|
||||
(void**)&ui32ptr, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
if (proc->super.proc_arch != opal_local_arch) {
|
||||
OBJ_RELEASE(proc->super.proc_convertor);
|
||||
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
|
||||
}
|
||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
||||
/* if the proc is local, then no need to fetch it */
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
} else {
|
||||
return ret;
|
||||
ui32ptr = &(proc->super.proc_arch);
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, "OMPI_ARCH", &proc->super.proc_name,
|
||||
(void**)&ui32ptr, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
if (proc->super.proc_arch != opal_local_arch) {
|
||||
OBJ_RELEASE(proc->super.proc_convertor);
|
||||
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
|
||||
}
|
||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -258,7 +263,7 @@ int ompi_proc_init(void)
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* add our arch to the modex */
|
||||
OPAL_MODEX_SEND_VALUE(ret, PMIX_GLOBAL,
|
||||
PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
|
||||
"OMPI_ARCH", &opal_local_arch, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
@ -62,30 +62,6 @@
|
||||
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
|
||||
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
|
||||
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
|
||||
pmix_process_info_t pmix_process_info = {
|
||||
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
|
||||
.myprocid = {{0}, PMIX_RANK_INVALID},
|
||||
.nodename = NULL,
|
||||
.pid = 0,
|
||||
.top_session_dir = NULL,
|
||||
.job_session_dir = NULL,
|
||||
.proc_session_dir = NULL,
|
||||
.my_local_rank = 0,
|
||||
.my_node_rank = 0,
|
||||
.my_numa_rank = UINT16_MAX, /* Assume invalid NUMA rank, set to UINT16_MAX */
|
||||
.num_local_peers = 0,
|
||||
.num_procs = 0,
|
||||
.app_num = 0,
|
||||
.univ_size = 0,
|
||||
.app_sizes = NULL,
|
||||
.app_ldrs = NULL,
|
||||
.cpuset = NULL,
|
||||
.command = NULL,
|
||||
.num_apps = 0,
|
||||
.initial_wdir = NULL,
|
||||
.reincarnation = 0
|
||||
};
|
||||
bool pmix_proc_is_bound = false;
|
||||
bool ompi_singleton = false;
|
||||
|
||||
static int _setup_top_session_dir(char **sdir);
|
||||
@ -530,7 +506,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
|
||||
u32ptr = &u32;
|
||||
u16ptr = &u16;
|
||||
memset(&pmix_process_info, 0, sizeof(pmix_process_info));
|
||||
memset(&opal_process_info, 0, sizeof(opal_process_info));
|
||||
|
||||
/* Convince OPAL to use our naming scheme */
|
||||
opal_process_name_print = _process_name_print_for_opal;
|
||||
@ -552,7 +528,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
opal_pmix_setup_nspace_tracker();
|
||||
|
||||
/* initialize the selected module */
|
||||
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&pmix_process_info.myprocid, NULL, 0)))) {
|
||||
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0)))) {
|
||||
/* if we get PMIX_ERR_UNREACH indicating that we cannot reach the
|
||||
* server, then we assume we are operating as a singleton */
|
||||
if (PMIX_ERR_UNREACH == ret) {
|
||||
@ -567,14 +543,14 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
}
|
||||
|
||||
/* setup the process name fields - also registers the new nspace */
|
||||
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pmix_process_info.myprocid);
|
||||
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &opal_process_info.myprocid);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
OPAL_PROC_MY_NAME.jobid = pname.jobid;
|
||||
OPAL_PROC_MY_NAME.vpid = pname.vpid;
|
||||
pmix_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
pmix_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
|
||||
/* set our hostname */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME,
|
||||
@ -585,11 +561,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
}
|
||||
opal_process_info.nodename = ev1; // ev1 is an allocated string
|
||||
}
|
||||
pmix_process_info.nodename = opal_process_info.nodename;
|
||||
opal_process_info.nodename = opal_process_info.nodename;
|
||||
|
||||
/* get our local rank from PMIx */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_RANK,
|
||||
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
if (ompi_singleton) {
|
||||
/* just assume 0 */
|
||||
@ -600,11 +576,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
pmix_process_info.my_local_rank = u16;
|
||||
opal_process_info.my_local_rank = u16;
|
||||
|
||||
/* get our node rank from PMIx */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NODE_RANK,
|
||||
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
if (ompi_singleton) {
|
||||
/* just assume 0 */
|
||||
@ -615,10 +591,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
pmix_process_info.my_node_rank = u16;
|
||||
opal_process_info.my_node_rank = u16;
|
||||
|
||||
/* get job size */
|
||||
pname.jobid = pmix_process_info.my_name.jobid;
|
||||
pname.jobid = opal_process_info.my_name.jobid;
|
||||
pname.vpid = OPAL_VPID_WILDCARD;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_SIZE,
|
||||
&pname, &u32ptr, PMIX_UINT32);
|
||||
@ -632,7 +608,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
pmix_process_info.num_procs = u32;
|
||||
opal_process_info.num_procs = u32;
|
||||
|
||||
/* get universe size */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_UNIV_SIZE,
|
||||
@ -643,49 +619,49 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
u32 = 1;
|
||||
} else {
|
||||
/* default to job size */
|
||||
u32 = pmix_process_info.num_procs;
|
||||
u32 = opal_process_info.num_procs;
|
||||
}
|
||||
}
|
||||
pmix_process_info.univ_size = u32;
|
||||
opal_process_info.univ_size = u32;
|
||||
|
||||
/* get number of app contexts */
|
||||
pname.jobid = pmix_process_info.my_name.jobid;
|
||||
pname.jobid = opal_process_info.my_name.jobid;
|
||||
pname.vpid = OPAL_VPID_WILDCARD;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_NUM_APPS,
|
||||
&pname, &u32ptr, PMIX_UINT32);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
pmix_process_info.num_apps = u32;
|
||||
opal_process_info.num_apps = u32;
|
||||
} else {
|
||||
pmix_process_info.num_apps = 1;
|
||||
opal_process_info.num_apps = 1;
|
||||
}
|
||||
|
||||
/* get our app number from PMIx - ok if not found */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APPNUM,
|
||||
&pmix_process_info.my_name, &u32ptr, PMIX_UINT32);
|
||||
&opal_process_info.my_name, &u32ptr, PMIX_UINT32);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
pmix_process_info.app_num = u32;
|
||||
opal_process_info.app_num = u32;
|
||||
} else {
|
||||
pmix_process_info.app_num = 0;
|
||||
opal_process_info.app_num = 0;
|
||||
}
|
||||
|
||||
/* if more than one app context, get the number of procs and first rank of each */
|
||||
if (1 == pmix_process_info.num_apps) {
|
||||
pmix_process_info.app_ldrs = strdup("0");
|
||||
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
|
||||
if (1 == opal_process_info.num_apps) {
|
||||
opal_process_info.app_ldrs = strdup("0");
|
||||
opal_asprintf(&opal_process_info.app_sizes, "%u", opal_process_info.num_procs);
|
||||
} else {
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_APP_SIZES", &pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
/* assume it is just us */
|
||||
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
|
||||
opal_asprintf(&opal_process_info.app_sizes, "%u", opal_process_info.num_procs);
|
||||
} else {
|
||||
pmix_process_info.app_sizes = val;
|
||||
opal_process_info.app_sizes = val;
|
||||
}
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_FIRST_RANKS", &pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
/* assume it is just us */
|
||||
pmix_process_info.app_ldrs = strdup("0");
|
||||
opal_process_info.app_ldrs = strdup("0");
|
||||
} else {
|
||||
pmix_process_info.app_ldrs = val;
|
||||
opal_process_info.app_ldrs = val;
|
||||
}
|
||||
}
|
||||
|
||||
@ -693,11 +669,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APP_ARGV,
|
||||
&pname, (char**)&ev1, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
pmix_process_info.command = ev1; // ev1 is an allocated string
|
||||
opal_process_info.command = ev1; // ev1 is an allocated string
|
||||
} else if (NULL != pargv) {
|
||||
tmp = *pargv;
|
||||
if (NULL != tmp) {
|
||||
pmix_process_info.command = opal_argv_join(tmp, ' ');
|
||||
opal_process_info.command = opal_argv_join(tmp, ' ');
|
||||
}
|
||||
}
|
||||
|
||||
@ -705,7 +681,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_REINCARNATION,
|
||||
&OPAL_PROC_MY_NAME, &u32ptr, PMIX_UINT32);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
pmix_process_info.reincarnation = u32;
|
||||
opal_process_info.reincarnation = u32;
|
||||
}
|
||||
|
||||
/* get the number of local peers - required for wireup of
|
||||
@ -713,7 +689,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_SIZE,
|
||||
&pname, &u32ptr, PMIX_UINT32);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
pmix_process_info.num_local_peers = u32 - 1; // want number besides ourselves
|
||||
opal_process_info.num_local_peers = u32 - 1; // want number besides ourselves
|
||||
} else {
|
||||
ret = opal_pmix_convert_status(rc);
|
||||
error = "local size";
|
||||
@ -723,10 +699,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
/* retrieve temp directories info */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_TMPDIR, &pname, &val, PMIX_STRING);
|
||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||
pmix_process_info.top_session_dir = val;
|
||||
opal_process_info.top_session_dir = val;
|
||||
} else {
|
||||
/* we need to create something */
|
||||
rc = _setup_top_session_dir(&pmix_process_info.top_session_dir);
|
||||
rc = _setup_top_session_dir(&opal_process_info.top_session_dir);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
error = "top session directory";
|
||||
goto error;
|
||||
@ -736,11 +712,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
/* retrieve job-session directory info */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NSDIR, &pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
pmix_process_info.job_session_dir = val;
|
||||
opal_process_info.job_session_dir = val;
|
||||
val = NULL;
|
||||
} else {
|
||||
/* we need to create something */
|
||||
rc = _setup_job_session_dir(&pmix_process_info.job_session_dir);
|
||||
rc = _setup_job_session_dir(&opal_process_info.job_session_dir);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
error = "job session directory";
|
||||
goto error;
|
||||
@ -750,10 +726,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
/* retrieve proc-session directory info */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROCDIR, &OPAL_PROC_MY_NAME, &val, PMIX_STRING);
|
||||
if (OPAL_SUCCESS == rc && NULL != val) {
|
||||
pmix_process_info.proc_session_dir = val;
|
||||
opal_process_info.proc_session_dir = val;
|
||||
} else {
|
||||
/* we need to create something */
|
||||
rc = _setup_proc_session_dir(&pmix_process_info.proc_session_dir);
|
||||
rc = _setup_proc_session_dir(&opal_process_info.proc_session_dir);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
error = "proc session directory";
|
||||
goto error;
|
||||
@ -764,26 +740,26 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
* for our app */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_WDIR, &pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
pmix_process_info.initial_wdir = val;
|
||||
opal_process_info.initial_wdir = val;
|
||||
val = NULL;
|
||||
}
|
||||
|
||||
/* identify our location */
|
||||
val = NULL;
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
&pmix_process_info.my_name, &val, PMIX_STRING);
|
||||
&opal_process_info.my_name, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
pmix_process_info.cpuset = val;
|
||||
pmix_proc_is_bound = true;
|
||||
opal_process_info.cpuset = val;
|
||||
opal_process_info.proc_is_bound = true;
|
||||
} else {
|
||||
pmix_process_info.cpuset = NULL;
|
||||
pmix_proc_is_bound = false;
|
||||
opal_process_info.cpuset = NULL;
|
||||
opal_process_info.proc_is_bound = false;
|
||||
}
|
||||
|
||||
/* get our numa rank from PMIx */
|
||||
if (pmix_proc_is_bound) {
|
||||
if (opal_process_info.proc_is_bound) {
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NUMA_RANK,
|
||||
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
if (ompi_singleton) {
|
||||
/* just assume the numa_rank is invalid, set to UINT16_MAX */
|
||||
@ -794,18 +770,18 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
pmix_process_info.my_numa_rank = u16;
|
||||
opal_process_info.my_numa_rank = u16;
|
||||
} else {
|
||||
/* If processes are not bound, the numa_rank is not available
|
||||
* Assign UINT16_MAX to the numa_rank to indicate an invalid value
|
||||
*/
|
||||
pmix_process_info.my_numa_rank = UINT16_MAX;
|
||||
opal_process_info.my_numa_rank = UINT16_MAX;
|
||||
}
|
||||
|
||||
/* get our local peers */
|
||||
if (0 < pmix_process_info.num_local_peers) {
|
||||
if (0 < opal_process_info.num_local_peers) {
|
||||
/* if my local rank is too high, then that's an error */
|
||||
if (pmix_process_info.num_local_peers < pmix_process_info.my_local_rank) {
|
||||
if (opal_process_info.num_local_peers < opal_process_info.my_local_rank) {
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
error = "num local peers";
|
||||
goto error;
|
||||
@ -827,10 +803,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
|
||||
/* set the locality */
|
||||
if (NULL != peers) {
|
||||
pname.jobid = pmix_process_info.my_name.jobid;
|
||||
pname.jobid = opal_process_info.my_name.jobid;
|
||||
for (i=0; NULL != peers[i]; i++) {
|
||||
pname.vpid = strtoul(peers[i], NULL, 10);
|
||||
if (pname.vpid == pmix_process_info.my_name.vpid) {
|
||||
if (pname.vpid == opal_process_info.my_name.vpid) {
|
||||
/* we are fully local to ourselves */
|
||||
u16 = OPAL_PROC_ALL_LOCAL;
|
||||
} else {
|
||||
@ -838,7 +814,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
|
||||
&pname, &val, PMIX_STRING);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
u16 = opal_hwloc_compute_relative_locality(pmix_process_info.cpuset, val);
|
||||
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
|
||||
free(val);
|
||||
} else {
|
||||
/* all we can say is that it shares our node */
|
||||
@ -853,8 +829,8 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
ret = opal_pmix_convert_status(rc);
|
||||
error = "local store of locality";
|
||||
opal_argv_free(peers);
|
||||
if (NULL != pmix_process_info.cpuset) {
|
||||
free(pmix_process_info.cpuset);
|
||||
if (NULL != opal_process_info.cpuset) {
|
||||
free(opal_process_info.cpuset);
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
@ -868,7 +844,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
* as they wish.
|
||||
*/
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_STREAM_BUFFERING",
|
||||
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
if (0 == u16) {
|
||||
setvbuf(stdout, NULL, _IONBF, 0);
|
||||
@ -882,18 +858,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
}
|
||||
}
|
||||
|
||||
/* set the remaining opal_process_info fields. Note that
|
||||
* the OPAL layer will have initialized these to NULL, and
|
||||
* anyone between us would not have strdup'd the string, so
|
||||
* we cannot free it here */
|
||||
opal_process_info.top_session_dir = pmix_process_info.top_session_dir;
|
||||
opal_process_info.job_session_dir = pmix_process_info.job_session_dir;
|
||||
opal_process_info.proc_session_dir = pmix_process_info.proc_session_dir;
|
||||
opal_process_info.num_local_peers = (int32_t)pmix_process_info.num_local_peers;
|
||||
opal_process_info.my_local_rank = (int32_t)pmix_process_info.my_local_rank;
|
||||
opal_process_info.my_numa_rank = pmix_process_info.my_numa_rank;
|
||||
opal_process_info.cpuset = pmix_process_info.cpuset;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
|
||||
error:
|
||||
@ -936,46 +900,46 @@ int ompi_rte_finalize(void)
|
||||
PMIx_Finalize(NULL, 0);
|
||||
|
||||
/* cleanup the session directory we created */
|
||||
if (NULL != pmix_process_info.job_session_dir) {
|
||||
opal_os_dirpath_destroy(pmix_process_info.job_session_dir,
|
||||
if (NULL != opal_process_info.job_session_dir) {
|
||||
opal_os_dirpath_destroy(opal_process_info.job_session_dir,
|
||||
false, check_file);
|
||||
free(pmix_process_info.job_session_dir);
|
||||
pmix_process_info.job_session_dir = NULL;
|
||||
free(opal_process_info.job_session_dir);
|
||||
opal_process_info.job_session_dir = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.top_session_dir) {
|
||||
free(pmix_process_info.top_session_dir);
|
||||
pmix_process_info.top_session_dir = NULL;
|
||||
if (NULL != opal_process_info.top_session_dir) {
|
||||
free(opal_process_info.top_session_dir);
|
||||
opal_process_info.top_session_dir = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.proc_session_dir) {
|
||||
free(pmix_process_info.proc_session_dir);
|
||||
pmix_process_info.proc_session_dir = NULL;
|
||||
if (NULL != opal_process_info.proc_session_dir) {
|
||||
free(opal_process_info.proc_session_dir);
|
||||
opal_process_info.proc_session_dir = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.app_sizes) {
|
||||
free(pmix_process_info.app_sizes);
|
||||
pmix_process_info.app_sizes = NULL;
|
||||
if (NULL != opal_process_info.app_sizes) {
|
||||
free(opal_process_info.app_sizes);
|
||||
opal_process_info.app_sizes = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.app_ldrs) {
|
||||
free(pmix_process_info.app_ldrs);
|
||||
pmix_process_info.app_ldrs = NULL;
|
||||
if (NULL != opal_process_info.app_ldrs) {
|
||||
free(opal_process_info.app_ldrs);
|
||||
opal_process_info.app_ldrs = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.cpuset) {
|
||||
free(pmix_process_info.cpuset);
|
||||
pmix_process_info.cpuset = NULL;
|
||||
if (NULL != opal_process_info.cpuset) {
|
||||
free(opal_process_info.cpuset);
|
||||
opal_process_info.cpuset = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.command) {
|
||||
free(pmix_process_info.command);
|
||||
pmix_process_info.command = NULL;
|
||||
if (NULL != opal_process_info.command) {
|
||||
free(opal_process_info.command);
|
||||
opal_process_info.command = NULL;
|
||||
}
|
||||
|
||||
if (NULL != pmix_process_info.initial_wdir) {
|
||||
free(pmix_process_info.initial_wdir);
|
||||
pmix_process_info.initial_wdir = NULL;
|
||||
if (NULL != opal_process_info.initial_wdir) {
|
||||
free(opal_process_info.initial_wdir);
|
||||
opal_process_info.initial_wdir = NULL;
|
||||
}
|
||||
|
||||
/* cleanup our internal nspace hack */
|
||||
@ -1096,11 +1060,11 @@ static int _setup_job_session_dir(char **sdir)
|
||||
uid_t uid = geteuid();
|
||||
|
||||
if (0 > opal_asprintf(sdir, "%s/ompi.%s.%lu/jf.0/%u",
|
||||
pmix_process_info.top_session_dir,
|
||||
pmix_process_info.nodename,
|
||||
opal_process_info.top_session_dir,
|
||||
opal_process_info.nodename,
|
||||
(unsigned long)uid,
|
||||
pmix_process_info.my_name.jobid)) {
|
||||
pmix_process_info.job_session_dir = NULL;
|
||||
opal_process_info.my_name.jobid)) {
|
||||
opal_process_info.job_session_dir = NULL;
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -1110,9 +1074,9 @@ static int _setup_job_session_dir(char **sdir)
|
||||
static int _setup_proc_session_dir(char **sdir)
|
||||
{
|
||||
if (0 > opal_asprintf(sdir, "%s/%d",
|
||||
pmix_process_info.job_session_dir,
|
||||
pmix_process_info.my_name.vpid)) {
|
||||
pmix_process_info.proc_session_dir = NULL;
|
||||
opal_process_info.job_session_dir,
|
||||
opal_process_info.my_name.vpid)) {
|
||||
opal_process_info.proc_session_dir = NULL;
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
@ -190,11 +190,10 @@ typedef uint32_t ompi_jobid_t;
|
||||
typedef uint32_t ompi_vpid_t;
|
||||
|
||||
/* some local storage */
|
||||
OMPI_DECLSPEC extern opal_process_name_t pmix_name_wildcard;
|
||||
OMPI_DECLSPEC extern hwloc_cpuset_t ompi_proc_applied_binding;
|
||||
|
||||
#define OMPI_PROC_MY_NAME (&pmix_process_info.my_name)
|
||||
#define OMPI_NAME_WILDCARD (&pmix_name_wildcard)
|
||||
#define OMPI_PROC_MY_NAME (&opal_process_info.my_name)
|
||||
#define OMPI_NAME_WILDCARD (&opal_name_wildcard)
|
||||
|
||||
typedef uint8_t ompi_rte_cmp_bitmask_t;
|
||||
#define OMPI_RTE_CMP_NONE 0x00
|
||||
@ -240,40 +239,8 @@ static inline opal_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam
|
||||
#endif
|
||||
|
||||
/* Process info struct and values */
|
||||
typedef uint16_t ompi_node_rank_t;
|
||||
typedef uint16_t ompi_local_rank_t;
|
||||
#define OMPI_NODE_RANK_INVALID UINT16_MAX
|
||||
#define OMPI_LOCAL_RANK_INVALID UINT16_MAX
|
||||
|
||||
typedef struct {
|
||||
opal_process_name_t my_name;
|
||||
pmix_proc_t myprocid;
|
||||
char *nodename;
|
||||
pid_t pid;
|
||||
char *top_session_dir;
|
||||
char *job_session_dir;
|
||||
char *proc_session_dir;
|
||||
uint16_t my_local_rank;
|
||||
uint16_t my_node_rank;
|
||||
/* process rank on local NUMA node. Set to UINT16_MAX if NUMA rank is unavailable */
|
||||
uint16_t my_numa_rank;
|
||||
int32_t num_local_peers;
|
||||
uint32_t num_procs;
|
||||
uint32_t app_num;
|
||||
uint32_t univ_size;
|
||||
char *app_sizes;
|
||||
char *app_ldrs;
|
||||
char *cpuset;
|
||||
char *command;
|
||||
uint32_t num_apps;
|
||||
char *initial_wdir;
|
||||
uint32_t reincarnation;
|
||||
} pmix_process_info_t;
|
||||
OMPI_DECLSPEC extern pmix_process_info_t pmix_process_info;
|
||||
#define ompi_process_info pmix_process_info
|
||||
|
||||
OMPI_DECLSPEC extern bool pmix_proc_is_bound;
|
||||
#define ompi_rte_proc_is_bound pmix_proc_is_bound
|
||||
#define ompi_process_info opal_process_info
|
||||
#define ompi_rte_proc_is_bound opal_process_info.proc_is_bound
|
||||
|
||||
/* Error handling objects and operations */
|
||||
OMPI_DECLSPEC void __opal_attribute_noreturn__
|
||||
|
@ -17,6 +17,7 @@
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2020 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -434,11 +435,15 @@ typedef struct mca_btl_base_segment_t mca_btl_base_segment_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN)
|
||||
#define MCA_BTL_BASE_SEGMENT_HTON(s) \
|
||||
do { \
|
||||
(s).seg_addr.lval = hton64((s).seg_addr.lval); \
|
||||
(s).seg_len = hton64((s).seg_len);
|
||||
(s).seg_len = hton64((s).seg_len); \
|
||||
} while(0)
|
||||
#define MCA_BTL_BASE_SEGMENT_NTOH(s) \
|
||||
do { \
|
||||
(s).seg_addr.lval = ntoh64((s).seg_addr.lval); \
|
||||
(s).seg_len = ntoh64((s).seg_len);
|
||||
(s).seg_len = ntoh64((s).seg_len); \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_BTL_BASE_SEGMENT_HTON(s)
|
||||
#define MCA_BTL_BASE_SEGMENT_NTOH(s)
|
||||
|
@ -368,7 +368,7 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
|
||||
|
||||
/* setup endpoint */
|
||||
int rank = opal_atomic_fetch_add_32(&component -> local_rank, 1);
|
||||
|
||||
|
||||
peers[proc] = component->endpoints + rank;
|
||||
rc = init_vader_endpoint (peers[proc], procs[proc], rank);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
@ -426,7 +426,7 @@ static int vader_finalize(struct mca_btl_base_module_t *btl)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS ; ++i) {
|
||||
for (int i = 0 ; i < (int)(1 + MCA_BTL_VADER_NUM_LOCAL_PEERS) ; ++i) {
|
||||
fini_vader_endpoint (component->endpoints + i);
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "opal/mca/base/mca_base_framework.h"
|
||||
#include "opal/mca/pmix/pmix-internal.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include <ucm/api/ucm.h>
|
||||
|
||||
|
@ -28,10 +28,7 @@
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/mca/threads/threads.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/hash_string.h"
|
||||
|
||||
/* include implementation to call */
|
||||
@ -219,7 +216,6 @@ typedef struct {
|
||||
pmix_value_t _kv; \
|
||||
PMIX_VALUE_LOAD(&_kv, (d), (t)); \
|
||||
(r) = PMIx_Put((sc), (s), &(_kv)); \
|
||||
OPAL_ERROR_LOG((r)); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit e353951a67665a9a623fd3590365f493a1bdb8dd
|
||||
Subproject commit 8a47268db46c70ac48266e6efa792b3762c28d95
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2015-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2018-2020 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -36,6 +36,7 @@
|
||||
#include "opal/mca/timer/base/base.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
|
||||
#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
|
||||
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2020 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -34,7 +35,6 @@ BEGIN_C_DECLS
|
||||
|
||||
#include "opal_config.h"
|
||||
#include "opal/mca/threads/mutex.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
|
||||
/**
|
||||
* Initialize the progress engine
|
||||
|
@ -30,6 +30,8 @@ opal_process_name_t opal_name_wildcard = {OPAL_JOBID_WILDCARD, OPAL_VPID_WILDCAR
|
||||
opal_process_name_t opal_name_invalid = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID};
|
||||
|
||||
opal_process_info_t opal_process_info = {
|
||||
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
|
||||
.myprocid = {{0}, PMIX_RANK_INVALID},
|
||||
.nativelaunch = false,
|
||||
.nodename = NULL,
|
||||
.top_session_dir = NULL,
|
||||
@ -37,8 +39,21 @@ opal_process_info_t opal_process_info = {
|
||||
.proc_session_dir = NULL,
|
||||
.num_local_peers = 0, /* there is nobody else but me */
|
||||
.my_local_rank = 0, /* I'm the only process around here */
|
||||
.my_node_rank = 0,
|
||||
.my_numa_rank = UINT16_MAX, /* Assume numa_rank is unavailable, set to UINT16_MAX */
|
||||
.cpuset = NULL,
|
||||
.pid = 0,
|
||||
.num_procs = 0,
|
||||
.app_num = 0,
|
||||
.univ_size = 0,
|
||||
.app_sizes = NULL,
|
||||
.app_ldrs = NULL,
|
||||
.cpuset = NULL,
|
||||
.command = NULL,
|
||||
.num_apps = 0,
|
||||
.initial_wdir = NULL,
|
||||
.reincarnation = 0,
|
||||
.proc_is_bound = false
|
||||
};
|
||||
|
||||
static opal_proc_t opal_local_proc = {
|
||||
|
@ -22,8 +22,9 @@
|
||||
#include "opal_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "opal/mca/pmix/pmix-internal.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#include <arpa/inet.h>
|
||||
@ -103,15 +104,29 @@ typedef struct {
|
||||
OBJ_CLASS_DECLARATION(opal_namelist_t);
|
||||
|
||||
typedef struct opal_process_info_t {
|
||||
opal_process_name_t my_name;
|
||||
pmix_proc_t myprocid;
|
||||
bool nativelaunch; /**< launched by mpirun */
|
||||
char *nodename; /**< string name for this node */
|
||||
char *top_session_dir; /**< Top-level session directory */
|
||||
char *job_session_dir; /**< Session directory for job */
|
||||
char *proc_session_dir; /**< Session directory for the process */
|
||||
int32_t num_local_peers; /**< number of procs from my job that share my node with me */
|
||||
int32_t my_local_rank; /**< local rank on this node within my job */
|
||||
int16_t my_numa_rank; /**< rank on this process's NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */
|
||||
uint32_t num_local_peers; /**< number of procs from my job that share my node with me */
|
||||
uint16_t my_local_rank; /**< local rank on this node within my job */
|
||||
uint16_t my_node_rank;
|
||||
uint16_t my_numa_rank; /**< rank on this process's NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */
|
||||
char *cpuset; /**< String-representation of bitmap where we are bound */
|
||||
pid_t pid;
|
||||
uint32_t num_procs;
|
||||
uint32_t app_num;
|
||||
uint32_t univ_size;
|
||||
char *app_sizes;
|
||||
char *app_ldrs;
|
||||
char *command;
|
||||
uint32_t num_apps;
|
||||
char *initial_wdir;
|
||||
uint32_t reincarnation;
|
||||
bool proc_is_bound;
|
||||
} opal_process_info_t;
|
||||
OPAL_DECLSPEC extern opal_process_info_t opal_process_info;
|
||||
|
||||
|
2
prrte
2
prrte
@ -1 +1 @@
|
||||
Subproject commit 743fb9c42e6037735011d6c16f0d42d116d49d90
|
||||
Subproject commit 4b84c2c8564b6c926bc747d5c99aa1fab0b61bcb
|
@ -15,6 +15,7 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2020 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,6 +27,7 @@
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user