1
1

Merge pull request #7652 from rhc54/topic/het

Cleanup heterogeneous builds
Этот коммит содержится в:
Ralph Castain 2020-04-22 16:20:06 -07:00 коммит произвёл GitHub
родитель 53154756e1 60c650e79b
Коммит 91be01beb2
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
17 изменённых файлов: 164 добавлений и 196 удалений

Просмотреть файл

@ -102,7 +102,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
opal_prrte_prefix_arg=
fi
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
opal_prrte_args="--prefix=$prefix --with-proxy-version-string=$OPAL_VERSION --with-proxy-package-name=\"Open MPI\" --with-proxy-bugreport=\"https://www.open-mpi.org/community/help/\" $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
AS_IF([test "$enable_debug" = "yes"],
[opal_prrte_args="--enable-debug $opal_prrte_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],

Просмотреть файл

@ -6,7 +6,7 @@ enable_mem_debug=yes
enable_mem_profile=no
enable_debug_symbols=yes
enable_binaries=yes
enable_heterogeneous=no
enable_heterogeneous=yes
enable_picky=yes
enable_debug=yes
enable_shared=yes

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
@ -346,15 +346,12 @@ ompi_mtl_psm2_component_close(void)
/*
 * Report this process's rank on its node through out_rank.
 *
 * *out_rank is first set to 0. If ompi_process_info.my_node_rank equals
 * OMPI_NODE_RANK_INVALID the function returns OMPI_ERROR and *out_rank
 * stays 0; otherwise the node rank is stored (cast to int) and
 * OMPI_SUCCESS is returned.
 *
 * Note: despite the function's name, the value read is my_node_rank,
 * not my_local_rank.
 */
static int
get_local_rank(int *out_rank)
{
ompi_node_rank_t my_node_rank;
*out_rank = 0;
if (OMPI_NODE_RANK_INVALID == (my_node_rank =
ompi_process_info.my_node_rank)) {
if (OMPI_NODE_RANK_INVALID == ompi_process_info.my_node_rank) {
return OMPI_ERROR;
}
*out_rank = (int)my_node_rank;
*out_rank = (int)ompi_process_info.my_node_rank;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -144,19 +144,24 @@ int ompi_proc_complete_init_single (ompi_proc_t *proc)
{
uint32_t *ui32ptr;
int ret;
ui32ptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, PMIX_ARCH, &proc->super.proc_name,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
/* if the proc is local, then no need to fetch it */
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
proc->super.proc_arch = opal_local_arch;
} else {
return ret;
ui32ptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, "OMPI_ARCH", &proc->super.proc_name,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
proc->super.proc_arch = opal_local_arch;
} else {
return ret;
}
}
}
#else
@ -258,7 +263,7 @@ int ompi_proc_init(void)
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* add our arch to the modex */
OPAL_MODEX_SEND_VALUE(ret, PMIX_GLOBAL,
PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
"OMPI_ARCH", &opal_local_arch, OPAL_UINT32);
if (OPAL_SUCCESS != ret) {
return ret;
}

Просмотреть файл

@ -62,30 +62,6 @@
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
/*
 * Static initializer for the pmix_process_info record: the process name and
 * PMIx proc id start out invalid, every pointer field is NULL, the counters
 * and ranks are zero, and the NUMA rank uses UINT16_MAX as its "invalid"
 * marker.
 */
pmix_process_info_t pmix_process_info = {
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
.myprocid = {{0}, PMIX_RANK_INVALID},
.nodename = NULL,
.pid = 0,
.top_session_dir = NULL,
.job_session_dir = NULL,
.proc_session_dir = NULL,
.my_local_rank = 0,
.my_node_rank = 0,
.my_numa_rank = UINT16_MAX, /* Assume invalid NUMA rank, set to UINT16_MAX */
.num_local_peers = 0,
.num_procs = 0,
.app_num = 0,
.univ_size = 0,
.app_sizes = NULL,
.app_ldrs = NULL,
.cpuset = NULL,
.command = NULL,
.num_apps = 0,
.initial_wdir = NULL,
.reincarnation = 0
};
bool pmix_proc_is_bound = false;
bool ompi_singleton = false;
static int _setup_top_session_dir(char **sdir);
@ -530,7 +506,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
u32ptr = &u32;
u16ptr = &u16;
memset(&pmix_process_info, 0, sizeof(pmix_process_info));
memset(&opal_process_info, 0, sizeof(opal_process_info));
/* Convince OPAL to use our naming scheme */
opal_process_name_print = _process_name_print_for_opal;
@ -552,7 +528,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
opal_pmix_setup_nspace_tracker();
/* initialize the selected module */
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&pmix_process_info.myprocid, NULL, 0)))) {
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0)))) {
/* if we get PMIX_ERR_UNREACH indicating that we cannot reach the
* server, then we assume we are operating as a singleton */
if (PMIX_ERR_UNREACH == ret) {
@ -567,14 +543,14 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
/* setup the process name fields - also registers the new nspace */
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pmix_process_info.myprocid);
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &opal_process_info.myprocid);
if (OPAL_SUCCESS != rc) {
return rc;
}
OPAL_PROC_MY_NAME.jobid = pname.jobid;
OPAL_PROC_MY_NAME.vpid = pname.vpid;
pmix_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
pmix_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
/* set our hostname */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME,
@ -585,11 +561,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
opal_process_info.nodename = ev1; // ev1 is an allocated string
}
pmix_process_info.nodename = opal_process_info.nodename;
opal_process_info.nodename = opal_process_info.nodename;
/* get our local rank from PMIx */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_RANK,
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS != rc) {
if (ompi_singleton) {
/* just assume 0 */
@ -600,11 +576,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
goto error;
}
}
pmix_process_info.my_local_rank = u16;
opal_process_info.my_local_rank = u16;
/* get our node rank from PMIx */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NODE_RANK,
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS != rc) {
if (ompi_singleton) {
/* just assume 0 */
@ -615,10 +591,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
goto error;
}
}
pmix_process_info.my_node_rank = u16;
opal_process_info.my_node_rank = u16;
/* get job size */
pname.jobid = pmix_process_info.my_name.jobid;
pname.jobid = opal_process_info.my_name.jobid;
pname.vpid = OPAL_VPID_WILDCARD;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_SIZE,
&pname, &u32ptr, PMIX_UINT32);
@ -632,7 +608,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
goto error;
}
}
pmix_process_info.num_procs = u32;
opal_process_info.num_procs = u32;
/* get universe size */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_UNIV_SIZE,
@ -643,49 +619,49 @@ int ompi_rte_init(int *pargc, char ***pargv)
u32 = 1;
} else {
/* default to job size */
u32 = pmix_process_info.num_procs;
u32 = opal_process_info.num_procs;
}
}
pmix_process_info.univ_size = u32;
opal_process_info.univ_size = u32;
/* get number of app contexts */
pname.jobid = pmix_process_info.my_name.jobid;
pname.jobid = opal_process_info.my_name.jobid;
pname.vpid = OPAL_VPID_WILDCARD;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_JOB_NUM_APPS,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.num_apps = u32;
opal_process_info.num_apps = u32;
} else {
pmix_process_info.num_apps = 1;
opal_process_info.num_apps = 1;
}
/* get our app number from PMIx - ok if not found */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APPNUM,
&pmix_process_info.my_name, &u32ptr, PMIX_UINT32);
&opal_process_info.my_name, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.app_num = u32;
opal_process_info.app_num = u32;
} else {
pmix_process_info.app_num = 0;
opal_process_info.app_num = 0;
}
/* if more than one app context, get the number of procs and first rank of each */
if (1 == pmix_process_info.num_apps) {
pmix_process_info.app_ldrs = strdup("0");
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
if (1 == opal_process_info.num_apps) {
opal_process_info.app_ldrs = strdup("0");
opal_asprintf(&opal_process_info.app_sizes, "%u", opal_process_info.num_procs);
} else {
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_APP_SIZES", &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS != rc) {
/* assume it is just us */
opal_asprintf(&pmix_process_info.app_sizes, "%u", pmix_process_info.num_procs);
opal_asprintf(&opal_process_info.app_sizes, "%u", opal_process_info.num_procs);
} else {
pmix_process_info.app_sizes = val;
opal_process_info.app_sizes = val;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_FIRST_RANKS", &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS != rc) {
/* assume it is just us */
pmix_process_info.app_ldrs = strdup("0");
opal_process_info.app_ldrs = strdup("0");
} else {
pmix_process_info.app_ldrs = val;
opal_process_info.app_ldrs = val;
}
}
@ -693,11 +669,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_APP_ARGV,
&pname, (char**)&ev1, PMIX_STRING);
if (PMIX_SUCCESS == rc) {
pmix_process_info.command = ev1; // ev1 is an allocated string
opal_process_info.command = ev1; // ev1 is an allocated string
} else if (NULL != pargv) {
tmp = *pargv;
if (NULL != tmp) {
pmix_process_info.command = opal_argv_join(tmp, ' ');
opal_process_info.command = opal_argv_join(tmp, ' ');
}
}
@ -705,7 +681,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_REINCARNATION,
&OPAL_PROC_MY_NAME, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.reincarnation = u32;
opal_process_info.reincarnation = u32;
}
/* get the number of local peers - required for wireup of
@ -713,7 +689,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_SIZE,
&pname, &u32ptr, PMIX_UINT32);
if (PMIX_SUCCESS == rc) {
pmix_process_info.num_local_peers = u32 - 1; // want number besides ourselves
opal_process_info.num_local_peers = u32 - 1; // want number besides ourselves
} else {
ret = opal_pmix_convert_status(rc);
error = "local size";
@ -723,10 +699,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
/* retrieve temp directories info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_TMPDIR, &pname, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
pmix_process_info.top_session_dir = val;
opal_process_info.top_session_dir = val;
} else {
/* we need to create something */
rc = _setup_top_session_dir(&pmix_process_info.top_session_dir);
rc = _setup_top_session_dir(&opal_process_info.top_session_dir);
if (OPAL_SUCCESS != rc) {
error = "top session directory";
goto error;
@ -736,11 +712,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
/* retrieve job-session directory info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NSDIR, &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
pmix_process_info.job_session_dir = val;
opal_process_info.job_session_dir = val;
val = NULL;
} else {
/* we need to create something */
rc = _setup_job_session_dir(&pmix_process_info.job_session_dir);
rc = _setup_job_session_dir(&opal_process_info.job_session_dir);
if (OPAL_SUCCESS != rc) {
error = "job session directory";
goto error;
@ -750,10 +726,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
/* retrieve proc-session directory info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROCDIR, &OPAL_PROC_MY_NAME, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
pmix_process_info.proc_session_dir = val;
opal_process_info.proc_session_dir = val;
} else {
/* we need to create something */
rc = _setup_proc_session_dir(&pmix_process_info.proc_session_dir);
rc = _setup_proc_session_dir(&opal_process_info.proc_session_dir);
if (OPAL_SUCCESS != rc) {
error = "proc session directory";
goto error;
@ -764,26 +740,26 @@ int ompi_rte_init(int *pargc, char ***pargv)
* for our app */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_WDIR, &pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
pmix_process_info.initial_wdir = val;
opal_process_info.initial_wdir = val;
val = NULL;
}
/* identify our location */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
&pmix_process_info.my_name, &val, PMIX_STRING);
&opal_process_info.my_name, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
pmix_process_info.cpuset = val;
pmix_proc_is_bound = true;
opal_process_info.cpuset = val;
opal_process_info.proc_is_bound = true;
} else {
pmix_process_info.cpuset = NULL;
pmix_proc_is_bound = false;
opal_process_info.cpuset = NULL;
opal_process_info.proc_is_bound = false;
}
/* get our numa rank from PMIx */
if (pmix_proc_is_bound) {
if (opal_process_info.proc_is_bound) {
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NUMA_RANK,
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS != rc) {
if (ompi_singleton) {
/* just assume the numa_rank is invalid, set to UINT16_MAX */
@ -794,18 +770,18 @@ int ompi_rte_init(int *pargc, char ***pargv)
goto error;
}
}
pmix_process_info.my_numa_rank = u16;
opal_process_info.my_numa_rank = u16;
} else {
/* If processes are not bound, the numa_rank is not available.
* Assign UINT16_MAX to the numa_rank to indicate an invalid value.
*/
pmix_process_info.my_numa_rank = UINT16_MAX;
opal_process_info.my_numa_rank = UINT16_MAX;
}
/* get our local peers */
if (0 < pmix_process_info.num_local_peers) {
if (0 < opal_process_info.num_local_peers) {
/* if my local rank is too high, then that's an error */
if (pmix_process_info.num_local_peers < pmix_process_info.my_local_rank) {
if (opal_process_info.num_local_peers < opal_process_info.my_local_rank) {
ret = OPAL_ERR_BAD_PARAM;
error = "num local peers";
goto error;
@ -827,10 +803,10 @@ int ompi_rte_init(int *pargc, char ***pargv)
/* set the locality */
if (NULL != peers) {
pname.jobid = pmix_process_info.my_name.jobid;
pname.jobid = opal_process_info.my_name.jobid;
for (i=0; NULL != peers[i]; i++) {
pname.vpid = strtoul(peers[i], NULL, 10);
if (pname.vpid == pmix_process_info.my_name.vpid) {
if (pname.vpid == opal_process_info.my_name.vpid) {
/* we are fully local to ourselves */
u16 = OPAL_PROC_ALL_LOCAL;
} else {
@ -838,7 +814,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
&pname, &val, PMIX_STRING);
if (PMIX_SUCCESS == rc && NULL != val) {
u16 = opal_hwloc_compute_relative_locality(pmix_process_info.cpuset, val);
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
free(val);
} else {
/* all we can say is that it shares our node */
@ -853,8 +829,8 @@ int ompi_rte_init(int *pargc, char ***pargv)
ret = opal_pmix_convert_status(rc);
error = "local store of locality";
opal_argv_free(peers);
if (NULL != pmix_process_info.cpuset) {
free(pmix_process_info.cpuset);
if (NULL != opal_process_info.cpuset) {
free(opal_process_info.cpuset);
}
goto error;
}
@ -868,7 +844,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
* as they wish.
*/
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, "OMPI_STREAM_BUFFERING",
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
&opal_process_info.my_name, &u16ptr, PMIX_UINT16);
if (PMIX_SUCCESS == rc) {
if (0 == u16) {
setvbuf(stdout, NULL, _IONBF, 0);
@ -882,18 +858,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
}
/* set the remaining opal_process_info fields. Note that
* the OPAL layer will have initialized these to NULL, and
* anyone between us would not have strdup'd the string, so
* we cannot free it here */
opal_process_info.top_session_dir = pmix_process_info.top_session_dir;
opal_process_info.job_session_dir = pmix_process_info.job_session_dir;
opal_process_info.proc_session_dir = pmix_process_info.proc_session_dir;
opal_process_info.num_local_peers = (int32_t)pmix_process_info.num_local_peers;
opal_process_info.my_local_rank = (int32_t)pmix_process_info.my_local_rank;
opal_process_info.my_numa_rank = pmix_process_info.my_numa_rank;
opal_process_info.cpuset = pmix_process_info.cpuset;
return OPAL_SUCCESS;
error:
@ -936,46 +900,46 @@ int ompi_rte_finalize(void)
PMIx_Finalize(NULL, 0);
/* cleanup the session directory we created */
if (NULL != pmix_process_info.job_session_dir) {
opal_os_dirpath_destroy(pmix_process_info.job_session_dir,
if (NULL != opal_process_info.job_session_dir) {
opal_os_dirpath_destroy(opal_process_info.job_session_dir,
false, check_file);
free(pmix_process_info.job_session_dir);
pmix_process_info.job_session_dir = NULL;
free(opal_process_info.job_session_dir);
opal_process_info.job_session_dir = NULL;
}
if (NULL != pmix_process_info.top_session_dir) {
free(pmix_process_info.top_session_dir);
pmix_process_info.top_session_dir = NULL;
if (NULL != opal_process_info.top_session_dir) {
free(opal_process_info.top_session_dir);
opal_process_info.top_session_dir = NULL;
}
if (NULL != pmix_process_info.proc_session_dir) {
free(pmix_process_info.proc_session_dir);
pmix_process_info.proc_session_dir = NULL;
if (NULL != opal_process_info.proc_session_dir) {
free(opal_process_info.proc_session_dir);
opal_process_info.proc_session_dir = NULL;
}
if (NULL != pmix_process_info.app_sizes) {
free(pmix_process_info.app_sizes);
pmix_process_info.app_sizes = NULL;
if (NULL != opal_process_info.app_sizes) {
free(opal_process_info.app_sizes);
opal_process_info.app_sizes = NULL;
}
if (NULL != pmix_process_info.app_ldrs) {
free(pmix_process_info.app_ldrs);
pmix_process_info.app_ldrs = NULL;
if (NULL != opal_process_info.app_ldrs) {
free(opal_process_info.app_ldrs);
opal_process_info.app_ldrs = NULL;
}
if (NULL != pmix_process_info.cpuset) {
free(pmix_process_info.cpuset);
pmix_process_info.cpuset = NULL;
if (NULL != opal_process_info.cpuset) {
free(opal_process_info.cpuset);
opal_process_info.cpuset = NULL;
}
if (NULL != pmix_process_info.command) {
free(pmix_process_info.command);
pmix_process_info.command = NULL;
if (NULL != opal_process_info.command) {
free(opal_process_info.command);
opal_process_info.command = NULL;
}
if (NULL != pmix_process_info.initial_wdir) {
free(pmix_process_info.initial_wdir);
pmix_process_info.initial_wdir = NULL;
if (NULL != opal_process_info.initial_wdir) {
free(opal_process_info.initial_wdir);
opal_process_info.initial_wdir = NULL;
}
/* cleanup our internal nspace hack */
@ -1096,11 +1060,11 @@ static int _setup_job_session_dir(char **sdir)
uid_t uid = geteuid();
if (0 > opal_asprintf(sdir, "%s/ompi.%s.%lu/jf.0/%u",
pmix_process_info.top_session_dir,
pmix_process_info.nodename,
opal_process_info.top_session_dir,
opal_process_info.nodename,
(unsigned long)uid,
pmix_process_info.my_name.jobid)) {
pmix_process_info.job_session_dir = NULL;
opal_process_info.my_name.jobid)) {
opal_process_info.job_session_dir = NULL;
return OPAL_ERR_OUT_OF_RESOURCE;
}
@ -1110,9 +1074,9 @@ static int _setup_job_session_dir(char **sdir)
static int _setup_proc_session_dir(char **sdir)
{
if (0 > opal_asprintf(sdir, "%s/%d",
pmix_process_info.job_session_dir,
pmix_process_info.my_name.vpid)) {
pmix_process_info.proc_session_dir = NULL;
opal_process_info.job_session_dir,
opal_process_info.my_name.vpid)) {
opal_process_info.proc_session_dir = NULL;
return OPAL_ERR_OUT_OF_RESOURCE;
}

Просмотреть файл

@ -190,11 +190,10 @@ typedef uint32_t ompi_jobid_t;
typedef uint32_t ompi_vpid_t;
/* some local storage */
OMPI_DECLSPEC extern opal_process_name_t pmix_name_wildcard;
OMPI_DECLSPEC extern hwloc_cpuset_t ompi_proc_applied_binding;
#define OMPI_PROC_MY_NAME (&pmix_process_info.my_name)
#define OMPI_NAME_WILDCARD (&pmix_name_wildcard)
#define OMPI_PROC_MY_NAME (&opal_process_info.my_name)
#define OMPI_NAME_WILDCARD (&opal_name_wildcard)
typedef uint8_t ompi_rte_cmp_bitmask_t;
#define OMPI_RTE_CMP_NONE 0x00
@ -240,40 +239,8 @@ static inline opal_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam
#endif
/* Process info struct and values */
typedef uint16_t ompi_node_rank_t;
typedef uint16_t ompi_local_rank_t;
#define OMPI_NODE_RANK_INVALID UINT16_MAX
#define OMPI_LOCAL_RANK_INVALID UINT16_MAX
/*
 * Per-process information record. Only my_numa_rank carries its own
 * description below; the other fields are not described in this
 * declaration.
 * NOTE(review): the remaining field roles are presumably those implied by
 * their names (session directories, rank and peer counts, app-context
 * data) — confirm against the code that fills them in.
 */
typedef struct {
opal_process_name_t my_name;
pmix_proc_t myprocid;
char *nodename;
pid_t pid;
char *top_session_dir;
char *job_session_dir;
char *proc_session_dir;
uint16_t my_local_rank;
uint16_t my_node_rank;
/* process rank on local NUMA node. Set to UINT16_MAX if NUMA rank is unavailable */
uint16_t my_numa_rank;
int32_t num_local_peers;
uint32_t num_procs;
uint32_t app_num;
uint32_t univ_size;
char *app_sizes;
char *app_ldrs;
char *cpuset;
char *command;
uint32_t num_apps;
char *initial_wdir;
uint32_t reincarnation;
} pmix_process_info_t;
OMPI_DECLSPEC extern pmix_process_info_t pmix_process_info;
#define ompi_process_info pmix_process_info
OMPI_DECLSPEC extern bool pmix_proc_is_bound;
#define ompi_rte_proc_is_bound pmix_proc_is_bound
#define ompi_process_info opal_process_info
#define ompi_rte_proc_is_bound opal_process_info.proc_is_bound
/* Error handling objects and operations */
OMPI_DECLSPEC void __opal_attribute_noreturn__

Просмотреть файл

@ -17,6 +17,7 @@
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -434,11 +435,15 @@ typedef struct mca_btl_base_segment_t mca_btl_base_segment_t;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN)
/*
 * Convert segment (s) to network byte order in place: both seg_addr.lval
 * and seg_len are passed through hton64(). The do { } while(0) wrapper
 * makes the expansion behave as a single statement. Only this branch of
 * the surrounding #if (heterogeneous support enabled and WORDS_BIGENDIAN
 * not defined) gives the macro a body; the #else branch defines it empty.
 */
#define MCA_BTL_BASE_SEGMENT_HTON(s) \
do { \
(s).seg_addr.lval = hton64((s).seg_addr.lval); \
(s).seg_len = hton64((s).seg_len);
(s).seg_len = hton64((s).seg_len); \
} while(0)
/*
 * Convert segment (s) from network to host byte order in place: both
 * seg_addr.lval and seg_len are passed through ntoh64(). The
 * do { } while(0) wrapper makes the expansion behave as a single
 * statement. Only this branch of the surrounding #if (heterogeneous
 * support enabled and WORDS_BIGENDIAN not defined) gives the macro a body;
 * the #else branch defines it empty.
 */
#define MCA_BTL_BASE_SEGMENT_NTOH(s) \
do { \
(s).seg_addr.lval = ntoh64((s).seg_addr.lval); \
(s).seg_len = ntoh64((s).seg_len);
(s).seg_len = ntoh64((s).seg_len); \
} while(0)
#else
#define MCA_BTL_BASE_SEGMENT_HTON(s)
#define MCA_BTL_BASE_SEGMENT_NTOH(s)

Просмотреть файл

@ -368,7 +368,7 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
/* setup endpoint */
int rank = opal_atomic_fetch_add_32(&component -> local_rank, 1);
peers[proc] = component->endpoints + rank;
rc = init_vader_endpoint (peers[proc], procs[proc], rank);
if (OPAL_SUCCESS != rc) {
@ -426,7 +426,7 @@ static int vader_finalize(struct mca_btl_base_module_t *btl)
return OPAL_SUCCESS;
}
for (int i = 0 ; i < 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS ; ++i) {
for (int i = 0 ; i < (int)(1 + MCA_BTL_VADER_NUM_LOCAL_PEERS) ; ++i) {
fini_vader_endpoint (component->endpoints + i);
}

Просмотреть файл

@ -7,6 +7,7 @@
#include "opal/mca/base/mca_base_framework.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "opal/memoryhooks/memory.h"
#include "opal/util/proc.h"
#include <ucm/api/ucm.h>

Просмотреть файл

@ -28,10 +28,7 @@
#include "opal/mca/event/event.h"
#include "opal/mca/threads/threads.h"
#include "opal/dss/dss.h"
#include "opal/runtime/opal.h"
#include "opal/dss/dss.h"
#include "opal/util/error.h"
#include "opal/util/proc.h"
#include "opal/hash_string.h"
/* include implementation to call */
@ -219,7 +216,6 @@ typedef struct {
pmix_value_t _kv; \
PMIX_VALUE_LOAD(&_kv, (d), (t)); \
(r) = PMIx_Put((sc), (s), &(_kv)); \
OPAL_ERROR_LOG((r)); \
} while(0);
/**

@ -1 +1 @@
Subproject commit e353951a67665a9a623fd3590365f493a1bdb8dd
Subproject commit 8a47268db46c70ac48266e6efa792b3762c28d95

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* Copyright (c) 2018 Intel, Inc. All rights reserved.
* Copyright (c) 2018-2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -36,6 +36,7 @@
#include "opal/mca/timer/base/base.h"
#include "opal/util/output.h"
#include "opal/runtime/opal_params.h"
#include "opal/runtime/opal.h"
#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)

Просмотреть файл

@ -14,6 +14,7 @@
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,7 +35,6 @@ BEGIN_C_DECLS
#include "opal_config.h"
#include "opal/mca/threads/mutex.h"
#include "opal/runtime/opal.h"
/**
* Initialize the progress engine

Просмотреть файл

@ -30,6 +30,8 @@ opal_process_name_t opal_name_wildcard = {OPAL_JOBID_WILDCARD, OPAL_VPID_WILDCAR
opal_process_name_t opal_name_invalid = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID};
/*
 * Static initializer for the global opal_process_info record: the process
 * name and PMIx proc id start out invalid, pointer fields are NULL,
 * counters and ranks are zero, and the NUMA rank uses UINT16_MAX as its
 * "unavailable" marker.
 */
opal_process_info_t opal_process_info = {
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
.myprocid = {{0}, PMIX_RANK_INVALID},
.nativelaunch = false,
.nodename = NULL,
.top_session_dir = NULL,
@ -37,8 +39,21 @@ opal_process_info_t opal_process_info = {
.proc_session_dir = NULL,
.num_local_peers = 0, /* there is nobody else but me */
.my_local_rank = 0, /* I'm the only process around here */
.my_node_rank = 0,
.my_numa_rank = UINT16_MAX, /* Assume numa_rank is unavailable, set to UINT16_MAX */
.cpuset = NULL,
.pid = 0,
.num_procs = 0,
.app_num = 0,
.univ_size = 0,
.app_sizes = NULL,
.app_ldrs = NULL,
/* NOTE(review): .cpuset is already initialized a few lines above; this
 * second initializer looks redundant — confirm and keep only one. */
.cpuset = NULL,
.command = NULL,
.num_apps = 0,
.initial_wdir = NULL,
.reincarnation = 0,
.proc_is_bound = false
};
static opal_proc_t opal_local_proc = {

Просмотреть файл

@ -22,8 +22,9 @@
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "opal/types.h"
#include "opal/dss/dss.h"
#include "opal/dss/dss_types.h"
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
#include <arpa/inet.h>
@ -103,15 +104,29 @@ typedef struct {
OBJ_CLASS_DECLARATION(opal_namelist_t);
/**
 * Process-wide information record; the global instance is declared as
 * opal_process_info.
 */
typedef struct opal_process_info_t {
opal_process_name_t my_name; /**< this process's name (jobid and vpid) */
pmix_proc_t myprocid; /**< this process's PMIx identifier, as handed to PMIx_Init */
bool nativelaunch; /**< launched by mpirun */
char *nodename; /**< string name for this node */
char *top_session_dir; /**< Top-level session directory */
char *job_session_dir; /**< Session directory for job */
char *proc_session_dir; /**< Session directory for the process */
int32_t num_local_peers; /**< number of procs from my job that share my node with me */
int32_t my_local_rank; /**< local rank on this node within my job */
int16_t my_numa_rank; /**< rank on this process's NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */
uint32_t num_local_peers; /**< number of procs from my job that share my node with me */
uint16_t my_local_rank; /**< local rank on this node within my job */
uint16_t my_node_rank; /**< node rank of this process (retrieved via PMIX_NODE_RANK) */
uint16_t my_numa_rank; /**< rank on this process's NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */
char *cpuset; /**< String-representation of bitmap where we are bound */
pid_t pid; /**< NOTE(review): presumably this process's pid — confirm where it is set */
uint32_t num_procs; /**< job size (retrieved via PMIX_JOB_SIZE) */
uint32_t app_num; /**< our app number; 0 when PMIx does not provide it */
uint32_t univ_size; /**< universe size (retrieved via PMIX_UNIV_SIZE) */
char *app_sizes; /**< string form of the number of procs in each app context */
char *app_ldrs; /**< string form of the first rank of each app context */
char *command; /**< command string: PMIX_APP_ARGV value, or argv joined with spaces */
uint32_t num_apps; /**< number of app contexts; 1 when PMIx does not provide it */
char *initial_wdir; /**< working directory reported via PMIX_WDIR, if any */
uint32_t reincarnation; /**< value reported via PMIX_REINCARNATION, if any */
bool proc_is_bound; /**< true when a PMIX_LOCALITY_STRING value was retrieved for this process */
} opal_process_info_t;
OPAL_DECLSPEC extern opal_process_info_t opal_process_info;

2
prrte

@ -1 +1 @@
Subproject commit 743fb9c42e6037735011d6c16f0d42d116d49d90
Subproject commit 4b84c2c8564b6c926bc747d5c99aa1fab0b61bcb

Просмотреть файл

@ -15,6 +15,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -26,6 +27,7 @@
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/proc/proc.h"
#include "opal/runtime/opal.h"
#include <stdlib.h>
#include <string.h>