If the HWLOC shared memory system is unable to connect, then fall back to providing the topology via XML. Do not automatically provide the XML to every process, as that defeats the purpose of the shared memory system. Instead, use PMIx_Query_info_nb to get the info from the server when required.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
8273cea9d6
Коммит
d80b0c7990
@ -20,7 +20,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -151,7 +151,7 @@ int ompi_comm_init(void)
|
||||
because MPI_COMM_WORLD has some predefined attributes. */
|
||||
ompi_attr_hash_init(&ompi_mpi_comm_world.comm.c_keyhash);
|
||||
|
||||
/* Check for the binding policy used. We are only interested in
|
||||
/* Check for the binding policy used. We are only interested in
|
||||
whether mapby-node has been set right now (could be extended later)
|
||||
and only on MPI_COMM_WORLD, since for all other sub-communicators
|
||||
it is virtually impossible to identify their layout across nodes
|
||||
@ -161,9 +161,9 @@ int ompi_comm_init(void)
|
||||
opal_process_name_t wildcard = {ORTE_PROC_MY_NAME->jobid, OPAL_VPID_WILDCARD};
|
||||
char *str=NULL;
|
||||
int rc;
|
||||
|
||||
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_MAPBY, &wildcard, &str, OPAL_STRING);
|
||||
if ( 0 == rc ) {
|
||||
if ( 0 == rc && NULL != str) {
|
||||
if ( strstr ( str, "BYNODE") ) {
|
||||
OMPI_COMM_SET_MAPBY_NODE(&ompi_mpi_comm_world.comm);
|
||||
}
|
||||
|
@ -314,14 +314,13 @@ int opal_hwloc_base_get_topology(void)
|
||||
FILE *file = fopen("/proc/self/maps", "r");
|
||||
if (file) {
|
||||
char line[256];
|
||||
opal_output(0, opal_hwloc_base_framework.framework_output,
|
||||
"Dumping /proc/self/maps");
|
||||
opal_output(0, "Dumping /proc/self/maps");
|
||||
while (fgets(line, sizeof(line), file) != NULL) {
|
||||
char *end = strchr(line, '\n');
|
||||
if (end)
|
||||
if (end) {
|
||||
*end = '\0';
|
||||
opal_output(0, opal_hwloc_base_framework.framework_output,
|
||||
"%s", line);
|
||||
}
|
||||
opal_output(0, "%s", line);
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
@ -338,9 +337,15 @@ int opal_hwloc_base_get_topology(void)
|
||||
/* if that isn't available, then try to retrieve
|
||||
* the xml representation from the PMIx data store */
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base getting topology XML string");
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
"hwloc:base[%s:%d] getting topology XML string",
|
||||
__FILE__, __LINE__);
|
||||
#if HWLOC_API_VERSION >= 0x20000
|
||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_HWLOC_XML_V2,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
#else
|
||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_HWLOC_XML_V1,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
#endif
|
||||
} else {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base PMIx not available");
|
||||
|
@ -161,6 +161,47 @@ extern int opal_pmix_base_exchange(opal_value_t *info,
|
||||
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* Provide a simplified macro for retrieving modex data
|
||||
* from another process when we want the PMIx module
|
||||
* to request it from the server if not found, but do not
|
||||
* want the server to go find it if the server doesn't
|
||||
* already have it:
|
||||
*
|
||||
* r - the integer return status from the modex op (int)
|
||||
|
||||
* s - string key (char*)
|
||||
|
||||
* p - pointer to the opal_process_name_t of the proc that posted
|
||||
* the data (opal_process_name_t*)
|
||||
|
||||
* d - pointer to a location wherein the data object
|
||||
* is to be returned
|
||||
|
||||
* t - the expected data type
|
||||
*
* How it works: a one-element info list holding the boolean
* directive OPAL_PMIX_IMMEDIATE (set to true) is built and handed
* to opal_pmix.get() together with the proc name and key.
*
* Resulting status in r:
*   - the status from opal_pmix.get() if that call fails
*   - OPAL_ERR_NOT_FOUND if the call succeeds but yields no value
*   - otherwise the status from opal_value_unload(), after which
*     the retrieved opal_value_t is released
*
* The info list is destructed before the macro finishes. Because
* r is assigned to, it must be a modifiable lvalue.
*
* NOTE(review): the expansion ends in "while(0);" - the trailing
* semicolon is part of the macro, as with the macro that precedes
* this one, so "MACRO(...);" produces an extra empty statement.
|
||||
*/
|
||||
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \
|
||||
do { \
|
||||
opal_value_t *_kv, *_info; \
|
||||
opal_list_t _ilist; \
|
||||
opal_output_verbose(1, opal_pmix_verbose_output, \
|
||||
"%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
OPAL_NAME_PRINT(*(p)), (s)); \
|
||||
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
|
||||
_info = OBJ_NEW(opal_value_t); \
|
||||
_info->key = strdup(OPAL_PMIX_IMMEDIATE); \
|
||||
_info->type = OPAL_BOOL; \
|
||||
_info->data.flag = true; \
|
||||
opal_list_append(&(_ilist), &(_info)->super); \
|
||||
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \
|
||||
if (NULL == _kv) { \
|
||||
(r) = OPAL_ERR_NOT_FOUND; \
|
||||
} else { \
|
||||
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
|
||||
OBJ_RELEASE(_kv); \
|
||||
} \
|
||||
} \
|
||||
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* Provide a simplified macro for retrieving modex data
|
||||
* from another process:
|
||||
|
@ -234,6 +234,8 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr" // (size_t) address of HWLOC shared memory segment
|
||||
#define PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize" // (size_t) size of HWLOC shared memory segment
|
||||
#define PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile" // (char*) path to HWLOC shared memory file
|
||||
#define PMIX_HWLOC_XML_V1 "pmix.hwlocxml1" // (char*) XML representation of local topology using HWLOC v1.x format
|
||||
#define PMIX_HWLOC_XML_V2 "pmix.hwlocxml2" // (char*) XML representation of local topology using HWLOC v2.x format
|
||||
|
||||
/* request-related info */
|
||||
#define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
|
||||
@ -347,6 +349,7 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation
|
||||
// for the specified nspace
|
||||
|
||||
|
||||
/* log attributes */
|
||||
#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr
|
||||
#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout
|
||||
|
@ -568,7 +568,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
|
||||
/* look for a debugger attach key */
|
||||
(void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
|
||||
wildcard.rank = PMIX_RANK_WILDCARD;
|
||||
PMIX_INFO_LOAD(&ginfo, PMIX_IMMEDIATE, NULL, PMIX_BOOL);
|
||||
PMIX_INFO_LOAD(&ginfo, PMIX_OPTIONAL, NULL, PMIX_BOOL);
|
||||
if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) {
|
||||
PMIX_VALUE_FREE(val, 1); // cleanup memory
|
||||
/* if the value was found, then we need to wait for debugger attach here */
|
||||
|
@ -398,6 +398,45 @@ static pmix_status_t process_values(pmix_value_t **v, pmix_cb_t *cb)
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/* Completion callback handed to PMIx_Query_info_nb by _getnbfn when an
 * "immediate" get is redirected to a query. It converts the query
 * result (an array of pmix_info_t) into the single pmix_value_t that
 * the original PMIx_Get caller expects and forwards it to that
 * caller's value callback.
 *
 * status          - status returned for the query
 * info, ninfo     - returned info array and its length; exactly one
 *                   entry is expected on success
 * cbdata          - the pmix_query_caddy_t created by _getnbfn; it
 *                   carries the caller's value callback and cbdata,
 *                   and is released here
 * release_fn      - optional function to call once we are done with
 *                   the info array
 * release_cbdata  - argument passed to release_fn
 *
 * Status passed to the caller's callback:
 *   - status itself when the query did not succeed
 *   - PMIX_ERR_NOT_FOUND when no info array was returned
 *   - PMIX_ERR_INVALID_VAL when ninfo is not exactly 1
 *   - PMIX_ERR_NOMEM when the value could not be allocated
 *   - otherwise the result of pmix_value_xfer
 */
static void infocb(pmix_status_t status,
|
||||
pmix_info_t *info, size_t ninfo,
|
||||
void *cbdata,
|
||||
pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
|
||||
pmix_value_t *kv = NULL;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (PMIX_SUCCESS == status) {
|
||||
if (NULL != info) {
|
||||
/* there should be only one returned value */
|
||||
if (1 != ninfo) {
|
||||
rc = PMIX_ERR_INVALID_VAL;
|
||||
} else {
|
||||
PMIX_VALUE_CREATE(kv, 1);
|
||||
if (NULL == kv) {
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
} else {
|
||||
/* copy the value out of the info array into our own kv */
rc = pmix_value_xfer(kv, &info[0].value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rc = PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
} else {
|
||||
rc = status;
|
||||
}
|
||||
/* hand the result (kv may be NULL on error) to the original caller */
if (NULL != cd->valcbfunc) {
|
||||
cd->valcbfunc(rc, kv, cd->cbdata);
|
||||
}
|
||||
PMIX_RELEASE(cd);
|
||||
/* kv is freed as soon as the callback returns.
 * NOTE(review): kv is still NULL on every error path above - confirm
 * that PMIX_VALUE_FREE tolerates a NULL pointer. */
PMIX_VALUE_FREE(kv, 1);
|
||||
/* let the provider of the info array know we are finished with it */
if (NULL != release_fn) {
|
||||
release_fn(release_cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
@ -409,7 +448,9 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
char *tmp;
|
||||
pmix_proc_t proc;
|
||||
bool optional = false;
|
||||
bool immediate = false;
|
||||
struct timeval tv;
|
||||
pmix_query_caddy_t *cd;
|
||||
|
||||
/* cb was passed to us from another thread - acquire it */
|
||||
PMIX_ACQUIRE_OBJECT(cb);
|
||||
@ -431,6 +472,11 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
cb->info[n].value.data.flag) {
|
||||
optional = true;
|
||||
}
|
||||
} else if (0 == strncmp(cb->info[n].key, PMIX_IMMEDIATE, PMIX_MAX_KEYLEN)) {
|
||||
if (PMIX_UNDEF == cb->info[n].value.type ||
|
||||
cb->info[n].value.data.flag) {
|
||||
immediate = true;
|
||||
}
|
||||
} else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) {
|
||||
/* set a timer to kick us out if we don't
|
||||
* have an answer within their window */
|
||||
@ -473,6 +519,25 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
*/
|
||||
goto request;
|
||||
} else {
|
||||
/* if immediate was given, then we are being directed to
|
||||
* check with the server even though the caller is looking for
|
||||
* job-level info. In some cases, a server may elect not
|
||||
* to provide info at init to save memory */
|
||||
if (immediate) {
|
||||
/* the direct modex request doesn't pass a key as it
|
||||
* was intended to support non-job-level information.
|
||||
* So instead, we will use the PMIx_Query function
|
||||
* to request the information */
|
||||
cd = PMIX_NEW(pmix_query_caddy_t);
|
||||
cd->cbdata = cb->cbdata;
|
||||
cd->valcbfunc = cb->cbfunc.valuefn;
|
||||
PMIX_QUERY_CREATE(cd->queries, 1);
|
||||
cd->nqueries = 1;
|
||||
pmix_argv_append_nosize(&cd->queries[0].keys, cb->key);
|
||||
PMIx_Query_info_nb(cd->queries, 1, infocb, cd);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* we should have had this info, so respond with the error */
|
||||
goto respond;
|
||||
}
|
||||
@ -494,25 +559,25 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
respond:
|
||||
/* if a callback was provided, execute it */
|
||||
if (NULL != cb->cbfunc.valuefn) {
|
||||
if (NULL != val) {
|
||||
/* if this is a compressed string, then uncompress it */
|
||||
if (PMIX_COMPRESSED_STRING == val->type) {
|
||||
pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size);
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
val = NULL;
|
||||
} else {
|
||||
PMIX_VALUE_DESTRUCT(val);
|
||||
PMIX_VAL_ASSIGN(val, string, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
cb->cbfunc.valuefn(rc, val, cb->cbdata);
|
||||
if (NULL != val) {
|
||||
/* if this is a compressed string, then uncompress it */
|
||||
if (PMIX_COMPRESSED_STRING == val->type) {
|
||||
pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size);
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
val = NULL;
|
||||
} else {
|
||||
PMIX_VALUE_DESTRUCT(val);
|
||||
PMIX_VAL_ASSIGN(val, string, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
cb->cbfunc.valuefn(rc, val, cb->cbdata);
|
||||
}
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
|
@ -245,6 +245,7 @@ static void qcon(pmix_query_caddy_t *p)
|
||||
p->info = NULL;
|
||||
p->ninfo = 0;
|
||||
p->cbfunc = NULL;
|
||||
p->valcbfunc = NULL;
|
||||
p->cbdata = NULL;
|
||||
p->relcbfunc = NULL;
|
||||
}
|
||||
|
@ -219,6 +219,7 @@ typedef struct {
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
pmix_info_cbfunc_t cbfunc;
|
||||
pmix_value_cbfunc_t valcbfunc;
|
||||
pmix_release_cbfunc_t relcbfunc;
|
||||
void *cbdata;
|
||||
} pmix_query_caddy_t;
|
||||
|
@ -161,6 +161,9 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr" // (size_t) address of HWLOC shared memory segment
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize" // (size_t) size of HWLOC shared memory segment
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile" // (char*) path to HWLOC shared memory file
|
||||
#define OPAL_PMIX_HWLOC_XML_V1 "pmix.hwlocxml1" // (char*) XML representation of local topology using HWLOC v1.x format
|
||||
#define OPAL_PMIX_HWLOC_XML_V2 "pmix.hwlocxml2" // (char*) XML representation of local topology using HWLOC v2.x format
|
||||
|
||||
|
||||
/* request-related info */
|
||||
#define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
|
||||
|
@ -116,14 +116,14 @@ static int init(void)
|
||||
FILE *file = fopen("/proc/self/maps", "r");
|
||||
if (file) {
|
||||
char line[256];
|
||||
opal_output(0, orte_rtc_base_framework.framework_output,
|
||||
"%s Dumping /proc/self/maps", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s Dumping /proc/self/maps",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
while (fgets(line, sizeof(line), file) != NULL) {
|
||||
char *end = strchr(line, '\n');
|
||||
if (end)
|
||||
if (end) {
|
||||
*end = '\0';
|
||||
opal_output(0, orte_rtc_base_framework.framework_output,
|
||||
"%s", line);
|
||||
}
|
||||
opal_output(0, "%s", line);
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
|
@ -242,24 +242,7 @@ int pmix_server_init(void)
|
||||
/* ensure the PMIx server uses the proper rendezvous directory */
|
||||
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
|
||||
|
||||
/* pass the server the local topology - we do this so the procs won't read the
|
||||
* topology themselves as this could overwhelm the local
|
||||
* system on large-scale SMPs */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
|
||||
if (0 != opal_hwloc_base_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
|
||||
OBJ_RELEASE(kv);
|
||||
OBJ_DESTRUCT(&info);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(&info, &kv->super);
|
||||
}
|
||||
/* tell the server our temp directory */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "opal/mca/pstat/pstat.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -635,6 +636,47 @@ static void _query(int sd, short args, void *cbdata)
|
||||
} else {
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_HWLOC_XML_V1)) {
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_HWLOC_XML_V1);
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
/* get this from the v1.x API */
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
/* get it from the v2 API */
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len,
|
||||
HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_HWLOC_XML_V2)) {
|
||||
/* we cannot provide it if we are using v1.x */
|
||||
#if HWLOC_API_VERSION >= 0x20000
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_HWLOC_XML_V2);
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len, 0)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user