Merge pull request #4133 from rhc54/topic/modex
Optimize discovery of HWLOC topology
Этот коммит содержится в:
Коммит
f6fd699d44
@ -20,7 +20,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -151,7 +151,7 @@ int ompi_comm_init(void)
|
||||
because MPI_COMM_WORLD has some predefined attributes. */
|
||||
ompi_attr_hash_init(&ompi_mpi_comm_world.comm.c_keyhash);
|
||||
|
||||
/* Check for the binding policy used. We are only interested in
|
||||
/* Check for the binding policy used. We are only interested in
|
||||
whether mapby-node has been set right now (could be extended later)
|
||||
and only on MPI_COMM_WORLD, since for all other sub-communicators
|
||||
it is virtually impossible to identify their layout across nodes
|
||||
@ -161,9 +161,9 @@ int ompi_comm_init(void)
|
||||
opal_process_name_t wildcard = {ORTE_PROC_MY_NAME->jobid, OPAL_VPID_WILDCARD};
|
||||
char *str=NULL;
|
||||
int rc;
|
||||
|
||||
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_MAPBY, &wildcard, &str, OPAL_STRING);
|
||||
if ( 0 == rc ) {
|
||||
if ( 0 == rc && NULL != str) {
|
||||
if ( strstr ( str, "BYNODE") ) {
|
||||
OMPI_COMM_SET_MAPBY_NODE(&ompi_mpi_comm_world.comm);
|
||||
}
|
||||
|
@ -314,14 +314,14 @@ int opal_hwloc_base_get_topology(void)
|
||||
FILE *file = fopen("/proc/self/maps", "r");
|
||||
if (file) {
|
||||
char line[256];
|
||||
opal_output(opal_hwloc_base_framework.framework_output,
|
||||
"Dumping /proc/self/maps");
|
||||
opal_output(0, "Dumping /proc/self/maps");
|
||||
|
||||
while (fgets(line, sizeof(line), file) != NULL) {
|
||||
char *end = strchr(line, '\n');
|
||||
if (end)
|
||||
if (end) {
|
||||
*end = '\0';
|
||||
opal_output(opal_hwloc_base_framework.framework_output,
|
||||
line);
|
||||
}
|
||||
opal_output(0, "%s", line);
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
@ -338,9 +338,15 @@ int opal_hwloc_base_get_topology(void)
|
||||
/* if that isn't available, then try to retrieve
|
||||
* the xml representation from the PMIx data store */
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base getting topology XML string");
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
"hwloc:base[%s:%d] getting topology XML string",
|
||||
__FILE__, __LINE__);
|
||||
#if HWLOC_API_VERSION >= 0x20000
|
||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_HWLOC_XML_V2,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
#else
|
||||
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_HWLOC_XML_V1,
|
||||
&wildcard_rank, &val, OPAL_STRING);
|
||||
#endif
|
||||
} else {
|
||||
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base PMIx not available");
|
||||
|
@ -161,6 +161,47 @@ extern int opal_pmix_base_exchange(opal_value_t *info,
|
||||
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* Provide a simplified macro for retrieving modex data
|
||||
* from another process when we want the PMIx module
|
||||
* to request it from the server if not found, but do not
|
||||
* want the server to go find it if the server doesn't
|
||||
* already have it:
|
||||
*
|
||||
* r - the integer return status from the modex op (int)
|
||||
* s - string key (char*)
|
||||
* p - pointer to the opal_process_name_t of the proc that posted
|
||||
* the data (opal_process_name_t*)
|
||||
* d - pointer to a location wherein the data object
|
||||
* is to be returned
|
||||
* t - the expected data type
|
||||
*
* How it works: a one-element info list carrying the
* OPAL_PMIX_IMMEDIATE key (type OPAL_BOOL, flag = true) is built
* and handed to opal_pmix.get() together with the proc and key.
*
* Resulting value of r:
*  - the status returned by opal_pmix.get() when that call does not
*    return OPAL_SUCCESS;
*  - OPAL_ERR_NOT_FOUND when the get succeeds but hands back a NULL
*    value;
*  - otherwise the status of opal_value_unload(), which is asked to
*    unload the returned value into d as type t. The returned value
*    object is released afterwards.
*
* The info list is destructed on every path.
*
* NOTE(review): the definition ends in "while(0);" - i.e. the
* expansion already contains a trailing semicolon, so invoking the
* macro as the sole statement of an "if" that has an "else" will not
* compile.
*/
|
||||
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \
|
||||
do { \
|
||||
opal_value_t *_kv, *_info; \
|
||||
opal_list_t _ilist; \
|
||||
opal_output_verbose(1, opal_pmix_verbose_output, \
|
||||
"%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, \
|
||||
OPAL_NAME_PRINT(*(p)), (s)); \
|
||||
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
|
||||
_info = OBJ_NEW(opal_value_t); \
|
||||
_info->key = strdup(OPAL_PMIX_IMMEDIATE); \
|
||||
_info->type = OPAL_BOOL; \
|
||||
_info->data.flag = true; \
|
||||
opal_list_append(&(_ilist), &(_info)->super); \
|
||||
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \
|
||||
if (NULL == _kv) { \
|
||||
(r) = OPAL_ERR_NOT_FOUND; \
|
||||
} else { \
|
||||
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
|
||||
OBJ_RELEASE(_kv); \
|
||||
} \
|
||||
} \
|
||||
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* Provide a simplified macro for retrieving modex data
|
||||
* from another process:
|
||||
|
@ -234,6 +234,8 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr" // (size_t) address of HWLOC shared memory segment
|
||||
#define PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize" // (size_t) size of HWLOC shared memory segment
|
||||
#define PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile" // (char*) path to HWLOC shared memory file
|
||||
#define PMIX_HWLOC_XML_V1 "pmix.hwlocxml1" // (char*) XML representation of local topology using HWLOC v1.x format
|
||||
#define PMIX_HWLOC_XML_V2 "pmix.hwlocxml2" // (char*) XML representation of local topology using HWLOC v2.x format
|
||||
|
||||
/* request-related info */
|
||||
#define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
|
||||
@ -347,6 +349,7 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation
|
||||
// for the specified nspace
|
||||
|
||||
|
||||
/* log attributes */
|
||||
#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr
|
||||
#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout
|
||||
|
@ -568,7 +568,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
|
||||
/* look for a debugger attach key */
|
||||
(void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
|
||||
wildcard.rank = PMIX_RANK_WILDCARD;
|
||||
PMIX_INFO_LOAD(&ginfo, PMIX_IMMEDIATE, NULL, PMIX_BOOL);
|
||||
PMIX_INFO_LOAD(&ginfo, PMIX_OPTIONAL, NULL, PMIX_BOOL);
|
||||
if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) {
|
||||
PMIX_VALUE_FREE(val, 1); // cleanup memory
|
||||
/* if the value was found, then we need to wait for debugger attach here */
|
||||
|
@ -398,6 +398,45 @@ static pmix_status_t process_values(pmix_value_t **v, pmix_cb_t *cb)
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Completion callback handed to PMIx_Query_info_nb by _getnbfn.
 * It converts the query result into a single pmix_value_t and forwards
 * it to the value callback stored in the query caddy.
 *
 * status         - status reported for the query
 * info, ninfo    - array of returned info structs and its length
 * cbdata         - the pmix_query_caddy_t created by the requester; it
 *                  carries the value callback (valcbfunc) and the
 *                  caller's cbdata, and is released here
 * release_fn     - if non-NULL, called with release_cbdata as the last
 *                  step of this function
 * release_cbdata - argument passed to release_fn
 *
 * Status passed to the value callback:
 *   PMIX_ERR_INVALID_VAL  - query succeeded but ninfo != 1
 *   PMIX_ERR_NOMEM        - the value could not be allocated
 *   PMIX_ERR_NOT_FOUND    - query succeeded but info is NULL
 *   <status>              - the query itself failed
 *   otherwise the result of pmix_value_xfer()
 */
static void infocb(pmix_status_t status,
|
||||
pmix_info_t *info, size_t ninfo,
|
||||
void *cbdata,
|
||||
pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
|
||||
pmix_value_t *kv = NULL;
|
||||
pmix_status_t rc;
|
||||

||||
if (PMIX_SUCCESS == status) {
|
||||
if (NULL != info) {
|
||||
/* there should be only one returned value */
|
||||
if (1 != ninfo) {
|
||||
rc = PMIX_ERR_INVALID_VAL;
|
||||
} else {
|
||||
PMIX_VALUE_CREATE(kv, 1);
|
||||
if (NULL == kv) {
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
} else {
|
||||
/* transfer the returned value into kv; its status is what gets reported */
rc = pmix_value_xfer(kv, &info[0].value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rc = PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
} else {
|
||||
rc = status;
|
||||
}
|
||||
/* kv is still NULL here on every path that did not allocate it */
if (NULL != cd->valcbfunc) {
|
||||
cd->valcbfunc(rc, kv, cd->cbdata);
|
||||
}
|
||||
/* NOTE(review): kv is freed right after the callback returns, so the
 * callback presumably must copy anything it wants to keep - confirm
 * against the valcbfunc implementations */
PMIX_RELEASE(cd);
|
||||
PMIX_VALUE_FREE(kv, 1);
|
||||
if (NULL != release_fn) {
|
||||
release_fn(release_cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
@ -409,7 +448,9 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
char *tmp;
|
||||
pmix_proc_t proc;
|
||||
bool optional = false;
|
||||
bool immediate = false;
|
||||
struct timeval tv;
|
||||
pmix_query_caddy_t *cd;
|
||||
|
||||
/* cb was passed to us from another thread - acquire it */
|
||||
PMIX_ACQUIRE_OBJECT(cb);
|
||||
@ -431,6 +472,11 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
cb->info[n].value.data.flag) {
|
||||
optional = true;
|
||||
}
|
||||
} else if (0 == strncmp(cb->info[n].key, PMIX_IMMEDIATE, PMIX_MAX_KEYLEN)) {
|
||||
if (PMIX_UNDEF == cb->info[n].value.type ||
|
||||
cb->info[n].value.data.flag) {
|
||||
immediate = true;
|
||||
}
|
||||
} else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) {
|
||||
/* set a timer to kick us out if we don't
|
||||
* have an answer within their window */
|
||||
@ -473,6 +519,25 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
*/
|
||||
goto request;
|
||||
} else {
|
||||
/* if immediate was given, then we are being directed to
|
||||
* check with the server even though the caller is looking for
|
||||
* job-level info. In some cases, a server may elect not
|
||||
* to provide info at init to save memory */
|
||||
if (immediate) {
|
||||
/* the direct modex request doesn't pass a key as it
|
||||
* was intended to support non-job-level information.
|
||||
* So instead, we will use the PMIx_Query function
|
||||
* to request the information */
|
||||
cd = PMIX_NEW(pmix_query_caddy_t);
|
||||
cd->cbdata = cb->cbdata;
|
||||
cd->valcbfunc = cb->cbfunc.valuefn;
|
||||
PMIX_QUERY_CREATE(cd->queries, 1);
|
||||
cd->nqueries = 1;
|
||||
pmix_argv_append_nosize(&cd->queries[0].keys, cb->key);
|
||||
PMIx_Query_info_nb(cd->queries, 1, infocb, cd);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* we should have had this info, so respond with the error */
|
||||
goto respond;
|
||||
}
|
||||
@ -494,25 +559,25 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
respond:
|
||||
/* if a callback was provided, execute it */
|
||||
if (NULL != cb->cbfunc.valuefn) {
|
||||
if (NULL != val) {
|
||||
/* if this is a compressed string, then uncompress it */
|
||||
if (PMIX_COMPRESSED_STRING == val->type) {
|
||||
pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size);
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
val = NULL;
|
||||
} else {
|
||||
PMIX_VALUE_DESTRUCT(val);
|
||||
PMIX_VAL_ASSIGN(val, string, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
cb->cbfunc.valuefn(rc, val, cb->cbdata);
|
||||
if (NULL != val) {
|
||||
/* if this is a compressed string, then uncompress it */
|
||||
if (PMIX_COMPRESSED_STRING == val->type) {
|
||||
pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size);
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
val = NULL;
|
||||
} else {
|
||||
PMIX_VALUE_DESTRUCT(val);
|
||||
PMIX_VAL_ASSIGN(val, string, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
cb->cbfunc.valuefn(rc, val, cb->cbdata);
|
||||
}
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
|
@ -245,6 +245,7 @@ static void qcon(pmix_query_caddy_t *p)
|
||||
p->info = NULL;
|
||||
p->ninfo = 0;
|
||||
p->cbfunc = NULL;
|
||||
p->valcbfunc = NULL;
|
||||
p->cbdata = NULL;
|
||||
p->relcbfunc = NULL;
|
||||
}
|
||||
|
@ -219,6 +219,7 @@ typedef struct {
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
pmix_info_cbfunc_t cbfunc;
|
||||
pmix_value_cbfunc_t valcbfunc;
|
||||
pmix_release_cbfunc_t relcbfunc;
|
||||
void *cbdata;
|
||||
} pmix_query_caddy_t;
|
||||
|
@ -161,6 +161,9 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr" // (size_t) address of HWLOC shared memory segment
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize" // (size_t) size of HWLOC shared memory segment
|
||||
#define OPAL_PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile" // (char*) path to HWLOC shared memory file
|
||||
#define OPAL_PMIX_HWLOC_XML_V1 "pmix.hwlocxml1" // (char*) XML representation of local topology using HWLOC v1.x format
|
||||
#define OPAL_PMIX_HWLOC_XML_V2 "pmix.hwlocxml2" // (char*) XML representation of local topology using HWLOC v2.x format
|
||||
|
||||
|
||||
/* request-related info */
|
||||
#define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
|
||||
|
@ -116,14 +116,14 @@ static int init(void)
|
||||
FILE *file = fopen("/proc/self/maps", "r");
|
||||
if (file) {
|
||||
char line[256];
|
||||
opal_output(0, orte_rtc_base_framework.framework_output,
|
||||
"%s Dumping /proc/self/maps", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s Dumping /proc/self/maps",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
while (fgets(line, sizeof(line), file) != NULL) {
|
||||
char *end = strchr(line, '\n');
|
||||
if (end)
|
||||
if (end) {
|
||||
*end = '\0';
|
||||
opal_output(0, orte_rtc_base_framework.framework_output,
|
||||
"%s", line);
|
||||
}
|
||||
opal_output(0, "%s", line);
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
|
@ -242,24 +242,7 @@ int pmix_server_init(void)
|
||||
/* ensure the PMIx server uses the proper rendezvous directory */
|
||||
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
|
||||
|
||||
/* pass the server the local topology - we do this so the procs won't read the
|
||||
* topology themselves as this could overwhelm the local
|
||||
* system on large-scale SMPs */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
|
||||
if (0 != opal_hwloc_base_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
|
||||
OBJ_RELEASE(kv);
|
||||
OBJ_DESTRUCT(&info);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(&info, &kv->super);
|
||||
}
|
||||
/* tell the server our temp directory */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "opal/mca/pstat/pstat.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -635,6 +636,47 @@ static void _query(int sd, short args, void *cbdata)
|
||||
} else {
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_HWLOC_XML_V1)) {
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_HWLOC_XML_V1);
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
/* get this from the v1.x API */
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
/* get it from the v2 API */
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len,
|
||||
HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_HWLOC_XML_V2)) {
|
||||
/* we cannot provide it if we are using v1.x */
|
||||
#if HWLOC_API_VERSION >= 0x20000
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
char *xmlbuffer=NULL;
|
||||
int len;
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_HWLOC_XML_V2);
|
||||
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len, 0)) {
|
||||
OBJ_RELEASE(kv);
|
||||
continue;
|
||||
}
|
||||
kv->data.string = xmlbuffer;
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(results, &kv->super);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user