410 строки
14 KiB
C
410 строки
14 KiB
C
/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
 * Copyright (c) 2013-2015 Intel, Inc.  All rights reserved.
 * Copyright (c) 2014      Mellanox Technologies, Inc.
 *                         All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */
|
|
|
|
#include "orte_config.h"
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <fcntl.h>
|
|
|
|
#include "opal_stdint.h"
|
|
#include "opal/types.h"
|
|
#include "opal/util/argv.h"
|
|
#include "opal/util/output.h"
|
|
#include "opal/util/error.h"
|
|
#include "opal/mca/dstore/dstore.h"
|
|
#include "opal/mca/event/event.h"
|
|
|
|
#include "orte/util/name_fns.h"
|
|
#include "orte/runtime/orte_globals.h"
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
#include "pmix_server_internal.h"
|
|
|
|
/* stuff proc attributes for sending back to a proc */
|
|
int pmix_server_fetch_proc_map(opal_buffer_t *reply,
|
|
orte_job_t *jdata,
|
|
orte_proc_t *proc)
|
|
{
|
|
char *tmp;
|
|
opal_value_t kv, *kp;
|
|
int rc;
|
|
orte_node_t *node;
|
|
orte_app_context_t *app;
|
|
orte_proc_t *pptr;
|
|
int i;
|
|
char **list;
|
|
orte_process_name_t name;
|
|
opal_buffer_t buf, buf2;
|
|
|
|
/* convenience def */
|
|
node = proc->node;
|
|
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
|
|
kp = &kv;
|
|
|
|
#if OPAL_HAVE_HWLOC
|
|
/* pass the local topology for the app so it doesn't
|
|
* have to discover it for itself */
|
|
if (NULL != opal_hwloc_topology) {
|
|
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&buf);
|
|
return rc;
|
|
}
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCAL_TOPO);
|
|
kv.type = OPAL_BYTE_OBJECT;
|
|
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
|
|
OBJ_DESTRUCT(&buf);
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
}
|
|
#endif /* OPAL_HAVE_HWLOC */
|
|
/* cpuset */
|
|
tmp = NULL;
|
|
if (orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_CPUSET);
|
|
kv.type = OPAL_STRING;
|
|
kv.data.string = tmp;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
}
|
|
/* jobid */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_JOBID);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = proc->name.jobid;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_RANK);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = proc->name.vpid;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
/* offset */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_NPROC_OFFSET);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = jdata->offset;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* pass a blob - for each proc in this job, include the info describing
|
|
* it so the recipient has a complete picture */
|
|
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
|
/* jobid, for simplicity when unpacking */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_JOBID);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = proc->name.jobid;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
for (i=0; i < jdata->procs->size; i++) {
|
|
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
|
|
continue;
|
|
}
|
|
/* rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_RANK);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = pptr->name.vpid;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* create the buffer for this rank */
|
|
OBJ_CONSTRUCT(&buf2, opal_buffer_t);
|
|
/* appnum */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_APPNUM);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = pptr->app_idx;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* global rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_GLOBAL_RANK);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = pptr->name.vpid + jdata->offset;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* app rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_APP_RANK);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = pptr->app_rank;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* local rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCAL_RANK);
|
|
kv.type = OPAL_UINT16;
|
|
kv.data.uint16 = pptr->local_rank;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* node rank */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_NODE_RANK);
|
|
kv.type = OPAL_UINT16;
|
|
kv.data.uint16 = pptr->node_rank;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* node id */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_NODE_ID);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = pptr->node->daemon->name.vpid;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* add the rank's blob */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_PROC_MAP);
|
|
kv.type = OPAL_BYTE_OBJECT;
|
|
opal_dss.unload(&buf2, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
|
|
OBJ_DESTRUCT(&buf2);
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
}
|
|
/* now pass the blob as the proc-map key */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_PROC_MAP);
|
|
kv.type = OPAL_BYTE_OBJECT;
|
|
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
|
|
OBJ_DESTRUCT(&buf);
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* construct the list of local peers */
|
|
list = NULL;
|
|
name.jobid = jdata->jobid;
|
|
name.vpid = 0;
|
|
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
|
for (i=0; i < node->procs->size; i++) {
|
|
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
|
|
continue;
|
|
}
|
|
if (pptr->name.jobid == jdata->jobid) {
|
|
opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
|
|
if (pptr->name.vpid < name.vpid) {
|
|
name.vpid = pptr->name.vpid;
|
|
}
|
|
/* note that we have to pass the cpuset for each local
|
|
* peer so locality can be computed */
|
|
tmp = NULL;
|
|
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
|
|
/* add the name of the proc */
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &pptr->name, 1, OPAL_NAME))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
free(tmp);
|
|
opal_argv_free(list);
|
|
return rc;
|
|
}
|
|
/* add its cpuset */
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &tmp, 1, OPAL_STRING))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
free(tmp);
|
|
opal_argv_free(list);
|
|
return rc;
|
|
}
|
|
free(tmp);
|
|
}
|
|
}
|
|
}
|
|
/* pass the blob containing the cpusets for all local peers - note
|
|
* that the cpuset of the proc we are responding to will be included,
|
|
* so we don't need to send it separately */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCAL_CPUSETS);
|
|
kv.type = OPAL_BYTE_OBJECT;
|
|
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
|
|
OBJ_DESTRUCT(&buf);
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
opal_argv_free(list);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* construct the list of peers for transmission */
|
|
tmp = opal_argv_join(list, ',');
|
|
opal_argv_free(list);
|
|
/* pass the local ldr */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCALLDR);
|
|
kv.type = OPAL_NAME;
|
|
kv.data.name = name;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
free(tmp);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* pass the list of peers */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCAL_PEERS);
|
|
kv.type = OPAL_STRING;
|
|
kv.data.string = tmp;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* app ldr */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_APPLDR);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = app->first_rank;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* univ size */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_UNIV_SIZE);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = jdata->num_procs;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* job size */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_JOB_SIZE);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = jdata->num_procs;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* local size */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_LOCAL_SIZE);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = jdata->num_local_procs;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* node size */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_NODE_SIZE);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint32 = node->num_procs;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* max procs */
|
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
kv.key = strdup(PMIX_MAX_PROCS);
|
|
kv.type = OPAL_UINT32;
|
|
kv.data.uint16 = jdata->total_slots_alloc;
|
|
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
OBJ_DESTRUCT(&kv);
|
|
return rc;
|
|
}
|
|
OBJ_DESTRUCT(&kv);
|
|
/* local topology - we do this so the procs won't read the
|
|
* topology themselves as this could overwhelm the local
|
|
* system on large-scale SMPs */
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|