1
1

Take first step towards refactoring the PMIx server code by splitting out the proc_map function into its own file. Update .gitignore to ignore the .DS_Store files that macOS creates.

Этот коммит содержится в:
Ralph Castain 2014-12-17 19:08:52 -08:00
родитель 269d7f9713
Коммит f489e871c2
5 изменённых файлов: 415 добавлений и 361 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -19,6 +19,7 @@
*.out *.out
*.plist *.plist
*~ *~
.DS_Store
Makefile Makefile
Makefile.in Makefile.in
static-components.h static-components.h

Просмотреть файл

@ -15,4 +15,5 @@ headers += \
lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \
orted/pmix/pmix_server.c \ orted/pmix/pmix_server.c \
orted/pmix/pmix_server_connection.c \ orted/pmix/pmix_server_connection.c \
orted/pmix/pmix_server_sendrecv.c orted/pmix/pmix_server_sendrecv.c \
orted/pmix/pmix_server_db.c

408
orte/orted/pmix/pmix_server_db.c Обычный файл
Просмотреть файл

@ -0,0 +1,408 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <fcntl.h>
#include "opal_stdint.h"
#include "opal/types.h"
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/event/event.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "pmix_server_internal.h"
/* stuff proc attributes for sending back to a proc */
int pmix_server_fetch_proc_map(opal_buffer_t *reply,
orte_job_t *jdata,
orte_proc_t *proc)
{
char *tmp;
opal_value_t kv, *kp;
int rc;
orte_node_t *node;
orte_app_context_t *app;
orte_proc_t *pptr;
int i;
char **list;
orte_process_name_t name;
opal_buffer_t buf, buf2;
/* convenience def */
node = proc->node;
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
kp = &kv;
#if OPAL_HAVE_HWLOC
/* pass the local topology for the app so it doesn't
* have to discover it for itself */
if (NULL != opal_hwloc_topology) {
OBJ_CONSTRUCT(&buf, opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return rc;
}
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_TOPO);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
#endif /* OPAL_HAVE_HWLOC */
/* cpuset */
tmp = NULL;
if (orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_CPUSET);
kv.type = OPAL_STRING;
kv.data.string = tmp;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
/* jobid */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOBID);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.jobid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
/* offset */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NPROC_OFFSET);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->offset;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* pass a blob - for each proc in this job, include the info describing
* it so the recipient has a complete picture */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* jobid, for simplicity when unpacking */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOBID);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.jobid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
for (i=0; i < jdata->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
continue;
}
/* rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* create the buffer for this rank */
OBJ_CONSTRUCT(&buf2, opal_buffer_t);
/* appnum */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APPNUM);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->app_idx;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* global rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_GLOBAL_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->name.vpid + jdata->offset;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* app rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APP_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->app_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_RANK);
kv.type = OPAL_UINT16;
kv.data.uint16 = pptr->local_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_RANK);
kv.type = OPAL_UINT16;
kv.data.uint16 = pptr->node_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node id */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_ID);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->node->daemon->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* add the rank's blob */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_PROC_MAP);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf2, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf2);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
/* now pass the blob as the proc-map key */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_PROC_MAP);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* construct the list of local peers */
list = NULL;
name.jobid = jdata->jobid;
name.vpid = 0;
OBJ_CONSTRUCT(&buf, opal_buffer_t);
for (i=0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (pptr->name.jobid == jdata->jobid) {
opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
if (pptr->name.vpid < name.vpid) {
name.vpid = pptr->name.vpid;
}
/* note that we have to pass the cpuset for each local
* peer so locality can be computed */
tmp = NULL;
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
/* add the name of the proc */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &pptr->name, 1, OPAL_NAME))) {
ORTE_ERROR_LOG(rc);
free(tmp);
opal_argv_free(list);
return rc;
}
/* add its cpuset */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(tmp);
opal_argv_free(list);
return rc;
}
free(tmp);
}
}
}
/* pass the blob containing the cpusets for all local peers - note
* that the cpuset of the proc we are responding to will be included,
* so we don't need to send it separately */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_CPUSETS);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
opal_argv_free(list);
return rc;
}
OBJ_DESTRUCT(&kv);
/* construct the list of peers for transmission */
tmp = opal_argv_join(list, ',');
opal_argv_free(list);
/* pass the local ldr */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCALLDR);
kv.type = OPAL_NAME;
kv.data.name = name;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
free(tmp);
return rc;
}
OBJ_DESTRUCT(&kv);
/* pass the list of peers */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_PEERS);
kv.type = OPAL_STRING;
kv.data.string = tmp;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* app ldr */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APPLDR);
kv.type = OPAL_UINT32;
kv.data.uint32 = app->first_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* univ size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_UNIV_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* job size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOB_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_local_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = node->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* max procs */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_MAX_PROCS);
kv.type = OPAL_UINT32;
kv.data.uint16 = jdata->total_slots_alloc;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local topology - we do this so the procs won't read the
* topology themselves as this could overwhelm the local
* system on large-scale SMPs */
return ORTE_SUCCESS;
}

Просмотреть файл

@ -205,6 +205,9 @@ extern pmix_server_peer_t* pmix_server_peer_lookup(int sd);
extern void pmix_server_peer_dump(pmix_server_peer_t* peer, const char* msg); extern void pmix_server_peer_dump(pmix_server_peer_t* peer, const char* msg);
extern int pack_segment_info(opal_process_name_t id, opal_buffer_t *reply); extern int pack_segment_info(opal_process_name_t id, opal_buffer_t *reply);
/* Pack the attributes describing proc and its job jdata into reply as a
 * sequence of opal_value_t entries (implemented in pmix_server_db.c).
 * Returns ORTE_SUCCESS, or the error code of the failing pack operation. */
extern int pmix_server_fetch_proc_map(opal_buffer_t *reply,
orte_job_t *jdata,
orte_proc_t *proc);
/* exposed shared variables */ /* exposed shared variables */
extern bool pmix_server_distribute_data; extern bool pmix_server_distribute_data;

Просмотреть файл

@ -321,365 +321,6 @@ static int read_bytes(pmix_server_peer_t* peer)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* stuff proc attributes for sending back to a proc */
static int stuff_proc_values(opal_buffer_t *reply, orte_job_t *jdata, orte_proc_t *proc)
{
char *tmp;
opal_value_t kv, *kp;
int rc;
orte_node_t *node;
orte_app_context_t *app;
orte_proc_t *pptr;
int i;
char **list;
orte_process_name_t name;
opal_buffer_t buf, buf2;
/* convenience def */
node = proc->node;
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
kp = &kv;
#if OPAL_HAVE_HWLOC
/* pass the local topology for the app so it doesn't
* have to discover it for itself */
if (NULL != opal_hwloc_topology) {
OBJ_CONSTRUCT(&buf, opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return rc;
}
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_TOPO);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
#endif /* OPAL_HAVE_HWLOC */
/* cpuset */
tmp = NULL;
if (orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_CPUSET);
kv.type = OPAL_STRING;
kv.data.string = tmp;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
/* jobid */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOBID);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.jobid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
/* offset */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NPROC_OFFSET);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->offset;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* pass a blob - for each proc in this job, include the info describing
* it so the recipient has a complete picture */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* jobid, for simplicity when unpacking */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOBID);
kv.type = OPAL_UINT32;
kv.data.uint32 = proc->name.jobid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
for (i=0; i < jdata->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
continue;
}
/* rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* create the buffer for this rank */
OBJ_CONSTRUCT(&buf2, opal_buffer_t);
/* appnum */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APPNUM);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->app_idx;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* global rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_GLOBAL_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->name.vpid + jdata->offset;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* app rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APP_RANK);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->app_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_RANK);
kv.type = OPAL_UINT16;
kv.data.uint16 = pptr->local_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node rank */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_RANK);
kv.type = OPAL_UINT16;
kv.data.uint16 = pptr->node_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node id */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_ID);
kv.type = OPAL_UINT32;
kv.data.uint32 = pptr->node->daemon->name.vpid;
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf2, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* add the rank's blob */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_PROC_MAP);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf2, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf2);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
}
/* now pass the blob as the proc-map key */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_PROC_MAP);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* construct the list of local peers */
list = NULL;
name.jobid = jdata->jobid;
name.vpid = 0;
OBJ_CONSTRUCT(&buf, opal_buffer_t);
for (i=0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (pptr->name.jobid == jdata->jobid) {
opal_argv_append_nosize(&list, ORTE_VPID_PRINT(pptr->name.vpid));
if (pptr->name.vpid < name.vpid) {
name.vpid = pptr->name.vpid;
}
/* note that we have to pass the cpuset for each local
* peer so locality can be computed */
tmp = NULL;
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
/* add the name of the proc */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &pptr->name, 1, OPAL_NAME))) {
ORTE_ERROR_LOG(rc);
free(tmp);
opal_argv_free(list);
return rc;
}
/* add its cpuset */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(tmp);
opal_argv_free(list);
return rc;
}
free(tmp);
}
}
}
/* pass the blob containing the cpusets for all local peers - note
* that the cpuset of the proc we are responding to will be included,
* so we don't need to send it separately */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_CPUSETS);
kv.type = OPAL_BYTE_OBJECT;
opal_dss.unload(&buf, (void**)&kv.data.bo.bytes, &kv.data.bo.size);
OBJ_DESTRUCT(&buf);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
opal_argv_free(list);
return rc;
}
OBJ_DESTRUCT(&kv);
/* construct the list of peers for transmission */
tmp = opal_argv_join(list, ',');
opal_argv_free(list);
/* pass the local ldr */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCALLDR);
kv.type = OPAL_NAME;
kv.data.name = name;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
free(tmp);
return rc;
}
OBJ_DESTRUCT(&kv);
/* pass the list of peers */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_PEERS);
kv.type = OPAL_STRING;
kv.data.string = tmp;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* app ldr */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_APPLDR);
kv.type = OPAL_UINT32;
kv.data.uint32 = app->first_rank;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* univ size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_UNIV_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* job size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_JOB_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_LOCAL_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = jdata->num_local_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* node size */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_NODE_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = node->num_procs;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* max procs */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(PMIX_MAX_PROCS);
kv.type = OPAL_UINT32;
kv.data.uint16 = jdata->total_slots_alloc;
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &kp, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
return rc;
}
OBJ_DESTRUCT(&kv);
/* local topology - we do this so the procs won't read the
* topology themselves as this could overwhelm the local
* system on large-scale SMPs */
return ORTE_SUCCESS;
}
/* /*
* Dispatch to the appropriate action routine based on the state * Dispatch to the appropriate action routine based on the state
* of the connection with the peer. * of the connection with the peer.
@ -1610,7 +1251,7 @@ static void process_message(pmix_server_peer_t *peer)
/* mark the proc as having registered */ /* mark the proc as having registered */
ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_REGISTERED); ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_REGISTERED);
/* stuff the values corresponding to the list of supported attrs */ /* stuff the values corresponding to the list of supported attrs */
if (ORTE_SUCCESS != (ret = stuff_proc_values(&buf, jdata, proc))) { if (ORTE_SUCCESS != (ret = pmix_server_fetch_proc_map(&buf, jdata, proc))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&buf); OBJ_DESTRUCT(&buf);
OBJ_DESTRUCT(&xfer); OBJ_DESTRUCT(&xfer);