1
1

fixes for grpcomm rcd/brucks algorithms

Этот коммит содержится в:
Elena 2014-10-09 06:12:26 +02:00
родитель 9947758d98
Коммит e319c95267
11 изменённых файлов: 606 добавлений и 436 удалений

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte_config.h" #include "orte_config.h"
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/dss/dss_types.h" #include "opal/dss/dss_types.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/hwloc/hwloc.h" #include "opal/mca/hwloc/hwloc.h"
@ -67,6 +68,7 @@ OBJ_CLASS_DECLARATION(orte_grpcomm_base_active_t);
typedef struct { typedef struct {
opal_list_t actives; opal_list_t actives;
opal_list_t ongoing; opal_list_t ongoing;
opal_hash_table_t sig_table;
} orte_grpcomm_base_t; } orte_grpcomm_base_t;
ORTE_DECLSPEC extern orte_grpcomm_base_t orte_grpcomm_base; ORTE_DECLSPEC extern orte_grpcomm_base_t orte_grpcomm_base;

Просмотреть файл

@ -70,6 +70,7 @@ static int orte_grpcomm_base_close(void)
} }
OPAL_LIST_DESTRUCT(&orte_grpcomm_base.actives); OPAL_LIST_DESTRUCT(&orte_grpcomm_base.actives);
OPAL_LIST_DESTRUCT(&orte_grpcomm_base.ongoing); OPAL_LIST_DESTRUCT(&orte_grpcomm_base.ongoing);
OBJ_DESTRUCT(&orte_grpcomm_base.sig_table);
return mca_base_framework_components_close(&orte_grpcomm_base_framework, NULL); return mca_base_framework_components_close(&orte_grpcomm_base_framework, NULL);
} }
@ -82,6 +83,8 @@ static int orte_grpcomm_base_open(mca_base_open_flag_t flags)
{ {
OBJ_CONSTRUCT(&orte_grpcomm_base.actives, opal_list_t); OBJ_CONSTRUCT(&orte_grpcomm_base.actives, opal_list_t);
OBJ_CONSTRUCT(&orte_grpcomm_base.ongoing, opal_list_t); OBJ_CONSTRUCT(&orte_grpcomm_base.ongoing, opal_list_t);
OBJ_CONSTRUCT(&orte_grpcomm_base.sig_table, opal_hash_table_t);
opal_hash_table_init(&orte_grpcomm_base.sig_table, 128);
return mca_base_framework_components_open(&orte_grpcomm_base_framework, flags); return mca_base_framework_components_open(&orte_grpcomm_base_framework, flags);
} }
@ -97,6 +100,7 @@ static void scon(orte_grpcomm_signature_t *p)
{ {
p->signature = NULL; p->signature = NULL;
p->sz = 0; p->sz = 0;
p->seq_num = 0;
} }
static void sdes(orte_grpcomm_signature_t *p) static void sdes(orte_grpcomm_signature_t *p)
{ {
@ -115,8 +119,10 @@ static void ccon(orte_grpcomm_coll_t *p)
p->dmns = NULL; p->dmns = NULL;
p->ndmns = 0; p->ndmns = 0;
p->nreported = 0; p->nreported = 0;
p->distance_mask_recv = 0;
p->cbfunc = NULL; p->cbfunc = NULL;
p->cbdata = NULL; p->cbdata = NULL;
p->buffers = NULL;
} }
static void cdes(orte_grpcomm_coll_t *p) static void cdes(orte_grpcomm_coll_t *p)
{ {
@ -127,6 +133,7 @@ static void cdes(orte_grpcomm_coll_t *p)
if (NULL != p->dmns) { if (NULL != p->dmns) {
free(p->dmns); free(p->dmns);
} }
free(p->buffers);
} }
OBJ_CLASS_INSTANCE(orte_grpcomm_coll_t, OBJ_CLASS_INSTANCE(orte_grpcomm_coll_t,
opal_list_item_t, opal_list_item_t,

Просмотреть файл

@ -128,9 +128,11 @@ int orte_grpcomm_API_xcast(orte_grpcomm_signature_t *sig,
static void allgather_stub(int fd, short args, void *cbdata) static void allgather_stub(int fd, short args, void *cbdata)
{ {
orte_grpcomm_caddy_t *cd = (orte_grpcomm_caddy_t*)cbdata; orte_grpcomm_caddy_t *cd = (orte_grpcomm_caddy_t*)cbdata;
int ret = OPAL_SUCCESS;
int rc; int rc;
orte_grpcomm_base_active_t *active; orte_grpcomm_base_active_t *active;
orte_grpcomm_coll_t *coll; orte_grpcomm_coll_t *coll;
void *seq_number;
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:allgather stub", "%s grpcomm:base:allgather stub",
@ -139,6 +141,28 @@ static void allgather_stub(int fd, short args, void *cbdata)
/* retrieve an existing tracker, create it if not /* retrieve an existing tracker, create it if not
* already found. The allgather module is responsible * already found. The allgather module is responsible
* for releasing it upon completion of the collective */ * for releasing it upon completion of the collective */
ret = opal_hash_table_get_value_ptr(&orte_grpcomm_base.sig_table, (void *)cd->sig->signature, cd->sig->sz * sizeof(orte_process_name_t), &seq_number);
if (OPAL_ERR_NOT_FOUND == ret) {
cd->sig->seq_num = 0;
} else if (OPAL_SUCCESS == ret) {
cd->sig->seq_num = *((uint32_t *)(seq_number)) + 1;
} else {
OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
"%s rpcomm:base:allgather can't not get signature from hash table",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(cd);
return;
}
ret = opal_hash_table_set_value_ptr(&orte_grpcomm_base.sig_table, (void *)cd->sig->signature, cd->sig->sz * sizeof(orte_process_name_t), (void *)&cd->sig->seq_num);
if (OPAL_SUCCESS != ret) {
OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
"%s rpcomm:base:allgather can't not add new signature to hash table",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(cd);
return;
}
coll = orte_grpcomm_base_get_tracker(cd->sig, true); coll = orte_grpcomm_base_get_tracker(cd->sig, true);
coll->cbfunc = cd->cbfunc; coll->cbfunc = cd->cbfunc;
coll->cbdata = cd->cbdata; coll->cbdata = cd->cbdata;
@ -169,9 +193,8 @@ int orte_grpcomm_API_allgather(orte_grpcomm_signature_t *sig,
* access framework-global data safely */ * access framework-global data safely */
cd = OBJ_NEW(orte_grpcomm_caddy_t); cd = OBJ_NEW(orte_grpcomm_caddy_t);
/* ensure the data doesn't go away */ /* ensure the data doesn't go away */
OBJ_RETAIN(sig);
OBJ_RETAIN(buf); OBJ_RETAIN(buf);
cd->sig = sig; opal_dss.copy((void **)&cd->sig, (void *)sig, ORTE_SIGNATURE);
cd->buf = buf; cd->buf = buf;
cd->cbfunc = cbfunc; cd->cbfunc = cbfunc;
cd->cbdata = cbdata; cd->cbdata = cbdata;
@ -197,7 +220,7 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
/* if only one is NULL, then we can't possibly match */ /* if only one is NULL, then we can't possibly match */
break; break;
} }
if (OPAL_EQUAL == opal_dss.compare(sig, coll->sig, ORTE_SIGNATURE)) { if (OPAL_EQUAL == (rc = opal_dss.compare(sig, coll->sig, ORTE_SIGNATURE))) {
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:returning existing collective", "%s grpcomm:base:returning existing collective",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -213,16 +236,17 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
return NULL; return NULL;
} }
coll = OBJ_NEW(orte_grpcomm_coll_t);
opal_dss.copy((void **)&coll->sig, (void *)sig, ORTE_SIGNATURE);
if (1 < opal_output_get_verbosity(orte_grpcomm_base_framework.framework_output)) { if (1 < opal_output_get_verbosity(orte_grpcomm_base_framework.framework_output)) {
char *tmp=NULL; char *tmp=NULL;
(void)opal_dss.print(&tmp, NULL, sig, ORTE_SIGNATURE); (void)opal_dss.print(&tmp, NULL, coll->sig, ORTE_SIGNATURE);
opal_output(0, "%s grpcomm:base: creating new coll for procs %s", opal_output(0, "%s grpcomm:base: creating new coll for procs %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
free(tmp); free(tmp);
} }
coll = OBJ_NEW(orte_grpcomm_coll_t);
OBJ_RETAIN(sig);
coll->sig = sig;
opal_list_append(&orte_grpcomm_base.ongoing, &coll->super); opal_list_append(&orte_grpcomm_base.ongoing, &coll->super);
/* now get the daemons involved */ /* now get the daemons involved */

Просмотреть файл

@ -6,6 +6,8 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved. * rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -39,7 +41,8 @@ static int xcast(orte_vpid_t *vpids,
opal_buffer_t *msg); opal_buffer_t *msg);
static int allgather(orte_grpcomm_coll_t *coll, static int allgather(orte_grpcomm_coll_t *coll,
opal_buffer_t *buf); opal_buffer_t *buf);
static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distance); static void brks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance);
static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance);
static void brks_allgather_recv_dist(int status, orte_process_name_t* sender, static void brks_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata); void* cbdata);
@ -91,65 +94,28 @@ static int allgather(orte_grpcomm_coll_t *coll,
"%s grpcomm:coll:bruck algo employed for %d processes", "%s grpcomm:coll:bruck algo employed for %d processes",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns));
/* if we only have one proc participating, just copy the data across and return */ /* start by seeding the collection with our own data */
if ((coll->ndmns != 0) && ((coll->ndmns & (coll->ndmns - 1)) == 0)) { opal_dss.copy_payload(&coll->bucket, sendbuf);
OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:bruck number of participating daemons (%d) is power 2", /* record that we contributed */
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int) coll->ndmns )); coll->nreported += 1;
return ORTE_ERROR;
} /* mark local data received */
coll->distance_mask_recv |= 1;
/* start by seeding the collection with our own data */ /* start by seeding the collection with our own data */
opal_dss.copy_payload(&coll->bucket, sendbuf); opal_dss.copy_payload(&coll->bucket, sendbuf);
/* Communication step: /* process data */
At every step i, rank r: brks_allgather_process_data(coll, 1);
- doubles the distance
- sends message containing all data collected so far to rank r - distance
- receives message containing all data collected so far from rank (r + distance)
*/
/* find my position in the group of participants. This
* value is the "rank" we will use in the algo
*/
brks_allgather_send_dist(coll, 1);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distance) { static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) {
orte_process_name_t peer_send, peer_recv;
opal_buffer_t *send_buf; opal_buffer_t *send_buf;
int rc; int rc;
peer_send.jobid = ORTE_PROC_MY_NAME->jobid;
peer_recv.jobid = ORTE_PROC_MY_NAME->jobid;
if (1 == coll->ndmns) {
peer_send.vpid = ORTE_PROC_MY_NAME->vpid;
peer_recv.vpid = ORTE_PROC_MY_NAME->vpid;
} else {
orte_vpid_t nv, rank;
rank = ORTE_VPID_INVALID;
for (nv = 0; nv < coll->ndmns; nv++) {
if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) {
rank = nv;
break;
}
}
/* check for bozo case */
if (ORTE_VPID_INVALID == rank) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* first send my current contents */
nv = (coll->ndmns + rank - distance) % coll->ndmns;
peer_send.vpid = coll->dmns[nv];
/* now setup to recv from my other partner */
nv = (rank + distance) % coll->ndmns;
peer_recv.vpid = coll->dmns[nv];
}
send_buf = OBJ_NEW(opal_buffer_t); send_buf = OBJ_NEW(opal_buffer_t);
/* pack the signature */ /* pack the signature */
@ -164,12 +130,6 @@ static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t dista
OBJ_RELEASE(send_buf); OBJ_RELEASE(send_buf);
return rc; return rc;
} }
/* pack the number of reported processes */
if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->nreported, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(send_buf);
return rc;
}
/* pack the data */ /* pack the data */
if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) { if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -178,11 +138,12 @@ static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t dista
} }
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:bruck sending to %s", "%s grpcomm:coll:brks SENDING TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer_send))); ORTE_NAME_PRINT(peer)));
if (0 > (rc = orte_rml.send_buffer_nb(&peer_send, send_buf,
if (0 > (rc = orte_rml.send_buffer_nb(peer, send_buf,
ORTE_RML_TAG_ALLGATHER_BRKS, ORTE_RML_TAG_ALLGATHER_BRKS,
orte_rml_send_callback, NULL))) { orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -190,27 +151,95 @@ static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t dista
return rc; return rc;
}; };
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:bruck receiving from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer_recv)));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/**
 * Advance the Bruck allgather of a collective as far as the data already
 * on hand allows.
 *
 * Communication step, for every distance (1, 2, 4, ...) below coll->ndmns,
 * with r being our index in coll->dmns:
 *   - send everything collected so far to rank (ndmns + r - distance) % ndmns
 *   - if the contribution for this distance was already stored in
 *     coll->buffers[distance - 1], merge it into coll->bucket, account for
 *     it and continue with the doubled distance
 *   - otherwise stop; processing resumes on a later call
 *
 * Once coll->nreported reaches coll->ndmns the collective is finalized with
 * ORTE_SUCCESS. Every failure finalizes the collective with the error code
 * and returns immediately.
 *
 * NOTE(review): the distance itself is used as the bit index into the
 * 32-bit distance_mask_recv, so a distance of 32 or more shifts past the
 * width of the mask. The receive callback tests the same bit layout, so
 * both places have to be changed together.
 *
 * @param coll      collective tracker being progressed
 * @param distance  distance to start processing from
 */
static void brks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) {
    orte_process_name_t peer;
    orte_vpid_t nv, rank;
    int rc;

    peer.jobid = ORTE_PROC_MY_NAME->jobid;

    /* get my own rank: my position in the list of participating daemons */
    rank = ORTE_VPID_INVALID;
    for (nv = 0; nv < coll->ndmns; nv++) {
        if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) {
            rank = nv;
            break;
        }
    }
    /* check for bozo case */
    if (ORTE_VPID_INVALID == rank) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "Peer not found"));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        brks_finalize_coll(coll, ORTE_ERR_NOT_FOUND);
        return;
    }

    while (distance < coll->ndmns) {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:brks process distance %u (mask recv: 0x%x)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));

        /* first send my current contents */
        nv = (coll->ndmns + rank - distance) % coll->ndmns;
        peer.vpid = coll->dmns[nv];

        /* a failed send must not be ignored: the peer would never get our
         * data and the collective would never complete. The visible error
         * paths of the send helper log the failure themselves, so it is not
         * logged a second time here. */
        if (ORTE_SUCCESS != (rc = brks_allgather_send_dist(coll, &peer, distance))) {
            brks_finalize_coll(coll, rc);
            return;
        }

        /* check whether the data for this distance has already arrived */
        if ((NULL != coll->buffers) && (coll->buffers[distance - 1] != NULL)) {
            OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                                 "%s grpcomm:coll:brks %u distance data found",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));
            if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance - 1]))) {
                ORTE_ERROR_LOG(rc);
                brks_finalize_coll(coll, rc);
                return;
            }
            coll->nreported += distance;
            coll->distance_mask_recv |= (uint32_t)(1 << distance);
            /* the stored buffer has been merged, drop it */
            OBJ_RELEASE(coll->buffers[distance - 1]);
            coll->buffers[distance - 1] = NULL;
            distance = distance << 1;
            continue;
        }
        /* nothing stored for this distance yet - wait for it to arrive */
        break;
    }

    OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:brks reported %lu process from %lu",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported,
                         (unsigned long)coll->ndmns));

    /* if we are done, then complete things */
    if (coll->nreported >= coll->ndmns) {
        brks_finalize_coll(coll, ORTE_SUCCESS);
    }
    return;
}
static void brks_allgather_recv_dist(int status, orte_process_name_t* sender, static void brks_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata) void* cbdata)
{ {
int32_t cnt, num_remote; int32_t cnt;
int rc; int rc;
orte_grpcomm_signature_t *sig; orte_grpcomm_signature_t *sig;
orte_grpcomm_coll_t *coll; orte_grpcomm_coll_t *coll;
orte_vpid_t distance, new_distance; uint32_t distance;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub received data", "%s grpcomm:coll:brks RECEIVING FROM %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* unpack the signature */ /* unpack the signature */
cnt = 1; cnt = 1;
@ -225,7 +254,6 @@ static void brks_allgather_recv_dist(int status, orte_process_name_t* sender,
OBJ_RELEASE(sig); OBJ_RELEASE(sig);
return; return;
} }
/* unpack the distance */ /* unpack the distance */
distance = 1; distance = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) { if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) {
@ -234,31 +262,51 @@ static void brks_allgather_recv_dist(int status, orte_process_name_t* sender,
brks_finalize_coll(coll, rc); brks_finalize_coll(coll, rc);
return; return;
} }
assert(0 == (coll->distance_mask_recv & (uint32_t)(1 << distance)));
/* unpack number of reported processes */ /* Check whether we can process next distance */
num_remote = 0; if (coll->distance_mask_recv & ((uint32_t)(1 << (distance >> 1)))) {
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &num_remote, &cnt, OPAL_INT32))) { OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
OBJ_RELEASE(sig); "%s grpcomm:coll:brks data from %d distance received, "
ORTE_ERROR_LOG(rc); "Process the next distance (mask recv: 0x%x).",
brks_finalize_coll(coll, rc); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));
return; /* capture any provided content */
} if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) {
coll->nreported += num_remote; OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
/* capture any provided content */ brks_finalize_coll(coll, rc);
if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { return;
OBJ_RELEASE(sig); }
ORTE_ERROR_LOG(rc); coll->nreported += distance;
brks_finalize_coll(coll, rc); coll->distance_mask_recv |= (uint32_t)(1 << distance);
return; brks_allgather_process_data(coll, (uint32_t)(distance << 1));
}
//update distance and send
new_distance = distance <<= 1;
if (new_distance < coll->ndmns) {
brks_allgather_send_dist(coll, new_distance);
} else { } else {
brks_finalize_coll(coll, ORTE_SUCCESS); OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:brks data from %d distance received, "
"still waiting for data (mask recv: 0x%x).",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));
if (NULL == coll->buffers) {
if (NULL == (coll->buffers = (opal_buffer_t **)calloc(sizeof(opal_buffer_t *), coll->ndmns - 1))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
brks_finalize_coll(coll, rc);
return;
}
}
if (NULL == (coll->buffers[distance - 1] = OBJ_NEW(opal_buffer_t))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
brks_finalize_coll(coll, rc);
return;
}
if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance - 1], buffer))) {
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
brks_finalize_coll(coll, rc);
return;
}
} }
OBJ_RELEASE(sig); OBJ_RELEASE(sig);
@ -270,6 +318,10 @@ static int brks_finalize_coll(orte_grpcomm_coll_t *coll, int ret) {
opal_buffer_t *reply; opal_buffer_t *reply;
int rc; int rc;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:brks declared collective complete",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
reply = OBJ_NEW(opal_buffer_t); reply = OBJ_NEW(opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &coll->nreported, 1, OPAL_UINT64))) { if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &coll->nreported, 1, OPAL_UINT64))) {

Просмотреть файл

@ -57,6 +57,7 @@ typedef struct {
opal_object_t super; opal_object_t super;
orte_process_name_t *signature; orte_process_name_t *signature;
size_t sz; size_t sz;
uint32_t seq_num;
} orte_grpcomm_signature_t; } orte_grpcomm_signature_t;
OBJ_CLASS_DECLARATION(orte_grpcomm_signature_t); OBJ_CLASS_DECLARATION(orte_grpcomm_signature_t);
@ -73,6 +74,10 @@ typedef struct {
size_t ndmns; size_t ndmns;
/* number reported in */ /* number reported in */
size_t nreported; size_t nreported;
/* distance masks for receive */
uint32_t distance_mask_recv;
/* received buckets */
opal_buffer_t ** buffers;
/* callback function */ /* callback function */
orte_grpcomm_cbfunc_t cbfunc; orte_grpcomm_cbfunc_t cbfunc;
/* user-provided callback data */ /* user-provided callback data */

Просмотреть файл

@ -6,6 +6,8 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved. * rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -18,6 +20,7 @@
#include "orte/types.h" #include "orte/types.h"
#include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_wait.h"
#include <math.h>
#include <string.h> #include <string.h>
#include "opal/dss/dss.h" #include "opal/dss/dss.h"
@ -39,11 +42,13 @@ static int xcast(orte_vpid_t *vpids,
opal_buffer_t *msg); opal_buffer_t *msg);
static int allgather(orte_grpcomm_coll_t *coll, static int allgather(orte_grpcomm_coll_t *coll,
opal_buffer_t *buf); opal_buffer_t *buf);
static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distance); static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance);
static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance);
static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata); void* cbdata);
static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret); static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret);
/* Module def */ /* Module def */
orte_grpcomm_base_module_t orte_grpcomm_rcd_module = { orte_grpcomm_base_module_t orte_grpcomm_rcd_module = {
init, init,
@ -85,58 +90,35 @@ static int xcast(orte_vpid_t *vpids,
static int allgather(orte_grpcomm_coll_t *coll, static int allgather(orte_grpcomm_coll_t *coll,
opal_buffer_t *sendbuf) opal_buffer_t *sendbuf)
{ {
/* check the number of involved daemons - if it is not a power of two,
* then we cannot do it */
if (0 == ((coll->ndmns != 0) && !(coll->ndmns & (coll->ndmns - 1)))) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub algo employed for %d processes", "%s grpcomm:coll:recdub algo employed for %d daemons",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns));
/* if we only have one proc participating, just copy the data across and return */ /* record that we contributed */
if (!((coll->ndmns != 0) && ((coll->ndmns & (coll->ndmns - 1)) == 0))) { coll->nreported += 1;
OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub number of participating daemons (%d) is not power 2", /* mark local data received */
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns )); coll->distance_mask_recv |= 1;
return ORTE_ERROR;
}
/* start by seeding the collection with our own data */ /* start by seeding the collection with our own data */
opal_dss.copy_payload(&coll->bucket, sendbuf); opal_dss.copy_payload(&coll->bucket, sendbuf);
/* Communication step: /* process data */
At every step i, rank r: rcd_allgather_process_data(coll, 1);
- exchanges message containing all data collected so far with rank peer = (r ^ 2^i).
*/
rcd_allgather_send_dist(coll, 1);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distance) { static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) {
orte_process_name_t peer;
opal_buffer_t *send_buf; opal_buffer_t *send_buf;
int rc; int rc;
peer.jobid = ORTE_PROC_MY_NAME->jobid;
if (1 == coll->ndmns) {
peer.vpid = ORTE_PROC_MY_NAME->vpid;
} else {
orte_vpid_t nv, rank;
rank = ORTE_VPID_INVALID;
for (nv = 0; nv < coll->ndmns; nv++) {
if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) {
rank = nv;
break;
}
}
/* check for bozo case */
if (ORTE_VPID_INVALID == rank) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* first send my current contents */
nv = rank ^ distance;
peer.vpid = coll->dmns[nv];
}
send_buf = OBJ_NEW(opal_buffer_t); send_buf = OBJ_NEW(opal_buffer_t);
/* pack the signature */ /* pack the signature */
@ -145,14 +127,8 @@ static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distan
OBJ_RELEASE(send_buf); OBJ_RELEASE(send_buf);
return rc; return rc;
} }
/* pack the current distance */ /* pack the distance */
if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_INT32))) { if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(send_buf);
return rc;
}
/* pack the number of reported processes */
if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->nreported, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(send_buf); OBJ_RELEASE(send_buf);
return rc; return rc;
@ -165,40 +141,106 @@ static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_vpid_t distan
} }
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub sending to %s", "%s grpcomm:coll:recdub SENDING TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer))); ORTE_NAME_PRINT(peer)));
if (0 > (rc = orte_rml.send_buffer_nb(peer, send_buf,
if (0 > (rc = orte_rml.send_buffer_nb(&peer, send_buf,
ORTE_RML_TAG_ALLGATHER_RCD, ORTE_RML_TAG_ALLGATHER_RCD,
orte_rml_send_callback, NULL))) { orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(send_buf); OBJ_RELEASE(send_buf);
return rc; return rc;
}; };
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub receiving from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer)));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/**
 * Advance the recursive-doubling allgather of a collective as far as the
 * data already on hand allows.
 *
 * Communication step, for every distance (1, 2, 4, ...) below coll->ndmns,
 * with r being our index in coll->dmns:
 *   - send everything collected so far to rank (r ^ distance)
 *   - if the contribution for this distance was already stored in
 *     coll->buffers[log2(distance)], merge it into coll->bucket, account
 *     for it and continue with the doubled distance
 *   - otherwise stop; processing resumes on a later call
 *
 * Once coll->nreported reaches coll->ndmns the collective is finalized with
 * ORTE_SUCCESS. Every failure finalizes the collective with the error code
 * and returns immediately.
 *
 * NOTE(review): the distance itself is used as the bit index into the
 * 32-bit distance_mask_recv, so a distance of 32 or more shifts past the
 * width of the mask. The receive callback tests the same bit layout, so
 * both places have to be changed together.
 *
 * @param coll      collective tracker being progressed
 * @param distance  distance to start processing from
 */
static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) {
    orte_process_name_t peer;
    orte_vpid_t nv, rank;
    uint32_t distance_index, remaining;
    int rc;

    peer.jobid = ORTE_PROC_MY_NAME->jobid;

    /* get my own rank: my position in the list of participating daemons */
    rank = ORTE_VPID_INVALID;
    for (nv = 0; nv < coll->ndmns; nv++) {
        if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) {
            rank = nv;
            break;
        }
    }
    /* check for bozo case */
    if (ORTE_VPID_INVALID == rank) {
        OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output,
                     "Peer not found"));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rcd_finalize_coll(coll, ORTE_ERR_NOT_FOUND);
        return;
    }

    while (distance < coll->ndmns) {
        OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:coll:recdub process distance %u (mask recv: 0x%x)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));

        /* first send my current contents */
        nv = rank ^ distance;
        peer.vpid = coll->dmns[nv];

        /* a failed send must not be ignored: the peer would never get our
         * data and the collective would never complete. The visible error
         * paths of the send helper log the failure themselves, so it is not
         * logged a second time here. */
        if (ORTE_SUCCESS != (rc = rcd_allgather_send_dist(coll, &peer, distance))) {
            rcd_finalize_coll(coll, rc);
            return;
        }

        /* buffers are indexed by floor(log2(distance)); compute it with
         * integer arithmetic instead of going through floating point */
        distance_index = 0;
        for (remaining = distance; remaining > 1; remaining >>= 1) {
            distance_index++;
        }

        /* check whether the data for this distance has already arrived */
        if ((NULL != coll->buffers) && (NULL != coll->buffers[distance_index])) {
            OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                                 "%s grpcomm:coll:recdub %u distance data found",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance));
            if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance_index]))) {
                ORTE_ERROR_LOG(rc);
                rcd_finalize_coll(coll, rc);
                return;
            }
            coll->nreported += distance;
            coll->distance_mask_recv |= (uint32_t)(1 << distance);
            /* the stored buffer has been merged, drop it */
            OBJ_RELEASE(coll->buffers[distance_index]);
            coll->buffers[distance_index] = NULL;
            distance = distance << 1;
            continue;
        }
        /* nothing stored for this distance yet - wait for it to arrive */
        break;
    }

    OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:coll:recdub reported %lu process from %lu",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported,
                         (unsigned long)coll->ndmns));

    /* if we are done, then complete things */
    if (coll->nreported >= coll->ndmns) {
        rcd_finalize_coll(coll, ORTE_SUCCESS);
    }
    return;
}
static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata) void* cbdata)
{ {
int32_t cnt, num_remote; int32_t cnt;
uint32_t distance, distance_index;
int rc; int rc;
orte_grpcomm_signature_t *sig; orte_grpcomm_signature_t *sig;
orte_grpcomm_coll_t *coll; orte_grpcomm_coll_t *coll;
orte_vpid_t distance, new_distance;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub received data", "%s grpcomm:coll:recdub RECEIVING FROM %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* unpack the signature */ /* unpack the signature */
cnt = 1; cnt = 1;
@ -213,40 +255,60 @@ static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender,
OBJ_RELEASE(sig); OBJ_RELEASE(sig);
return; return;
} }
/* unpack the distance */ /* unpack the distance */
distance = 1; distance = 0;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) { if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_UINT32))) {
OBJ_RELEASE(sig); OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
rcd_finalize_coll(coll, rc); rcd_finalize_coll(coll, rc);
return; return;
} }
assert(0 == (coll->distance_mask_recv & (uint32_t)(1 << distance)));
/* unpack number of reported */ /* Check whether we can process next distance */
num_remote = 0; if (coll->distance_mask_recv & ((uint32_t)(1 << (distance >> 1)))) {
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &num_remote, &cnt, OPAL_INT32))) { OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
OBJ_RELEASE(sig); "%s grpcomm:coll:recdub data from %d distance received, "
ORTE_ERROR_LOG(rc); "Process the next distance (mask recv: 0x%x).",
rcd_finalize_coll(coll, rc); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));
return; /* capture any provided content */
} if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) {
coll->nreported += num_remote; OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
/* capture any provided content */ rcd_finalize_coll(coll, rc);
if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { return;
OBJ_RELEASE(sig); }
ORTE_ERROR_LOG(rc); coll->nreported += distance;
rcd_finalize_coll(coll, rc); coll->distance_mask_recv |= (uint32_t)(1 << distance);
return; rcd_allgather_process_data(coll, (uint32_t)(distance << 1));
}
//update distance and send
new_distance = distance <<= 1;
if (new_distance < coll->ndmns) {
rcd_allgather_send_dist(coll, new_distance);
} else { } else {
rcd_finalize_coll(coll, ORTE_SUCCESS); OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub data from %d distance received, "
"still waiting for data (mask recv: 0x%x).",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance, coll->distance_mask_recv));
if (NULL == coll->buffers) {
if (NULL == (coll->buffers = (opal_buffer_t **)calloc(sizeof(opal_buffer_t *), log2(coll->ndmns)))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
rcd_finalize_coll(coll, rc);
return;
}
}
distance_index = log2(distance);
if (NULL == (coll->buffers[distance_index] = OBJ_NEW(opal_buffer_t))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
rcd_finalize_coll(coll, rc);
return;
}
if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance_index], buffer))) {
OBJ_RELEASE(sig);
ORTE_ERROR_LOG(rc);
rcd_finalize_coll(coll, rc);
return;
}
} }
OBJ_RELEASE(sig); OBJ_RELEASE(sig);
@ -258,6 +320,10 @@ static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret) {
opal_buffer_t *reply; opal_buffer_t *reply;
int rc; int rc;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:coll:recdub declared collective complete",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
reply = OBJ_NEW(opal_buffer_t); reply = OBJ_NEW(opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &coll->nreported, 1, OPAL_UINT64))) { if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &coll->nreported, 1, OPAL_UINT64))) {

Просмотреть файл

@ -49,14 +49,14 @@ int orte_dt_compare_name(orte_process_name_t *value1,
} else if (NULL == value2) { } else if (NULL == value2) {
return OPAL_VALUE1_GREATER; return OPAL_VALUE1_GREATER;
} }
/* If any of the fields are wildcard, /* If any of the fields are wildcard,
* then we want to just ignore that one field. In the case * then we want to just ignore that one field. In the case
* of ORTE_NAME_WILDCARD (where ALL of the fields are wildcard), this * of ORTE_NAME_WILDCARD (where ALL of the fields are wildcard), this
* will automatically result in OPAL_EQUAL for any name in the other * will automatically result in OPAL_EQUAL for any name in the other
* value - a totally useless result, but consistent in behavior. * value - a totally useless result, but consistent in behavior.
*/ */
/** check the jobids - if one of them is WILDCARD, then ignore /** check the jobids - if one of them is WILDCARD, then ignore
* this field since anything is okay * this field since anything is okay
*/ */
@ -68,7 +68,7 @@ int orte_dt_compare_name(orte_process_name_t *value1,
return OPAL_VALUE1_GREATER; return OPAL_VALUE1_GREATER;
} }
} }
/** check the vpids - if one of them is WILDCARD, then ignore /** check the vpids - if one of them is WILDCARD, then ignore
* this field since anything is okay * this field since anything is okay
*/ */
@ -92,11 +92,11 @@ int orte_dt_compare_vpid(orte_vpid_t *value1,
/** if either value is WILDCARD, then return equal */ /** if either value is WILDCARD, then return equal */
if (*value1 == ORTE_VPID_WILDCARD || if (*value1 == ORTE_VPID_WILDCARD ||
*value2 == ORTE_VPID_WILDCARD) return OPAL_EQUAL; *value2 == ORTE_VPID_WILDCARD) return OPAL_EQUAL;
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -107,11 +107,11 @@ int orte_dt_compare_jobid(orte_jobid_t *value1,
/** if either value is WILDCARD, then return equal */ /** if either value is WILDCARD, then return equal */
if (*value1 == ORTE_JOBID_WILDCARD || if (*value1 == ORTE_JOBID_WILDCARD ||
*value2 == ORTE_JOBID_WILDCARD) return OPAL_EQUAL; *value2 == ORTE_JOBID_WILDCARD) return OPAL_EQUAL;
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -133,12 +133,12 @@ int orte_dt_compare_job(orte_job_t *value1, orte_job_t *value2, opal_data_type_t
int orte_dt_compare_node(orte_node_t *value1, orte_node_t *value2, opal_data_type_t type) int orte_dt_compare_node(orte_node_t *value1, orte_node_t *value2, opal_data_type_t type)
{ {
int test; int test;
/** check node names */ /** check node names */
test = strcmp(value1->name, value2->name); test = strcmp(value1->name, value2->name);
if (0 == test) return OPAL_EQUAL; if (0 == test) return OPAL_EQUAL;
if (0 < test) return OPAL_VALUE2_GREATER; if (0 < test) return OPAL_VALUE2_GREATER;
return OPAL_VALUE1_GREATER; return OPAL_VALUE1_GREATER;
} }
@ -162,7 +162,7 @@ int orte_dt_compare_app_context(orte_app_context_t *value1, orte_app_context_t *
{ {
if (value1->idx > value2->idx) return OPAL_VALUE1_GREATER; if (value1->idx > value2->idx) return OPAL_VALUE1_GREATER;
if (value2->idx > value1->idx) return OPAL_VALUE2_GREATER; if (value2->idx > value1->idx) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -174,9 +174,9 @@ int orte_dt_compare_exit_code(orte_exit_code_t *value1,
opal_data_type_t type) opal_data_type_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -188,9 +188,9 @@ int orte_dt_compare_node_state(orte_node_state_t *value1,
orte_node_state_t type) orte_node_state_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -202,9 +202,9 @@ int orte_dt_compare_proc_state(orte_proc_state_t *value1,
orte_proc_state_t type) orte_proc_state_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -216,9 +216,9 @@ int orte_dt_compare_job_state(orte_job_state_t *value1,
orte_job_state_t type) orte_job_state_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -248,9 +248,9 @@ int orte_dt_compare_tags(orte_rml_tag_t *value1, orte_rml_tag_t *value2, opal_da
int orte_dt_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, opal_data_type_t type) int orte_dt_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, opal_data_type_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -258,9 +258,9 @@ int orte_dt_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_f
int orte_dt_compare_iof_tag(orte_iof_tag_t *value1, orte_iof_tag_t *value2, opal_data_type_t type) int orte_dt_compare_iof_tag(orte_iof_tag_t *value1, orte_iof_tag_t *value2, opal_data_type_t type)
{ {
if (*value1 > *value2) return OPAL_VALUE1_GREATER; if (*value1 > *value2) return OPAL_VALUE1_GREATER;
if (*value2 > *value1) return OPAL_VALUE2_GREATER; if (*value2 > *value1) return OPAL_VALUE2_GREATER;
return OPAL_EQUAL; return OPAL_EQUAL;
} }
@ -285,11 +285,16 @@ int orte_dt_compare_sig(orte_grpcomm_signature_t *value1, orte_grpcomm_signature
} }
if (value2->sz > value1->sz) { if (value2->sz > value1->sz) {
return OPAL_VALUE2_GREATER; return OPAL_VALUE2_GREATER;
}
if (value1->seq_num > value2->seq_num) {
return OPAL_VALUE1_GREATER;
}
if (value2->seq_num > value1->seq_num) {
return OPAL_VALUE2_GREATER;
} }
/* same size - check contents */ /* same size - check contents */
if (0 == memcmp(value1->signature, value2->signature, value1->sz*sizeof(orte_process_name_t))) { if (0 == memcmp(value1->signature, value2->signature, value1->sz*sizeof(orte_process_name_t))) {
return OPAL_EQUAL; return OPAL_EQUAL;
} }
return OPAL_VALUE2_GREATER; return OPAL_VALUE2_GREATER;
} }

Просмотреть файл

@ -36,19 +36,19 @@
#include "orte/runtime/data_type_support/orte_dt_support.h" #include "orte/runtime/data_type_support/orte_dt_support.h"
/* ORTE_STD_CNTR */ /* ORTE_STD_CNTR */
int orte_dt_copy_std_cntr(orte_std_cntr_t **dest, orte_std_cntr_t *src, opal_data_type_t type) int orte_dt_copy_std_cntr(orte_std_cntr_t **dest, orte_std_cntr_t *src, opal_data_type_t type)
{ {
orte_std_cntr_t *val; orte_std_cntr_t *val;
val = (orte_std_cntr_t*)malloc(sizeof(orte_std_cntr_t)); val = (orte_std_cntr_t*)malloc(sizeof(orte_std_cntr_t));
if (NULL == val) { if (NULL == val) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*val = *src; *val = *src;
*dest = val; *dest = val;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -56,16 +56,16 @@ int orte_dt_copy_std_cntr(orte_std_cntr_t **dest, orte_std_cntr_t *src, opal_dat
int orte_dt_copy_name(orte_process_name_t **dest, orte_process_name_t *src, opal_data_type_t type) int orte_dt_copy_name(orte_process_name_t **dest, orte_process_name_t *src, opal_data_type_t type)
{ {
orte_process_name_t *val; orte_process_name_t *val;
val = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); val = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
if (NULL == val) { if (NULL == val) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
val->jobid = src->jobid; val->jobid = src->jobid;
val->vpid = src->vpid; val->vpid = src->vpid;
*dest = val; *dest = val;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -76,16 +76,16 @@ int orte_dt_copy_name(orte_process_name_t **dest, orte_process_name_t *src, opal
int orte_dt_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, opal_data_type_t type) int orte_dt_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, opal_data_type_t type)
{ {
orte_jobid_t *val; orte_jobid_t *val;
val = (orte_jobid_t*)malloc(sizeof(orte_jobid_t)); val = (orte_jobid_t*)malloc(sizeof(orte_jobid_t));
if (NULL == val) { if (NULL == val) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*val = *src; *val = *src;
*dest = val; *dest = val;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -95,16 +95,16 @@ int orte_dt_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, opal_data_type_t
int orte_dt_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, opal_data_type_t type) int orte_dt_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, opal_data_type_t type)
{ {
orte_vpid_t *val; orte_vpid_t *val;
val = (orte_vpid_t*)malloc(sizeof(orte_vpid_t)); val = (orte_vpid_t*)malloc(sizeof(orte_vpid_t));
if (NULL == val) { if (NULL == val) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*val = *src; *val = *src;
*dest = val; *dest = val;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -115,7 +115,7 @@ int orte_dt_copy_job(orte_job_t **dest, orte_job_t *src, opal_data_type_t type)
{ {
(*dest) = src; (*dest) = src;
OBJ_RETAIN(src); OBJ_RETAIN(src);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -152,7 +152,7 @@ int orte_dt_copy_app_context(orte_app_context_t **dest, orte_app_context_t *src,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
(*dest)->idx = src->idx; (*dest)->idx = src->idx;
if (NULL != src->app) { if (NULL != src->app) {
@ -176,64 +176,64 @@ int orte_dt_copy_app_context(orte_app_context_t **dest, orte_app_context_t *src,
int orte_dt_copy_proc_state(orte_proc_state_t **dest, orte_proc_state_t *src, opal_data_type_t type) int orte_dt_copy_proc_state(orte_proc_state_t **dest, orte_proc_state_t *src, opal_data_type_t type)
{ {
orte_proc_state_t *ps; orte_proc_state_t *ps;
ps = (orte_proc_state_t*)malloc(sizeof(orte_proc_state_t)); ps = (orte_proc_state_t*)malloc(sizeof(orte_proc_state_t));
if (NULL == ps) { if (NULL == ps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*ps = *src; *ps = *src;
*dest = ps; *dest = ps;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_dt_copy_job_state(orte_job_state_t **dest, orte_job_state_t *src, opal_data_type_t type) int orte_dt_copy_job_state(orte_job_state_t **dest, orte_job_state_t *src, opal_data_type_t type)
{ {
orte_job_state_t *ps; orte_job_state_t *ps;
ps = (orte_job_state_t*)malloc(sizeof(orte_job_state_t)); ps = (orte_job_state_t*)malloc(sizeof(orte_job_state_t));
if (NULL == ps) { if (NULL == ps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*ps = *src; *ps = *src;
*dest = ps; *dest = ps;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_dt_copy_node_state(orte_node_state_t **dest, orte_node_state_t *src, opal_data_type_t type) int orte_dt_copy_node_state(orte_node_state_t **dest, orte_node_state_t *src, opal_data_type_t type)
{ {
orte_node_state_t *ps; orte_node_state_t *ps;
ps = (orte_node_state_t*)malloc(sizeof(orte_node_state_t)); ps = (orte_node_state_t*)malloc(sizeof(orte_node_state_t));
if (NULL == ps) { if (NULL == ps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*ps = *src; *ps = *src;
*dest = ps; *dest = ps;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_dt_copy_exit_code(orte_exit_code_t **dest, orte_exit_code_t *src, opal_data_type_t type) int orte_dt_copy_exit_code(orte_exit_code_t **dest, orte_exit_code_t *src, opal_data_type_t type)
{ {
orte_exit_code_t *ps; orte_exit_code_t *ps;
ps = (orte_exit_code_t*)malloc(sizeof(orte_exit_code_t)); ps = (orte_exit_code_t*)malloc(sizeof(orte_exit_code_t));
if (NULL == ps) { if (NULL == ps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
*ps = *src; *ps = *src;
*dest = ps; *dest = ps;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -243,19 +243,19 @@ int orte_dt_copy_exit_code(orte_exit_code_t **dest, orte_exit_code_t *src, opal_
int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_t type) int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_t type)
{ {
orte_std_cntr_t i; orte_std_cntr_t i;
if (NULL == src) { if (NULL == src) {
*dest = NULL; *dest = NULL;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* create the new object */ /* create the new object */
*dest = OBJ_NEW(orte_job_map_t); *dest = OBJ_NEW(orte_job_map_t);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
(*dest)->mapping = src->mapping; (*dest)->mapping = src->mapping;
(*dest)->ranking = src->ranking; (*dest)->ranking = src->ranking;
@ -269,7 +269,7 @@ int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_
(*dest)->num_new_daemons = src->num_new_daemons; (*dest)->num_new_daemons = src->num_new_daemons;
(*dest)->daemon_vpid_start = src->daemon_vpid_start; (*dest)->daemon_vpid_start = src->daemon_vpid_start;
(*dest)->num_nodes = src->num_nodes; (*dest)->num_nodes = src->num_nodes;
/* copy the pointer array - have to do this manually /* copy the pointer array - have to do this manually
* as no dss.copy function is setup for that object * as no dss.copy function is setup for that object
*/ */
@ -281,7 +281,7 @@ int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_
for (i=0; i < src->nodes->size; i++) { for (i=0; i < src->nodes->size; i++) {
(*dest)->nodes->addr[i] = src->nodes->addr[i]; (*dest)->nodes->addr[i] = src->nodes->addr[i];
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -291,57 +291,57 @@ int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_
int orte_dt_copy_tag(orte_rml_tag_t **dest, orte_rml_tag_t *src, opal_data_type_t type) int orte_dt_copy_tag(orte_rml_tag_t **dest, orte_rml_tag_t *src, opal_data_type_t type)
{ {
orte_rml_tag_t *tag; orte_rml_tag_t *tag;
if (NULL == src) { if (NULL == src) {
*dest = NULL; *dest = NULL;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* create the new space */ /* create the new space */
tag = (orte_rml_tag_t*)malloc(sizeof(orte_rml_tag_t)); tag = (orte_rml_tag_t*)malloc(sizeof(orte_rml_tag_t));
if (NULL == tag) { if (NULL == tag) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
*tag = *src; *tag = *src;
*dest = tag; *dest = tag;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_dt_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, opal_data_type_t type) int orte_dt_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, opal_data_type_t type)
{ {
size_t datasize; size_t datasize;
datasize = sizeof(orte_daemon_cmd_flag_t); datasize = sizeof(orte_daemon_cmd_flag_t);
*dest = (orte_daemon_cmd_flag_t*)malloc(datasize); *dest = (orte_daemon_cmd_flag_t*)malloc(datasize);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
memcpy(*dest, src, datasize); memcpy(*dest, src, datasize);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_dt_copy_iof_tag(orte_iof_tag_t **dest, orte_iof_tag_t *src, opal_data_type_t type) int orte_dt_copy_iof_tag(orte_iof_tag_t **dest, orte_iof_tag_t *src, opal_data_type_t type)
{ {
size_t datasize; size_t datasize;
datasize = sizeof(orte_iof_tag_t); datasize = sizeof(orte_iof_tag_t);
*dest = (orte_iof_tag_t*)malloc(datasize); *dest = (orte_iof_tag_t*)malloc(datasize);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
memcpy(*dest, src, datasize); memcpy(*dest, src, datasize);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -356,7 +356,7 @@ int orte_dt_copy_attr(orte_attribute_t **dest, orte_attribute_t *src, opal_data_
(*dest)->type = src->type; (*dest)->type = src->type;
memcpy(&(*dest)->data, &src->data, sizeof(src->data)); memcpy(&(*dest)->data, &src->data, sizeof(src->data));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -369,12 +369,12 @@ int orte_dt_copy_sig(orte_grpcomm_signature_t **dest, orte_grpcomm_signature_t *
} }
(*dest)->sz = src->sz; (*dest)->sz = src->sz;
(*dest)->signature = (orte_process_name_t*)malloc(src->sz * sizeof(orte_process_name_t)); (*dest)->signature = (orte_process_name_t*)malloc(src->sz * sizeof(orte_process_name_t));
(*dest)->seq_num = src->seq_num;
if (NULL == (*dest)->signature) { if (NULL == (*dest)->signature) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
memcpy(&(*dest)->signature, &src->signature, src->sz * sizeof(orte_process_name_t)); memcpy((*dest)->signature, src->signature, src->sz * sizeof(orte_process_name_t));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -43,13 +43,13 @@ int orte_dt_pack_std_cntr(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != ( if (ORTE_SUCCESS != (
ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_STD_CNTR_T))) { ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_STD_CNTR_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -64,7 +64,7 @@ int orte_dt_pack_name(opal_buffer_t *buffer, const void *src,
orte_process_name_t* proc; orte_process_name_t* proc;
orte_jobid_t *jobid; orte_jobid_t *jobid;
orte_vpid_t *vpid; orte_vpid_t *vpid;
/* collect all the jobids in a contiguous array */ /* collect all the jobids in a contiguous array */
jobid = (orte_jobid_t*)malloc(num_vals * sizeof(orte_jobid_t)); jobid = (orte_jobid_t*)malloc(num_vals * sizeof(orte_jobid_t));
if (NULL == jobid) { if (NULL == jobid) {
@ -84,7 +84,7 @@ int orte_dt_pack_name(opal_buffer_t *buffer, const void *src,
return rc; return rc;
} }
free(jobid); free(jobid);
/* collect all the vpids in a contiguous array */ /* collect all the vpids in a contiguous array */
vpid = (orte_vpid_t*)malloc(num_vals * sizeof(orte_vpid_t)); vpid = (orte_vpid_t*)malloc(num_vals * sizeof(orte_vpid_t));
if (NULL == vpid) { if (NULL == vpid) {
@ -115,13 +115,13 @@ int orte_dt_pack_jobid(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != ( if (ORTE_SUCCESS != (
ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_JOBID_T))) { ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_JOBID_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -132,13 +132,13 @@ int orte_dt_pack_vpid(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != ( if (ORTE_SUCCESS != (
ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_VPID_T))) { ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_VPID_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -176,7 +176,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if there are apps, pack the app_contexts */ /* if there are apps, pack the app_contexts */
if (0 < jobs[i]->num_apps) { if (0 < jobs[i]->num_apps) {
for (j=0; j < jobs[i]->apps->size; j++) { for (j=0; j < jobs[i]->apps->size; j++) {
@ -189,7 +189,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
} }
} }
} }
/* pack the number of procs and offset */ /* pack the number of procs and offset */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->num_procs)), 1, ORTE_VPID))) { (void*)(&(jobs[i]->num_procs)), 1, ORTE_VPID))) {
@ -220,14 +220,14 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the total slots allocated to the job */ /* pack the total slots allocated to the job */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->total_slots_alloc)), 1, ORTE_STD_CNTR))) { (void*)(&(jobs[i]->total_slots_alloc)), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if the map is NULL, then we cannot pack it as there is /* if the map is NULL, then we cannot pack it as there is
* nothing to pack. However, we have to flag whether or not * nothing to pack. However, we have to flag whether or not
* the map is included so the unpacking routine can know * the map is included so the unpacking routine can know
@ -244,7 +244,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the map - this will only pack the fields that control /* pack the map - this will only pack the fields that control
* HOW a job is to be mapped. We do -not- pack the mapped procs * HOW a job is to be mapped. We do -not- pack the mapped procs
* or nodes as this info does not need to be transmitted * or nodes as this info does not need to be transmitted
@ -256,23 +256,23 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
return rc; return rc;
} }
} }
/* do not pack the bookmark or oversubscribe_override flags */ /* do not pack the bookmark or oversubscribe_override flags */
/* pack the job state */ /* pack the job state */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->state)), 1, ORTE_JOB_STATE))) { (void*)(&(jobs[i]->state)), 1, ORTE_JOB_STATE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the flags */ /* pack the flags */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->flags)), 1, ORTE_JOB_FLAGS_T))) { (void*)(&(jobs[i]->flags)), 1, ORTE_JOB_FLAGS_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the attributes that need to be sent */ /* pack the attributes that need to be sent */
count = 0; count = 0;
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) { OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {
@ -310,33 +310,33 @@ int orte_dt_pack_node(opal_buffer_t *buffer, const void *src,
/* array of pointers to orte_node_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_node_t**) src; nodes = (orte_node_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* do not pack the index - it is meaningless on the other end */ /* do not pack the index - it is meaningless on the other end */
/* pack the node name */ /* pack the node name */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->name)), 1, OPAL_STRING))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->name)), 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* do not pack the daemon name or launch id */ /* do not pack the daemon name or launch id */
/* pack the number of procs on the node */ /* pack the number of procs on the node */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->num_procs)), 1, ORTE_VPID))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->num_procs)), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* do not pack the procs */ /* do not pack the procs */
/* pack whether we are oversubscribed or not */ /* pack whether we are oversubscribed or not */
flag = ORTE_FLAG_TEST(nodes[i], ORTE_NODE_FLAG_OVERSUBSCRIBED); flag = ORTE_FLAG_TEST(nodes[i], ORTE_NODE_FLAG_OVERSUBSCRIBED);
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&flag), 1, OPAL_UINT8))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&flag), 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the state */ /* pack the state */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->state)), 1, ORTE_NODE_STATE))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(nodes[i]->state)), 1, ORTE_NODE_STATE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -379,7 +379,7 @@ int orte_dt_pack_proc(opal_buffer_t *buffer, const void *src,
/* array of pointers to orte_proc_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_proc_t**) src; procs = (orte_proc_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the name */ /* pack the name */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
@ -387,7 +387,7 @@ int orte_dt_pack_proc(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the daemon/node it is on */ /* pack the daemon/node it is on */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(procs[i]->parent)), 1, ORTE_VPID))) { (void*)(&(procs[i]->parent)), 1, ORTE_VPID))) {
@ -401,21 +401,21 @@ int orte_dt_pack_proc(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the node rank */ /* pack the node rank */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(procs[i]->node_rank)), 1, ORTE_NODE_RANK))) { (void*)(&(procs[i]->node_rank)), 1, ORTE_NODE_RANK))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the state */ /* pack the state */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(procs[i]->state)), 1, ORTE_PROC_STATE))) { (void*)(&(procs[i]->state)), 1, ORTE_PROC_STATE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the app context index */ /* pack the app context index */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(procs[i]->app_idx)), 1, ORTE_STD_CNTR))) { (void*)(&(procs[i]->app_idx)), 1, ORTE_STD_CNTR))) {
@ -443,7 +443,7 @@ int orte_dt_pack_proc(opal_buffer_t *buffer, const void *src,
} }
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -549,7 +549,7 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
} }
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -560,11 +560,11 @@ int orte_dt_pack_exit_code(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_EXIT_CODE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_EXIT_CODE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -575,11 +575,11 @@ int orte_dt_pack_node_state(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_NODE_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_NODE_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -590,11 +590,11 @@ int orte_dt_pack_proc_state(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_PROC_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_PROC_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -605,11 +605,11 @@ int orte_dt_pack_job_state(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_JOB_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_JOB_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -624,10 +624,10 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
int rc; int rc;
int32_t i; int32_t i;
orte_job_map_t **maps; orte_job_map_t **maps;
/* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_job_map_t**) src; maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the requested mapper */ /* pack the requested mapper */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_STRING))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_STRING))) {
@ -655,7 +655,7 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the display map flag */ /* pack the display map flag */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->display_map), 1, OPAL_BOOL))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->display_map), 1, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -668,7 +668,7 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -679,12 +679,12 @@ int orte_dt_pack_tag(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type) int32_t num_vals, opal_data_type_t type)
{ {
int rc; int rc;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_RML_TAG_T))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_RML_TAG_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -695,12 +695,12 @@ int orte_dt_pack_daemon_cmd(opal_buffer_t *buffer, const void *src, int32_t num_
opal_data_type_t type) opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_DAEMON_CMD_T))) { if (ORTE_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_DAEMON_CMD_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -711,12 +711,12 @@ int orte_dt_pack_iof_tag(opal_buffer_t *buffer, const void *src, int32_t num_val
opal_data_type_t type) opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and pack the real type */ /* Turn around and pack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_IOF_TAG_T))) { if (ORTE_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_IOF_TAG_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -732,7 +732,7 @@ int orte_dt_pack_attr(opal_buffer_t *buffer, const void *src, int32_t num_vals,
int ret; int ret;
ptr = (orte_attribute_t **) src; ptr = (orte_attribute_t **) src;
for (i = 0; i < num_vals; ++i) { for (i = 0; i < num_vals; ++i) {
/* pack the key and type */ /* pack the key and type */
if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &ptr[i]->key, 1, ORTE_ATTR_KEY_T))) { if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &ptr[i]->key, 1, ORTE_ATTR_KEY_T))) {
@ -873,13 +873,18 @@ int orte_dt_pack_sig(opal_buffer_t *buffer, const void *src, int32_t num_vals,
int rc; int rc;
ptr = (orte_grpcomm_signature_t **) src; ptr = (orte_grpcomm_signature_t **) src;
for (i = 0; i < num_vals; ++i) { for (i = 0; i < num_vals; ++i) {
/* pack the #procs */ /* pack the #procs */
if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &ptr[i]->sz, 1, OPAL_SIZE))) { if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &ptr[i]->sz, 1, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the sequence number */
if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &ptr[i]->seq_num, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (0 < ptr[i]->sz) { if (0 < ptr[i]->sz) {
/* pack the array */ /* pack the array */
if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ptr[i]->signature, ptr[i]->sz, ORTE_NAME))) { if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ptr[i]->signature, ptr[i]->sz, ORTE_NAME))) {

Просмотреть файл

@ -49,24 +49,24 @@ static void orte_dt_quick_print(char **output, char *type_name, char *prefix, vo
uint16_t *ui16; uint16_t *ui16;
uint32_t *ui32; uint32_t *ui32;
uint64_t *ui64; uint64_t *ui64;
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
/* check for NULL ptr */ /* check for NULL ptr */
if (NULL == src) { if (NULL == src) {
asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: NULL pointer", asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: NULL pointer",
(NULL == prefix) ? "" : prefix, type_name); (NULL == prefix) ? "" : prefix, type_name);
return; return;
} }
switch(real_type) { switch(real_type) {
case OPAL_INT8: case OPAL_INT8:
i8 = (int8_t*)src; i8 = (int8_t*)src;
asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: %d", asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: %d",
(NULL == prefix) ? "" : prefix, type_name, (int) *i8); (NULL == prefix) ? "" : prefix, type_name, (int) *i8);
break; break;
case OPAL_UINT8: case OPAL_UINT8:
ui8 = (uint8_t*)src; ui8 = (uint8_t*)src;
asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: %u", asprintf(output, "%sData type: %s\tData size: 8-bit\tValue: %u",
@ -75,40 +75,40 @@ static void orte_dt_quick_print(char **output, char *type_name, char *prefix, vo
case OPAL_INT16: case OPAL_INT16:
i16 = (int16_t*)src; i16 = (int16_t*)src;
asprintf(output, "%sData type: %s\tData size: 16-bit\tValue: %d", asprintf(output, "%sData type: %s\tData size: 16-bit\tValue: %d",
(NULL == prefix) ? "" : prefix, type_name, (int) *i16); (NULL == prefix) ? "" : prefix, type_name, (int) *i16);
break; break;
case OPAL_UINT16: case OPAL_UINT16:
ui16 = (uint16_t*)src; ui16 = (uint16_t*)src;
asprintf(output, "%sData type: %s\tData size: 16-bit\tValue: %u", asprintf(output, "%sData type: %s\tData size: 16-bit\tValue: %u",
(NULL == prefix) ? "" : prefix, type_name, (unsigned int) *ui16); (NULL == prefix) ? "" : prefix, type_name, (unsigned int) *ui16);
break; break;
case OPAL_INT32: case OPAL_INT32:
i32 = (int32_t*)src; i32 = (int32_t*)src;
asprintf(output, "%sData type: %s\tData size: 32-bit\tValue: %ld", asprintf(output, "%sData type: %s\tData size: 32-bit\tValue: %ld",
(NULL == prefix) ? "" : prefix, type_name, (long) *i32); (NULL == prefix) ? "" : prefix, type_name, (long) *i32);
break; break;
case OPAL_UINT32: case OPAL_UINT32:
ui32 = (uint32_t*)src; ui32 = (uint32_t*)src;
asprintf(output, "%sData type: %s\tData size: 32-bit\tValue: %lu", asprintf(output, "%sData type: %s\tData size: 32-bit\tValue: %lu",
(NULL == prefix) ? "" : prefix, type_name, (unsigned long) *ui32); (NULL == prefix) ? "" : prefix, type_name, (unsigned long) *ui32);
break; break;
case OPAL_INT64: case OPAL_INT64:
i64 = (int64_t*)src; i64 = (int64_t*)src;
asprintf(output, "%sData type: %s\tData size: 64-bit\tValue: %ld", asprintf(output, "%sData type: %s\tData size: 64-bit\tValue: %ld",
(NULL == prefix) ? "" : prefix, type_name, (long) *i64); (NULL == prefix) ? "" : prefix, type_name, (long) *i64);
break; break;
case OPAL_UINT64: case OPAL_UINT64:
ui64 = (uint64_t*)src; ui64 = (uint64_t*)src;
asprintf(output, "%sData type: %s\tData size: 64-bit\tValue: %lu", asprintf(output, "%sData type: %s\tData size: 64-bit\tValue: %lu",
(NULL == prefix) ? "" : prefix, type_name, (unsigned long) *ui64); (NULL == prefix) ? "" : prefix, type_name, (unsigned long) *ui64);
break; break;
default: default:
return; return;
} }
@ -123,7 +123,7 @@ int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t t
{ {
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
switch(type) { switch(type) {
case ORTE_STD_CNTR: case ORTE_STD_CNTR:
orte_dt_quick_print(output, "ORTE_STD_CNTR", prefix, src, ORTE_STD_CNTR_T); orte_dt_quick_print(output, "ORTE_STD_CNTR", prefix, src, ORTE_STD_CNTR_T);
@ -132,33 +132,33 @@ int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t t
case ORTE_VPID: case ORTE_VPID:
orte_dt_quick_print(output, "ORTE_VPID", prefix, src, ORTE_VPID_T); orte_dt_quick_print(output, "ORTE_VPID", prefix, src, ORTE_VPID_T);
break; break;
case ORTE_JOBID: case ORTE_JOBID:
asprintf(output, "%sData Type: ORTE_JOBID\tData size: %lu\tValue: %s", asprintf(output, "%sData Type: ORTE_JOBID\tData size: %lu\tValue: %s",
(NULL == prefix) ? "" : prefix, (unsigned long)sizeof(orte_jobid_t), (NULL == prefix) ? "" : prefix, (unsigned long)sizeof(orte_jobid_t),
ORTE_JOBID_PRINT(*(orte_jobid_t*)src)); ORTE_JOBID_PRINT(*(orte_jobid_t*)src));
break; break;
case ORTE_PROC_STATE: case ORTE_PROC_STATE:
orte_dt_quick_print(output, "ORTE_PROC_STATE", prefix, src, ORTE_PROC_STATE_T); orte_dt_quick_print(output, "ORTE_PROC_STATE", prefix, src, ORTE_PROC_STATE_T);
break; break;
case ORTE_JOB_STATE: case ORTE_JOB_STATE:
orte_dt_quick_print(output, "ORTE_JOB_STATE", prefix, src, ORTE_JOB_STATE_T); orte_dt_quick_print(output, "ORTE_JOB_STATE", prefix, src, ORTE_JOB_STATE_T);
break; break;
case ORTE_NODE_STATE: case ORTE_NODE_STATE:
orte_dt_quick_print(output, "ORTE_NODE_STATE", prefix, src, ORTE_NODE_STATE_T); orte_dt_quick_print(output, "ORTE_NODE_STATE", prefix, src, ORTE_NODE_STATE_T);
break; break;
case ORTE_EXIT_CODE: case ORTE_EXIT_CODE:
orte_dt_quick_print(output, "ORTE_EXIT_CODE", prefix, src, ORTE_EXIT_CODE_T); orte_dt_quick_print(output, "ORTE_EXIT_CODE", prefix, src, ORTE_EXIT_CODE_T);
break; break;
case ORTE_RML_TAG: case ORTE_RML_TAG:
orte_dt_quick_print(output, "ORTE_RML_TAG", prefix, src, ORTE_RML_TAG_T); orte_dt_quick_print(output, "ORTE_RML_TAG", prefix, src, ORTE_RML_TAG_T);
break; break;
case ORTE_DAEMON_CMD: case ORTE_DAEMON_CMD:
orte_dt_quick_print(output, "ORTE_DAEMON_CMD", prefix, src, ORTE_DAEMON_CMD_T); orte_dt_quick_print(output, "ORTE_DAEMON_CMD", prefix, src, ORTE_DAEMON_CMD_T);
break; break;
@ -166,12 +166,12 @@ int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t t
case ORTE_IOF_TAG: case ORTE_IOF_TAG:
orte_dt_quick_print(output, "ORTE_IOF_TAG", prefix, src, ORTE_IOF_TAG_T); orte_dt_quick_print(output, "ORTE_IOF_TAG", prefix, src, ORTE_IOF_TAG_T);
break; break;
default: default:
ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE);
return ORTE_ERR_UNKNOWN_DATA_TYPE; return ORTE_ERR_UNKNOWN_DATA_TYPE;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -182,7 +182,7 @@ int orte_dt_print_name(char **output, char *prefix, orte_process_name_t *name, o
{ {
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
if (NULL == name) { if (NULL == name) {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL", asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL",
(NULL == prefix ? " " : prefix)); (NULL == prefix ? " " : prefix));
@ -190,7 +190,7 @@ int orte_dt_print_name(char **output, char *prefix, orte_process_name_t *name, o
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: %s", asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: %s",
(NULL == prefix ? " " : prefix), ORTE_NAME_PRINT(name)); (NULL == prefix ? " " : prefix), ORTE_NAME_PRINT(name));
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -225,7 +225,7 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
orte_job_state_to_str(src->state), (ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_ABORTED)) ? "True" : "False"); orte_job_state_to_str(src->state), (ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_ABORTED)) ? "True" : "False");
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
free(pfx2); free(pfx2);
for (i=0; i < src->apps->size; i++) { for (i=0; i < src->apps->size; i++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(src->apps, i))) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(src->apps, i))) {
continue; continue;
@ -236,7 +236,7 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
free(tmp2); free(tmp2);
tmp = tmp3; tmp = tmp3;
} }
if (NULL != src->map) { if (NULL != src->map) {
if (ORTE_SUCCESS != (rc = opal_dss.print(&tmp2, pfx, src->map, ORTE_JOB_MAP))) { if (ORTE_SUCCESS != (rc = opal_dss.print(&tmp2, pfx, src->map, ORTE_JOB_MAP))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -251,7 +251,7 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
} }
asprintf(&tmp2, "%s\n%sNum procs: %ld\tOffset: %ld", tmp, pfx, (long)src->num_procs, (long)src->offset); asprintf(&tmp2, "%s\n%sNum procs: %ld\tOffset: %ld", tmp, pfx, (long)src->num_procs, (long)src->offset);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
@ -275,7 +275,7 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
(long)src->num_terminated); (long)src->num_terminated);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
/* set the return */ /* set the return */
*output = tmp; *output = tmp;
free(pfx); free(pfx);
@ -295,7 +295,7 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
char **alias; char **alias;
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
/* protect against NULL prefix */ /* protect against NULL prefix */
if (NULL == prefix) { if (NULL == prefix) {
asprintf(&pfx2, " "); asprintf(&pfx2, " ");
@ -326,7 +326,7 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
free(pfx2); free(pfx2);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
if (!orte_devel_level_output) { if (!orte_devel_level_output) {
/* just provide a simple output for users */ /* just provide a simple output for users */
if (0 == src->num_procs) { if (0 == src->num_procs) {
@ -371,7 +371,7 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
} }
goto PRINT_PROCS; goto PRINT_PROCS;
} }
asprintf(&tmp, "\n%sData for node: %s\tState: %0x", asprintf(&tmp, "\n%sData for node: %s\tState: %0x",
pfx2, (NULL == src->name) ? "UNKNOWN" : src->name, src->state); pfx2, (NULL == src->name) ? "UNKNOWN" : src->name, src->state);
/* does this node have any aliases? */ /* does this node have any aliases? */
@ -388,7 +388,7 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
if (NULL != tmp3) { if (NULL != tmp3) {
free(tmp3); free(tmp3);
} }
if (NULL == src->daemon) { if (NULL == src->daemon) {
asprintf(&tmp2, "%s\n%s\tDaemon: %s\tDaemon launched: %s", tmp, pfx2, asprintf(&tmp2, "%s\n%s\tDaemon: %s\tDaemon launched: %s", tmp, pfx2,
"Not defined", ORTE_FLAG_TEST(src, ORTE_NODE_FLAG_DAEMON_LAUNCHED) ? "True" : "False"); "Not defined", ORTE_FLAG_TEST(src, ORTE_NODE_FLAG_DAEMON_LAUNCHED) ? "True" : "False");
@ -399,18 +399,18 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
} }
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
asprintf(&tmp2, "%s\n%s\tNum slots: %ld\tSlots in use: %ld\tOversubscribed: %s", tmp, pfx2, asprintf(&tmp2, "%s\n%s\tNum slots: %ld\tSlots in use: %ld\tOversubscribed: %s", tmp, pfx2,
(long)src->slots, (long)src->slots_inuse, (long)src->slots, (long)src->slots_inuse,
ORTE_FLAG_TEST(src, ORTE_NODE_FLAG_OVERSUBSCRIBED) ? "TRUE" : "FALSE"); ORTE_FLAG_TEST(src, ORTE_NODE_FLAG_OVERSUBSCRIBED) ? "TRUE" : "FALSE");
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
asprintf(&tmp2, "%s\n%s\tNum slots allocated: %ld\tMax slots: %ld", tmp, pfx2, asprintf(&tmp2, "%s\n%s\tNum slots allocated: %ld\tMax slots: %ld", tmp, pfx2,
(long)src->slots, (long)src->slots_max); (long)src->slots, (long)src->slots_max);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
tmp3 = NULL; tmp3 = NULL;
if (orte_get_attribute(&src->attributes, ORTE_NODE_USERNAME, (void**)&tmp3, OPAL_STRING)) { if (orte_get_attribute(&src->attributes, ORTE_NODE_USERNAME, (void**)&tmp3, OPAL_STRING)) {
asprintf(&tmp2, "%s\n%s\tUsername on node: %s", tmp, pfx2, tmp3); asprintf(&tmp2, "%s\n%s\tUsername on node: %s", tmp, pfx2, tmp3);
@ -418,14 +418,14 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
} }
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
if (orte_display_topo_with_map && NULL != src->topology) { if (orte_display_topo_with_map && NULL != src->topology) {
char *pfx3; char *pfx3;
asprintf(&tmp2, "%s\n%s\tDetected Resources:\n", tmp, pfx2); asprintf(&tmp2, "%s\n%s\tDetected Resources:\n", tmp, pfx2);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
tmp2 = NULL; tmp2 = NULL;
asprintf(&pfx3, "%s\t\t", pfx2); asprintf(&pfx3, "%s\t\t", pfx2);
opal_dss.print(&tmp2, pfx3, src->topology, OPAL_HWLOC_TOPO); opal_dss.print(&tmp2, pfx3, src->topology, OPAL_HWLOC_TOPO);
@ -433,7 +433,7 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
asprintf(&tmp3, "%s%s", tmp, tmp2); asprintf(&tmp3, "%s%s", tmp, tmp2);
free(tmp); free(tmp);
free(tmp2); free(tmp2);
tmp = tmp3; tmp = tmp3;
} }
#endif #endif
@ -441,11 +441,11 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
(long)src->num_procs, (long)src->next_node_rank); (long)src->num_procs, (long)src->next_node_rank);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
PRINT_PROCS: PRINT_PROCS:
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
free(pfx2); free(pfx2);
for (i=0; i < src->procs->size; i++) { for (i=0; i < src->procs->size; i++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(src->procs, i))) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(src->procs, i))) {
continue; continue;
@ -460,10 +460,10 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
tmp = tmp3; tmp = tmp3;
} }
free(pfx); free(pfx);
/* set the return */ /* set the return */
*output = tmp; *output = tmp;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -476,14 +476,14 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
/* protect against NULL prefix */ /* protect against NULL prefix */
if (NULL == prefix) { if (NULL == prefix) {
asprintf(&pfx2, " "); asprintf(&pfx2, " ");
} else { } else {
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
if (orte_xml_output) { if (orte_xml_output) {
/* need to create the output in XML format */ /* need to create the output in XML format */
if (0 == src->pid) { if (0 == src->pid) {
@ -496,7 +496,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
free(pfx2); free(pfx2);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
if (!orte_devel_level_output) { if (!orte_devel_level_output) {
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
{ {
@ -544,12 +544,12 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
} }
asprintf(&tmp, "\n%sData for proc: %s", pfx2, ORTE_NAME_PRINT(&src->name)); asprintf(&tmp, "\n%sData for proc: %s", pfx2, ORTE_NAME_PRINT(&src->name));
asprintf(&tmp2, "%s\n%s\tPid: %ld\tLocal rank: %lu\tNode rank: %lu\tApp rank: %d", tmp, pfx2, asprintf(&tmp2, "%s\n%s\tPid: %ld\tLocal rank: %lu\tNode rank: %lu\tApp rank: %d", tmp, pfx2,
(long)src->pid, (unsigned long)src->local_rank, (unsigned long)src->node_rank, src->app_rank); (long)src->pid, (unsigned long)src->local_rank, (unsigned long)src->node_rank, src->app_rank);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
{ {
char *locale=NULL; char *locale=NULL;
@ -588,10 +588,10 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
orte_proc_state_to_str(src->state), (long)src->app_idx); orte_proc_state_to_str(src->state), (long)src->app_idx);
#endif #endif
free(tmp); free(tmp);
/* set the return */ /* set the return */
*output = tmp2; *output = tmp2;
free(pfx2); free(pfx2);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -607,34 +607,34 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
/* protect against NULL prefix */ /* protect against NULL prefix */
if (NULL == prefix) { if (NULL == prefix) {
asprintf(&pfx2, " "); asprintf(&pfx2, " ");
} else { } else {
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
asprintf(&tmp, "\n%sData for app_context: index %lu\tapp: %s\n%s\tNum procs: %lu\tFirstRank: %s", asprintf(&tmp, "\n%sData for app_context: index %lu\tapp: %s\n%s\tNum procs: %lu\tFirstRank: %s",
pfx2, (unsigned long)src->idx, pfx2, (unsigned long)src->idx,
(NULL == src->app) ? "NULL" : src->app, (NULL == src->app) ? "NULL" : src->app,
pfx2, (unsigned long)src->num_procs, pfx2, (unsigned long)src->num_procs,
ORTE_VPID_PRINT(src->first_rank)); ORTE_VPID_PRINT(src->first_rank));
count = opal_argv_count(src->argv); count = opal_argv_count(src->argv);
for (i=0; i < count; i++) { for (i=0; i < count; i++) {
asprintf(&tmp2, "%s\n%s\tArgv[%d]: %s", tmp, pfx2, i, src->argv[i]); asprintf(&tmp2, "%s\n%s\tArgv[%d]: %s", tmp, pfx2, i, src->argv[i]);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
} }
count = opal_argv_count(src->env); count = opal_argv_count(src->env);
for (i=0; i < count; i++) { for (i=0; i < count; i++) {
asprintf(&tmp2, "%s\n%s\tEnv[%lu]: %s", tmp, pfx2, (unsigned long)i, src->env[i]); asprintf(&tmp2, "%s\n%s\tEnv[%lu]: %s", tmp, pfx2, (unsigned long)i, src->env[i]);
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
} }
tmp3 = NULL; tmp3 = NULL;
orte_get_attribute(&src->attributes, ORTE_APP_PREFIX_DIR, (void**)&tmp3, OPAL_STRING); orte_get_attribute(&src->attributes, ORTE_APP_PREFIX_DIR, (void**)&tmp3, OPAL_STRING);
asprintf(&tmp2, "%s\n%s\tWorking dir: %s\n%s\tPrefix: %s\n%s\tUsed on node: %s", tmp, asprintf(&tmp2, "%s\n%s\tWorking dir: %s\n%s\tPrefix: %s\n%s\tUsed on node: %s", tmp,
@ -643,7 +643,7 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
pfx2, ORTE_FLAG_TEST(src, ORTE_APP_FLAG_USED_ON_NODE) ? "TRUE" : "FALSE"); pfx2, ORTE_FLAG_TEST(src, ORTE_APP_FLAG_USED_ON_NODE) ? "TRUE" : "FALSE");
free(tmp); free(tmp);
tmp = tmp2; tmp = tmp2;
OPAL_LIST_FOREACH(kv, &src->attributes, opal_value_t) { OPAL_LIST_FOREACH(kv, &src->attributes, opal_value_t) {
opal_dss.print(&tmp2, pfx2, kv, ORTE_ATTRIBUTE); opal_dss.print(&tmp2, pfx2, kv, ORTE_ATTRIBUTE);
asprintf(&tmp3, "%s\n%s", tmp, tmp2); asprintf(&tmp3, "%s\n%s", tmp, tmp2);
@ -654,7 +654,7 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
/* set the return */ /* set the return */
*output = tmp; *output = tmp;
free(pfx2); free(pfx2);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -669,17 +669,17 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
int rc; int rc;
orte_node_t *node; orte_node_t *node;
orte_proc_t *proc; orte_proc_t *proc;
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
/* protect against NULL prefix */ /* protect against NULL prefix */
if (NULL == prefix) { if (NULL == prefix) {
asprintf(&pfx2, " "); asprintf(&pfx2, " ");
} else { } else {
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
if (orte_xml_output) { if (orte_xml_output) {
/* need to create the output in XML format */ /* need to create the output in XML format */
asprintf(&tmp, "<map>\n"); asprintf(&tmp, "<map>\n");
@ -713,11 +713,11 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
free(pfx2); free(pfx2);
*output = tmp2; *output = tmp2;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
if (orte_devel_level_output) { if (orte_devel_level_output) {
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
asprintf(&tmp, "\n%sMapper requested: %s Last mapper: %s Mapping policy: %s Ranking policy: %s\n%sBinding policy: %s Cpu set: %s PPR: %s Cpus-per-rank: %d", asprintf(&tmp, "\n%sMapper requested: %s Last mapper: %s Mapping policy: %s Ranking policy: %s\n%sBinding policy: %s Cpu set: %s PPR: %s Cpus-per-rank: %d",
@ -753,8 +753,8 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
/* this is being printed for a user, so let's make it easier to see */ /* this is being printed for a user, so let's make it easier to see */
asprintf(&tmp, "\n%s======================== JOB MAP ========================", pfx2); asprintf(&tmp, "\n%s======================== JOB MAP ========================", pfx2);
} }
for (i=0; i < src->nodes->size; i++) { for (i=0; i < src->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(src->nodes, i))) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(src->nodes, i))) {
continue; continue;
@ -770,7 +770,7 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
free(tmp2); free(tmp2);
tmp = tmp3; tmp = tmp3;
} }
if (!orte_devel_level_output) { if (!orte_devel_level_output) {
/* this is being printed for a user, so let's make it easier to see */ /* this is being printed for a user, so let's make it easier to see */
asprintf(&tmp2, "%s\n\n%s=============================================================\n", tmp, pfx2); asprintf(&tmp2, "%s\n\n%s=============================================================\n", tmp, pfx2);
@ -778,10 +778,10 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
tmp = tmp2; tmp = tmp2;
} }
free(pfx2); free(pfx2);
/* set the return */ /* set the return */
*output = tmp; *output = tmp;
free(pfx); free(pfx);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -795,14 +795,14 @@ int orte_dt_print_attr(char **output, char *prefix,
/* deal with NULL prefix */ /* deal with NULL prefix */
if (NULL == prefix) asprintf(&prefx, " "); if (NULL == prefix) asprintf(&prefx, " ");
else prefx = strdup(prefix); else prefx = strdup(prefix);
/* if src is NULL, just print data type and return */ /* if src is NULL, just print data type and return */
if (NULL == src) { if (NULL == src) {
asprintf(output, "%sData type: ORTE_ATTR\tValue: NULL pointer", prefx); asprintf(output, "%sData type: ORTE_ATTR\tValue: NULL pointer", prefx);
free(prefx); free(prefx);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
switch (src->type) { switch (src->type) {
case OPAL_STRING: case OPAL_STRING:
asprintf(output, "%sORTE_ATTR: Local: %s Data type: OPAL_STRING\tKey: %s\tValue: %s", asprintf(output, "%sORTE_ATTR: Local: %s Data type: OPAL_STRING\tKey: %s\tValue: %s",
@ -902,7 +902,7 @@ int orte_dt_print_sig(char **output, char *prefix, orte_grpcomm_signature_t *src
/* deal with NULL prefix */ /* deal with NULL prefix */
if (NULL == prefix) asprintf(&prefx, " "); if (NULL == prefix) asprintf(&prefx, " ");
else prefx = strdup(prefix); else prefx = strdup(prefix);
/* if src is NULL, just print data type and return */ /* if src is NULL, just print data type and return */
if (NULL == src) { if (NULL == src) {
asprintf(output, "%sData type: ORTE_SIG\tValue: NULL pointer", prefx); asprintf(output, "%sData type: ORTE_SIG\tValue: NULL pointer", prefx);
@ -917,7 +917,7 @@ int orte_dt_print_sig(char **output, char *prefix, orte_grpcomm_signature_t *src
} }
/* there must be at least one */ /* there must be at least one */
asprintf(&tmp, "%sORTE_SIG\tValue: ", prefx); asprintf(&tmp, "%sORTE_SIG\tSeqNumber:%d\tValue: ", prefx, src->seq_num);
for (i=0; i < src->sz; i++) { for (i=0; i < src->sz; i++) {
asprintf(&tmp2, "%s%s", tmp, ORTE_NAME_PRINT(&src->signature[i])); asprintf(&tmp2, "%s%s", tmp, ORTE_NAME_PRINT(&src->signature[i]));

Просмотреть файл

@ -39,12 +39,12 @@ int orte_dt_unpack_std_cntr(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and unpack the real type */ /* Turn around and unpack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_STD_CNTR_T))) { if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_STD_CNTR_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -59,9 +59,9 @@ int orte_dt_unpack_name(opal_buffer_t *buffer, void *dest,
orte_process_name_t* proc; orte_process_name_t* proc;
orte_jobid_t *jobid; orte_jobid_t *jobid;
orte_vpid_t *vpid; orte_vpid_t *vpid;
num = *num_vals; num = *num_vals;
/* allocate space for all the jobids in a contiguous array */ /* allocate space for all the jobids in a contiguous array */
jobid = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t)); jobid = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t));
if (NULL == jobid) { if (NULL == jobid) {
@ -77,7 +77,7 @@ int orte_dt_unpack_name(opal_buffer_t *buffer, void *dest,
free(jobid); free(jobid);
return rc; return rc;
} }
/* collect all the vpids in a contiguous array */ /* collect all the vpids in a contiguous array */
vpid = (orte_vpid_t*)malloc(num * sizeof(orte_vpid_t)); vpid = (orte_vpid_t*)malloc(num * sizeof(orte_vpid_t));
if (NULL == vpid) { if (NULL == vpid) {
@ -95,7 +95,7 @@ int orte_dt_unpack_name(opal_buffer_t *buffer, void *dest,
free(jobid); free(jobid);
return rc; return rc;
} }
/* build the names from the jobid/vpid arrays */ /* build the names from the jobid/vpid arrays */
proc = (orte_process_name_t*)dest; proc = (orte_process_name_t*)dest;
for (i=0; i < num; i++) { for (i=0; i < num; i++) {
@ -103,11 +103,11 @@ int orte_dt_unpack_name(opal_buffer_t *buffer, void *dest,
proc->vpid = vpid[i]; proc->vpid = vpid[i];
proc++; proc++;
} }
/* cleanup */ /* cleanup */
free(vpid); free(vpid);
free(jobid); free(jobid);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -118,12 +118,12 @@ int orte_dt_unpack_jobid(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and unpack the real type */ /* Turn around and unpack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_JOBID_T))) { if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_JOBID_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -134,12 +134,12 @@ int orte_dt_unpack_vpid(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and unpack the real type */ /* Turn around and unpack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_VPID_T))) { if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_VPID_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -186,7 +186,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if there are apps, unpack them */ /* if there are apps, unpack them */
if (0 < jobs[i]->num_apps) { if (0 < jobs[i]->num_apps) {
orte_app_context_t *app; orte_app_context_t *app;
@ -200,7 +200,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
opal_pointer_array_add(jobs[i]->apps, app); opal_pointer_array_add(jobs[i]->apps, app);
} }
} }
/* unpack num procs and offset */ /* unpack num procs and offset */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -235,7 +235,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the total slots allocated to the job */ /* unpack the total slots allocated to the job */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -243,7 +243,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if the map is NULL, then we didn't pack it as there was /* if the map is NULL, then we didn't pack it as there was
* nothing to pack. Instead, we packed a flag to indicate whether or not * nothing to pack. Instead, we packed a flag to indicate whether or not
* the map is included */ * the map is included */
@ -262,9 +262,9 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
return rc; return rc;
} }
} }
/* no bookmark of oversubscribe_override flags to unpack */ /* no bookmark of oversubscribe_override flags to unpack */
/* unpack the job state */ /* unpack the job state */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -272,7 +272,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the flags */ /* unpack the flags */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -318,16 +318,16 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest,
/* unpack into array of orte_node_t objects */ /* unpack into array of orte_node_t objects */
nodes = (orte_node_t**) dest; nodes = (orte_node_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the node object */ /* create the node object */
nodes[i] = OBJ_NEW(orte_node_t); nodes[i] = OBJ_NEW(orte_node_t);
if (NULL == nodes[i]) { if (NULL == nodes[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* do not unpack the index - meaningless here */ /* do not unpack the index - meaningless here */
/* unpack the node name */ /* unpack the node name */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -335,9 +335,9 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* do not unpack the daemon name or launch id */ /* do not unpack the daemon name or launch id */
/* unpack the number of procs on the node */ /* unpack the number of procs on the node */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -345,9 +345,9 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* do not unpack the proc info */ /* do not unpack the proc info */
/* unpack whether we are oversubscribed */ /* unpack whether we are oversubscribed */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -366,7 +366,7 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the attributes */ /* unpack the attributes */
n=1; n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,
@ -398,18 +398,18 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
int32_t i, n, count, k; int32_t i, n, count, k;
orte_attribute_t *kv;; orte_attribute_t *kv;;
orte_proc_t **procs; orte_proc_t **procs;
/* unpack into array of orte_proc_t objects */ /* unpack into array of orte_proc_t objects */
procs = (orte_proc_t**) dest; procs = (orte_proc_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the orte_proc_t object */ /* create the orte_proc_t object */
procs[i] = OBJ_NEW(orte_proc_t); procs[i] = OBJ_NEW(orte_proc_t);
if (NULL == procs[i]) { if (NULL == procs[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the name */ /* unpack the name */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -417,7 +417,7 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the node it is on */ /* unpack the node it is on */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -425,7 +425,7 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the local rank */ /* unpack the local rank */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -433,7 +433,7 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the node rank */ /* unpack the node rank */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -441,7 +441,7 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the state */ /* unpack the state */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -449,7 +449,7 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the app context index */ /* unpack the app context index */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -619,11 +619,11 @@ int orte_dt_unpack_exit_code(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_EXIT_CODE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_EXIT_CODE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -634,11 +634,11 @@ int orte_dt_unpack_node_state(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_NODE_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_NODE_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -649,11 +649,11 @@ int orte_dt_unpack_proc_state(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_PROC_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_PROC_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -664,11 +664,11 @@ int orte_dt_unpack_job_state(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_JOB_STATE_T))) { if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_JOB_STATE_T))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
return rc; return rc;
} }
@ -683,18 +683,18 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
int rc; int rc;
int32_t i, n; int32_t i, n;
orte_job_map_t **maps; orte_job_map_t **maps;
/* unpack into array of orte_job_map_t objects */ /* unpack into array of orte_job_map_t objects */
maps = (orte_job_map_t**) dest; maps = (orte_job_map_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_map_t object */ /* create the orte_rmaps_base_map_t object */
maps[i] = OBJ_NEW(orte_job_map_t); maps[i] = OBJ_NEW(orte_job_map_t);
if (NULL == maps[i]) { if (NULL == maps[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the requested mapper */ /* unpack the requested mapper */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -702,7 +702,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the policies */ /* unpack the policies */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -731,7 +731,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the display map flag */ /* unpack the display map flag */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -747,7 +747,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
return rc; return rc;
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -758,12 +758,12 @@ int orte_dt_unpack_tag(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type) int32_t *num_vals, opal_data_type_t type)
{ {
int ret; int ret;
/* Turn around and unpack the real type */ /* Turn around and unpack the real type */
if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_RML_TAG_T))) { if (ORTE_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_RML_TAG_T))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
} }
return ret; return ret;
} }
@ -774,10 +774,10 @@ int orte_dt_unpack_daemon_cmd(opal_buffer_t *buffer, void *dest, int32_t *num_va
opal_data_type_t type) opal_data_type_t type)
{ {
int ret; int ret;
/* turn around and unpack the real type */ /* turn around and unpack the real type */
ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T); ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T);
return ret; return ret;
} }
@ -788,10 +788,10 @@ int orte_dt_unpack_iof_tag(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
opal_data_type_t type) opal_data_type_t type)
{ {
int ret; int ret;
/* turn around and unpack the real type */ /* turn around and unpack the real type */
ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_IOF_TAG_T); ret = opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_IOF_TAG_T);
return ret; return ret;
} }
@ -808,7 +808,7 @@ int orte_dt_unpack_attr(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
ptr = (orte_attribute_t **) dest; ptr = (orte_attribute_t **) dest;
n = *num_vals; n = *num_vals;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
/* allocate the new object */ /* allocate the new object */
ptr[i] = OBJ_NEW(orte_attribute_t); ptr[i] = OBJ_NEW(orte_attribute_t);
@ -959,7 +959,7 @@ int orte_dt_unpack_sig(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
ptr = (orte_grpcomm_signature_t **) dest; ptr = (orte_grpcomm_signature_t **) dest;
n = *num_vals; n = *num_vals;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
/* allocate the new object */ /* allocate the new object */
ptr[i] = OBJ_NEW(orte_grpcomm_signature_t); ptr[i] = OBJ_NEW(orte_grpcomm_signature_t);
@ -972,6 +972,10 @@ int orte_dt_unpack_sig(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ptr[i]->seq_num, &cnt, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (0 < ptr[i]->sz) { if (0 < ptr[i]->sz) {
/* allocate space for the array */ /* allocate space for the array */
ptr[i]->signature = (orte_process_name_t*)malloc(ptr[i]->sz * sizeof(orte_process_name_t)); ptr[i]->signature = (orte_process_name_t*)malloc(ptr[i]->sz * sizeof(orte_process_name_t));