Merge pull request #2654 from rhc54/topic/memory
Remove stale global variables
Этот коммит содержится в:
Коммит
218aed144d
1
.gitignore
поставляемый
1
.gitignore
поставляемый
@ -111,6 +111,7 @@ contrib/platform/intel/bend/*orcm*
|
||||
contrib/scaling/orte_no_op
|
||||
contrib/scaling/mpi_no_op
|
||||
contrib/scaling/mpi_barrier
|
||||
contrib/scaling/mpi_memprobe
|
||||
|
||||
examples/hello_c
|
||||
examples/hello_cxx
|
||||
|
@ -1,4 +1,4 @@
|
||||
PROGS = orte_no_op mpi_no_op
|
||||
PROGS = orte_no_op mpi_no_op mpi_memprobe
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
@ -10,5 +10,8 @@ orte_no_op:
|
||||
mpi_no_op:
|
||||
mpicc -o mpi_no_op mpi_no_op.c
|
||||
|
||||
mpi_memprobe:
|
||||
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
|
||||
|
||||
clean:
|
||||
rm -f $(PROGS) *~
|
||||
|
98
contrib/scaling/mpi_memprobe.c
Обычный файл
98
contrib/scaling/mpi_memprobe.c
Обычный файл
@ -0,0 +1,98 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* The most basic of MPI applications
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
static volatile int active;
|
||||
static volatile bool wait_for_release = true;
|
||||
#define MEMPROBE_RELEASE 12345
|
||||
|
||||
static void _release_fn(int status,
|
||||
const opal_process_name_t *source,
|
||||
opal_list_t *info, opal_list_t *results,
|
||||
opal_pmix_notification_complete_fn_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
/* must let the notifier know we are done */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(0, NULL, NULL, NULL, cbdata);
|
||||
}
|
||||
/* flag that the debugger is complete so we can exit */
|
||||
wait_for_release = false;
|
||||
}
|
||||
|
||||
static void _register_fn(int status,
|
||||
size_t evhandler_ref,
|
||||
void *cbdata)
|
||||
{
|
||||
volatile int *active = (volatile int*)cbdata;
|
||||
|
||||
if (0 != status) {
|
||||
fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
|
||||
status, (unsigned long)evhandler_ref);
|
||||
}
|
||||
*active = status;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int rank, size;
|
||||
opal_list_t *codes;
|
||||
opal_value_t *kv;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
|
||||
if (0 == rank) {
|
||||
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
|
||||
}
|
||||
|
||||
codes = OBJ_NEW(opal_list_t);
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup("errorcode");
|
||||
kv->type = OPAL_INT;
|
||||
kv->data.integer = MEMPROBE_RELEASE;
|
||||
opal_list_append(codes, &kv->super);
|
||||
|
||||
active = -1;
|
||||
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
|
||||
while (-1 == active) {
|
||||
usleep(10);
|
||||
}
|
||||
|
||||
/* now wait for notification */
|
||||
while (wait_for_release) {
|
||||
usleep(10);
|
||||
}
|
||||
wait_for_release = true;
|
||||
|
||||
/* perform a barrier so some communication will occur, thus
|
||||
* requiring exchange of endpoint info */
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
if (0 == rank) {
|
||||
fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
|
||||
}
|
||||
|
||||
/* wait again while memory is sampled */
|
||||
while (wait_for_release) {
|
||||
usleep(10);
|
||||
}
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
@ -157,6 +157,7 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
|
||||
|
||||
/* information about relative ranks as assigned by the RM */
|
||||
#define PMIX_PROCID "pmix.procid" // (pmix_proc_t) process identifier
|
||||
#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
|
||||
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
|
||||
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
|
||||
@ -282,6 +283,8 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform
|
||||
#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
|
||||
#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
|
||||
#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
|
||||
#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
|
||||
|
||||
/* log attributes */
|
||||
#define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -553,11 +553,18 @@ static void _putfn(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
pmix_kval_t *kv = NULL;
|
||||
pmix_nspace_t *ns;
|
||||
uint8_t *tmp;
|
||||
size_t len;
|
||||
|
||||
/* no need to push info that starts with "pmix" as that is
|
||||
* info we would have been provided at startup */
|
||||
if (0 == strncmp(cb->key, "pmix", 4)) {
|
||||
rc = PMIX_SUCCESS;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* setup to xfer the data */
|
||||
kv = PMIX_NEW(pmix_kval_t);
|
||||
kv->key = strdup(cb->key); // need to copy as the input belongs to the user
|
||||
@ -622,7 +629,9 @@ static void _putfn(int sd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
done:
|
||||
PMIX_RELEASE(kv); // maintain accounting
|
||||
if (NULL != kv) {
|
||||
PMIX_RELEASE(kv); // maintain accounting
|
||||
}
|
||||
cb->pstatus = rc;
|
||||
cb->active = false;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -763,7 +763,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* us to attempt to retrieve it from the server */
|
||||
for (n=0; n < cb->ninfo; n++) {
|
||||
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
|
||||
cb->info[n].value.data.flag) {
|
||||
(PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
|
||||
/* they don't want us to try and retrieve it */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -356,6 +356,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
|
||||
sing->code = cd->codes[0];
|
||||
index = pmix_globals.events.nhdlrs;
|
||||
sing->index = index;
|
||||
sing->evhdlr = cd->evhdlr;
|
||||
++pmix_globals.events.nhdlrs;
|
||||
sing->cbobject = cbobject;
|
||||
rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super,
|
||||
@ -365,17 +366,17 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
|
||||
PMIX_ERR_WOULD_BLOCK != rc) {
|
||||
/* unable to register */
|
||||
--pmix_globals.events.nhdlrs;
|
||||
rc = PMIX_ERR_EVENT_REGISTRATION;
|
||||
index = UINT_MAX;
|
||||
rc = PMIX_ERR_EVENT_REGISTRATION;
|
||||
index = UINT_MAX;
|
||||
goto ack;
|
||||
}
|
||||
if (PMIX_ERR_WOULD_BLOCK == rc) {
|
||||
/* the callback will provide our response */
|
||||
PMIX_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
goto ack;
|
||||
}
|
||||
if (PMIX_ERR_WOULD_BLOCK == rc) {
|
||||
/* the callback will provide our response */
|
||||
PMIX_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
goto ack;
|
||||
}
|
||||
|
||||
/* must be a multi-code registration */
|
||||
multi = PMIX_NEW(pmix_multi_event_t);
|
||||
@ -387,6 +388,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
|
||||
memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t));
|
||||
index = pmix_globals.events.nhdlrs;
|
||||
multi->index = index;
|
||||
multi->evhdlr = cd->evhdlr;
|
||||
++pmix_globals.events.nhdlrs;
|
||||
multi->cbobject = cbobject;
|
||||
rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super,
|
||||
@ -396,9 +398,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
|
||||
PMIX_ERR_WOULD_BLOCK != rc) {
|
||||
/* unable to register */
|
||||
--pmix_globals.events.nhdlrs;
|
||||
rc = PMIX_ERR_EVENT_REGISTRATION;
|
||||
index = UINT_MAX;
|
||||
goto ack;
|
||||
rc = PMIX_ERR_EVENT_REGISTRATION;
|
||||
index = UINT_MAX;
|
||||
goto ack;
|
||||
}
|
||||
if (PMIX_ERR_WOULD_BLOCK == rc) {
|
||||
/* the callback will provide our response */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
@ -145,121 +145,52 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
|
||||
opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super);
|
||||
}
|
||||
|
||||
static void completion_handler(int status, void *cbdata)
|
||||
static void event_hdlr_complete(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata;
|
||||
if (NULL != chain->info) {
|
||||
OPAL_LIST_RELEASE(chain->info);
|
||||
}
|
||||
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
|
||||
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void progress_local_event_hdlr(int status,
|
||||
opal_list_t *results,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
|
||||
void *notification_cbdata)
|
||||
static void return_local_event_hdlr(int status, opal_list_t *results,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
|
||||
void *notification_cbdata)
|
||||
{
|
||||
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata;
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata;
|
||||
pmix2x_opcaddy_t *op;
|
||||
opal_value_t *kv;
|
||||
pmix_status_t pstatus;
|
||||
size_t n;
|
||||
opal_list_item_t *nxt;
|
||||
opal_pmix2x_single_event_t *sing;
|
||||
opal_pmix2x_multi_event_t *multi;
|
||||
opal_pmix2x_default_event_t *def;
|
||||
|
||||
/* if the caller indicates that the chain is completed, then stop here */
|
||||
if (OPAL_ERR_HANDLERS_COMPLETE == status) {
|
||||
goto complete;
|
||||
}
|
||||
if (NULL != cd->pmixcbfunc) {
|
||||
op = OBJ_NEW(pmix2x_opcaddy_t);
|
||||
|
||||
/* if any results were provided, then add them here */
|
||||
if (NULL != results) {
|
||||
while (NULL != (nxt = opal_list_remove_first(results))) {
|
||||
opal_list_append(results, nxt);
|
||||
}
|
||||
}
|
||||
|
||||
/* see if we need to continue, starting with the single code events */
|
||||
if (NULL != chain->sing) {
|
||||
/* the last handler was for a single code - see if there are
|
||||
* any others that match this event */
|
||||
while (opal_list_get_end(&mca_pmix_pmix2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) {
|
||||
sing = (opal_pmix2x_single_event_t*)nxt;
|
||||
if (sing->code == chain->status) {
|
||||
OBJ_RETAIN(chain);
|
||||
chain->sing = sing;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PROGRESS CALLING SINGLE EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
sing->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
goto complete;
|
||||
}
|
||||
}
|
||||
/* if we get here, then there are no more single code
|
||||
* events that match */
|
||||
chain->sing = NULL;
|
||||
/* pickup the beginning of the multi-code event list */
|
||||
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.multi_events)) {
|
||||
chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.multi_events);
|
||||
}
|
||||
}
|
||||
|
||||
/* see if we need to continue with the multi code events */
|
||||
if (NULL != chain->multi) {
|
||||
while (opal_list_get_end(&mca_pmix_pmix2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) {
|
||||
multi = (opal_pmix2x_multi_event_t*)nxt;
|
||||
for (n=0; n < multi->ncodes; n++) {
|
||||
if (multi->codes[n] == chain->status) {
|
||||
/* found it - invoke the handler, pointing its
|
||||
* callback function to our progression function */
|
||||
OBJ_RETAIN(chain);
|
||||
chain->multi = multi;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PROGRESS CALLING MULTI EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
multi->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
goto complete;
|
||||
if (NULL != results) {
|
||||
/* convert the list of results to an array of info */
|
||||
op->ninfo = opal_list_get_size(results);
|
||||
if (0 < op->ninfo) {
|
||||
PMIX_INFO_CREATE(op->info, op->ninfo);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&op->info[n].value, kv);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if we get here, then there are no more multi-mode
|
||||
* events that match */
|
||||
chain->multi = NULL;
|
||||
/* pickup the beginning of the default event list */
|
||||
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
|
||||
chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.default_events);
|
||||
}
|
||||
/* convert the status */
|
||||
pstatus = pmix2x_convert_opalrc(status);
|
||||
/* call the library's callback function */
|
||||
cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata);
|
||||
}
|
||||
|
||||
/* if they didn't want it to go to a default handler, then we are done */
|
||||
if (chain->nondefault) {
|
||||
goto complete;
|
||||
/* release the threadshift object */
|
||||
if (NULL != cd->info) {
|
||||
OPAL_LIST_RELEASE(cd->info);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
|
||||
if (NULL != chain->def) {
|
||||
if (opal_list_get_end(&mca_pmix_pmix2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) {
|
||||
def = (opal_pmix2x_default_event_t*)nxt;
|
||||
OBJ_RETAIN(chain);
|
||||
chain->def = def;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PROGRESS CALLING DEFAULT EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
def->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
}
|
||||
}
|
||||
|
||||
complete:
|
||||
/* we still have to call their final callback */
|
||||
if (NULL != chain->final_cbfunc) {
|
||||
chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata);
|
||||
}
|
||||
/* maintain acctng */
|
||||
OBJ_RELEASE(chain);
|
||||
/* let the caller know that we are done with their callback */
|
||||
/* release the caller */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, thiscbdata);
|
||||
}
|
||||
@ -268,92 +199,29 @@ static void progress_local_event_hdlr(int status,
|
||||
static void _event_hdlr(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
|
||||
size_t n;
|
||||
opal_pmix2x_event_chain_t *chain;
|
||||
opal_pmix2x_single_event_t *sing;
|
||||
opal_pmix2x_multi_event_t *multi;
|
||||
opal_pmix2x_default_event_t *def;
|
||||
opal_pmix2x_event_t *event;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status);
|
||||
"%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status);
|
||||
|
||||
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
|
||||
/* point it at our final callback */
|
||||
chain->final_cbfunc = completion_handler;
|
||||
chain->final_cbdata = chain;
|
||||
|
||||
/* carry across provided info */
|
||||
chain->status = cd->status;
|
||||
chain->source = cd->pname;
|
||||
chain->info = cd->info;
|
||||
chain->nondefault = cd->nondefault;
|
||||
|
||||
/* cycle thru the single-event registrations first */
|
||||
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
|
||||
if (sing->code == chain->status) {
|
||||
/* cycle thru the registrations */
|
||||
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
|
||||
if (cd->id == event->index) {
|
||||
/* found it - invoke the handler, pointing its
|
||||
* callback function to our progression function */
|
||||
OBJ_RETAIN(chain);
|
||||
chain->sing = sing;
|
||||
* callback function to our callback function */
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s _EVENT_HDLR CALLING SINGLE EVHDLR",
|
||||
"%s _EVENT_HDLR CALLING EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
sing->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
event->handler(cd->status, &cd->pname,
|
||||
cd->info, &cd->results,
|
||||
return_local_event_hdlr, (void*)cd);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't find any match in the single-event registrations,
|
||||
* then cycle thru the multi-event registrations next */
|
||||
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
|
||||
for (n=0; n < multi->ncodes; n++) {
|
||||
if (multi->codes[n] == chain->status) {
|
||||
/* found it - invoke the handler, pointing its
|
||||
* callback function to our progression function */
|
||||
OBJ_RETAIN(chain);
|
||||
chain->multi = multi;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
multi->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if they didn't want it to go to a default handler, then we are done */
|
||||
if (chain->nondefault) {
|
||||
/* if we get here, then we need to cache this event in case they
|
||||
* register for it later - we cannot lose individual events */
|
||||
opal_list_append(&mca_pmix_pmix2x_component.cache, &chain->super);
|
||||
return;
|
||||
}
|
||||
|
||||
/* we are done with the threadshift caddy */
|
||||
OBJ_RELEASE(cd);
|
||||
|
||||
/* finally, pass it to any default handlers */
|
||||
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
|
||||
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
|
||||
OBJ_RETAIN(chain);
|
||||
chain->def = def;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
def->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
return;
|
||||
}
|
||||
|
||||
/* we still have to call their final callback */
|
||||
if (NULL != chain->final_cbfunc) {
|
||||
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
|
||||
/* if we didn't find a match, we still have to call their final callback */
|
||||
if (NULL != cd->pmixcbfunc) {
|
||||
cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -385,6 +253,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status);
|
||||
|
||||
cd = OBJ_NEW(pmix2x_threadshift_t);
|
||||
cd->id = evhdlr_registration_id;
|
||||
cd->pmixcbfunc = cbfunc;
|
||||
cd->cbdata = cbdata;
|
||||
|
||||
/* convert the incoming status */
|
||||
cd->status = pmix2x_convert_rc(status);
|
||||
@ -409,9 +280,6 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
|
||||
if (NULL != info) {
|
||||
cd->info = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
|
||||
cd->nondefault = true;
|
||||
}
|
||||
iptr = OBJ_NEW(opal_value_t);
|
||||
iptr->key = strdup(info[n].key);
|
||||
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) {
|
||||
@ -422,17 +290,25 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
|
||||
opal_list_append(cd->info, &iptr->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* convert the array of prior results */
|
||||
if (NULL != results) {
|
||||
for (n=0; n < nresults; n++) {
|
||||
iptr = OBJ_NEW(opal_value_t);
|
||||
iptr->key = strdup(results[n].key);
|
||||
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(iptr);
|
||||
continue;
|
||||
}
|
||||
opal_list_append(&cd->results, &iptr->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* now push it into the local thread */
|
||||
event_assign(&cd->ev, opal_pmix_base.evbase,
|
||||
-1, EV_WRITE, _event_hdlr, cd);
|
||||
event_active(&cd->ev, EV_WRITE, 1);
|
||||
|
||||
/* we don't need any of the data they provided,
|
||||
* so let them go - also tell them that we will handle
|
||||
* everything from this point forward */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank)
|
||||
@ -536,7 +412,7 @@ pmix_status_t pmix2x_convert_opalrc(int rc)
|
||||
case OPAL_SUCCESS:
|
||||
return PMIX_SUCCESS;
|
||||
default:
|
||||
return PMIX_ERROR;
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
@ -620,7 +496,7 @@ int pmix2x_convert_rc(pmix_status_t rc)
|
||||
case PMIX_SUCCESS:
|
||||
return OPAL_SUCCESS;
|
||||
default:
|
||||
return OPAL_ERROR;
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
@ -735,6 +611,10 @@ void pmix2x_value_load(pmix_value_t *v,
|
||||
{
|
||||
opal_pmix2x_jobid_trkr_t *job;
|
||||
bool found;
|
||||
opal_list_t *list;
|
||||
opal_value_t *val;
|
||||
pmix_info_t *info;
|
||||
size_t n;
|
||||
|
||||
switch(kv->type) {
|
||||
case OPAL_UNDEF:
|
||||
@ -876,8 +756,22 @@ void pmix2x_value_load(pmix_value_t *v,
|
||||
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
|
||||
break;
|
||||
case OPAL_PTR:
|
||||
v->type = PMIX_POINTER;
|
||||
v->data.ptr = kv->data.ptr;
|
||||
/* if someone returned a pointer, it must be to a list of
|
||||
* opal_value_t's that we need to convert to a pmix_data_array
|
||||
* of pmix_info_t structures */
|
||||
list = (opal_list_t*)kv->data.ptr;
|
||||
v->type = PMIX_DATA_ARRAY;
|
||||
v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
|
||||
v->data.darray->type = PMIX_INFO;
|
||||
v->data.darray->size = opal_list_get_size(list);
|
||||
PMIX_INFO_CREATE(info, v->data.darray->size);
|
||||
v->data.darray->array = info;
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(val, list, opal_value_t) {
|
||||
(void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&info[n].value, val);
|
||||
++n;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* silence warnings */
|
||||
@ -1041,16 +935,27 @@ int pmix2x_value_unload(opal_value_t *kv,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void errreg_cbfunc (pmix_status_t status,
|
||||
size_t errhandler_ref,
|
||||
void *cbdata)
|
||||
{
|
||||
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
|
||||
|
||||
op->event->index = errhandler_ref;
|
||||
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
|
||||
"PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu",
|
||||
status, (unsigned long)errhandler_ref);
|
||||
if (NULL != op->evregcbfunc) {
|
||||
op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void _reg_hdlr(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
|
||||
opal_pmix2x_event_chain_t *chain;
|
||||
opal_pmix2x_single_event_t *sing = NULL;
|
||||
opal_pmix2x_multi_event_t *multi = NULL;
|
||||
opal_pmix2x_default_event_t *def = NULL;
|
||||
pmix2x_opcaddy_t *op;
|
||||
opal_value_t *kv;
|
||||
int i;
|
||||
bool prepend = false;
|
||||
size_t n;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
@ -1058,116 +963,46 @@ static void _reg_hdlr(int sd, short args, void *cbdata)
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
||||
(NULL == cd->event_codes) ? "NULL" : "NON-NULL");
|
||||
|
||||
if (NULL != cd->info) {
|
||||
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
|
||||
if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) {
|
||||
prepend = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
op = OBJ_NEW(pmix2x_opcaddy_t);
|
||||
op->evregcbfunc = cd->cbfunc;
|
||||
op->cbdata = cd->cbdata;
|
||||
|
||||
if (NULL == cd->event_codes) {
|
||||
/* this is a default handler */
|
||||
def = OBJ_NEW(opal_pmix2x_default_event_t);
|
||||
def->handler = cd->evhandler;
|
||||
def->index = mca_pmix_pmix2x_component.evindex;
|
||||
if (prepend) {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PREPENDING TO DEFAULT EVENTS",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
opal_list_prepend(&mca_pmix_pmix2x_component.default_events, &def->super);
|
||||
} else {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s APPENDING TO DEFAULT EVENTS",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
opal_list_append(&mca_pmix_pmix2x_component.default_events, &def->super);
|
||||
}
|
||||
} else if (1 == opal_list_get_size(cd->event_codes)) {
|
||||
/* single handler */
|
||||
sing = OBJ_NEW(opal_pmix2x_single_event_t);
|
||||
kv = (opal_value_t*)opal_list_get_first(cd->event_codes);
|
||||
sing->code = kv->data.integer;
|
||||
sing->index = mca_pmix_pmix2x_component.evindex;
|
||||
sing->handler = cd->evhandler;
|
||||
if (prepend) {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PREPENDING TO SINGLE EVENTS WITH CODE %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
|
||||
opal_list_prepend(&mca_pmix_pmix2x_component.single_events, &sing->super);
|
||||
} else {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s APPENDING TO SINGLE EVENTS WITH CODE %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
|
||||
opal_list_append(&mca_pmix_pmix2x_component.single_events, &sing->super);
|
||||
}
|
||||
} else {
|
||||
multi = OBJ_NEW(opal_pmix2x_multi_event_t);
|
||||
multi->ncodes = opal_list_get_size(cd->event_codes);
|
||||
multi->codes = (int*)malloc(multi->ncodes * sizeof(int));
|
||||
i=0;
|
||||
/* convert the event codes */
|
||||
if (NULL != cd->event_codes) {
|
||||
op->ncodes = opal_list_get_size(cd->event_codes);
|
||||
op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t));
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) {
|
||||
multi->codes[i] = kv->data.integer;
|
||||
++i;
|
||||
}
|
||||
multi->index = mca_pmix_pmix2x_component.evindex;
|
||||
multi->handler = cd->evhandler;
|
||||
if (prepend) {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s PREPENDING TO MULTI EVENTS",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
opal_list_prepend(&mca_pmix_pmix2x_component.multi_events, &multi->super);
|
||||
} else {
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s APPENDING TO MULTI EVENTS",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
opal_list_append(&mca_pmix_pmix2x_component.multi_events, &multi->super);
|
||||
op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer);
|
||||
}
|
||||
}
|
||||
|
||||
/* release the caller */
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(OPAL_SUCCESS, mca_pmix_pmix2x_component.evindex, cd->cbdata);
|
||||
}
|
||||
mca_pmix_pmix2x_component.evindex++;
|
||||
|
||||
/* check if any matching notifications have been cached - only nondefault
|
||||
* events will have been cached*/
|
||||
if (NULL == def) {
|
||||
/* check single code registrations */
|
||||
if (NULL != sing) {
|
||||
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
|
||||
if (sing->code == chain->status) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
|
||||
chain->sing = sing;
|
||||
sing->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else if (NULL != multi) {
|
||||
/* check for multi code registrations */
|
||||
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
|
||||
for (n=0; n < multi->ncodes; n++) {
|
||||
if (multi->codes[n] == chain->status) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
|
||||
chain->multi = multi;
|
||||
multi->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* convert the list of info to an array of pmix_info_t */
|
||||
if (NULL != cd->info) {
|
||||
op->ninfo = opal_list_get_size(cd->info);
|
||||
if (0 < op->ninfo) {
|
||||
PMIX_INFO_CREATE(op->info, op->ninfo);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&op->info[n].value, kv);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* register the event */
|
||||
op->event = OBJ_NEW(opal_pmix2x_event_t);
|
||||
op->event->handler = cd->evhandler;
|
||||
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super);
|
||||
PMIx_Register_event_handler(op->pcodes, op->ncodes,
|
||||
op->info, op->ninfo,
|
||||
pmix2x_event_hdlr, errreg_cbfunc, op);
|
||||
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
|
||||
static void register_handler(opal_list_t *event_codes,
|
||||
opal_list_t *info,
|
||||
opal_pmix_notification_fn_t evhandler,
|
||||
@ -1184,36 +1019,20 @@ static void register_handler(opal_list_t *event_codes,
|
||||
static void _dereg_hdlr(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
|
||||
opal_pmix2x_single_event_t *sing;
|
||||
opal_pmix2x_multi_event_t *multi;
|
||||
opal_pmix2x_default_event_t *def;
|
||||
opal_pmix2x_event_t *event;
|
||||
|
||||
/* check the single events first */
|
||||
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
|
||||
if (cd->handler == sing->index) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.single_events, &sing->super);
|
||||
OBJ_RELEASE(sing);
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
/* check multi events */
|
||||
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
|
||||
if (cd->handler == multi->index) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.multi_events, &multi->super);
|
||||
OBJ_RELEASE(multi);
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
/* check default events */
|
||||
OPAL_LIST_FOREACH(def, &mca_pmix_pmix2x_component.default_events, opal_pmix2x_default_event_t) {
|
||||
if (cd->handler == def->index) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.default_events, &def->super);
|
||||
OBJ_RELEASE(def);
|
||||
/* look for this event */
|
||||
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
|
||||
if (cd->handler == event->index) {
|
||||
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
|
||||
OBJ_RELEASE(event);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* tell the library to deregister this handler */
|
||||
PMIx_Deregister_event_handler(cd->handler, NULL, NULL);
|
||||
|
||||
release:
|
||||
/* release the caller */
|
||||
if (NULL != cd->opcbfunc) {
|
||||
cd->opcbfunc(OPAL_SUCCESS, cd->cbdata);
|
||||
}
|
||||
@ -1230,90 +1049,81 @@ static void deregister_handler(size_t evhandler,
|
||||
return;
|
||||
}
|
||||
|
||||
static void _notify_event(int sd, short args, void *cbdata)
|
||||
static void notify_complete(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
|
||||
size_t i;
|
||||
opal_pmix2x_single_event_t *sing;
|
||||
opal_pmix2x_multi_event_t *multi;
|
||||
opal_pmix2x_default_event_t *def;
|
||||
opal_pmix2x_event_chain_t *chain;
|
||||
|
||||
/* check the single events first */
|
||||
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
|
||||
if (cd->status == sing->code) {
|
||||
/* found it - invoke the handler, pointing its
|
||||
* callback function to our progression function */
|
||||
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
|
||||
chain->status = cd->status;
|
||||
chain->range = pmix2x_convert_opalrange(cd->range);
|
||||
chain->source = *(cd->source);
|
||||
chain->info = cd->info;
|
||||
chain->final_cbfunc = cd->opcbfunc;
|
||||
chain->final_cbdata = cd->cbdata;
|
||||
chain->sing = sing;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"[%s] CALLING SINGLE EVHDLR FOR STATUS %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
|
||||
sing->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
|
||||
if (NULL != op->opcbfunc) {
|
||||
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
|
||||
}
|
||||
/* check multi events */
|
||||
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
|
||||
for (i=0; i < multi->ncodes; i++) {
|
||||
if (cd->status == multi->codes[i]) {
|
||||
/* found it - invoke the handler, pointing its
|
||||
* callback function to our progression function */
|
||||
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
|
||||
chain->status = cd->status;
|
||||
chain->range = pmix2x_convert_opalrange(cd->range);
|
||||
chain->source = *(cd->source);
|
||||
chain->info = cd->info;
|
||||
chain->final_cbfunc = cd->opcbfunc;
|
||||
chain->final_cbdata = cd->cbdata;
|
||||
chain->multi = multi;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"[%s] CALLING MULTI EVHDLR FOR STATUS %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
|
||||
multi->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void _notify(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata;
|
||||
pmix2x_opcaddy_t *op;
|
||||
opal_value_t *kv;
|
||||
pmix_proc_t p, *pptr;
|
||||
pmix_status_t pstatus;
|
||||
size_t n;
|
||||
int rc=OPAL_SUCCESS;
|
||||
pmix_data_range_t prange;
|
||||
opal_pmix2x_jobid_trkr_t *job, *jptr;
|
||||
|
||||
op = OBJ_NEW(pmix2x_opcaddy_t);
|
||||
|
||||
/* convert the status */
|
||||
pstatus = pmix2x_convert_opalrc(cd->status);
|
||||
|
||||
/* convert the source */
|
||||
if (NULL == cd->source) {
|
||||
pptr = NULL;
|
||||
} else {
|
||||
/* look thru our list of jobids and find the
|
||||
* corresponding nspace */
|
||||
job = NULL;
|
||||
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
|
||||
if (jptr->jobid == cd->source->jobid) {
|
||||
job = jptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == job) {
|
||||
rc = OPAL_ERR_NOT_FOUND;
|
||||
goto release;
|
||||
}
|
||||
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
|
||||
pptr = &p;
|
||||
}
|
||||
|
||||
/* convert the range */
|
||||
prange = pmix2x_convert_opalrange(cd->range);
|
||||
|
||||
/* convert the list of info */
|
||||
if (NULL != cd->info) {
|
||||
op->ninfo = opal_list_get_size(cd->info);
|
||||
if (0 < op->ninfo) {
|
||||
PMIX_INFO_CREATE(op->info, op->ninfo);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&op->info[n].value, kv);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* check default events */
|
||||
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
|
||||
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
|
||||
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
|
||||
chain->status = cd->status;
|
||||
chain->range = pmix2x_convert_opalrange(cd->range);
|
||||
chain->source = *(cd->source);
|
||||
chain->info = cd->info;
|
||||
chain->final_cbfunc = cd->opcbfunc;
|
||||
chain->final_cbdata = cd->cbdata;
|
||||
chain->def = def;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"[%s] CALLING DEFAULT EVHDLR FOR STATUS %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
|
||||
def->handler(chain->status, &chain->source,
|
||||
chain->info, &chain->results,
|
||||
progress_local_event_hdlr, (void*)chain);
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if we get here, then there are no registered event handlers */
|
||||
/* ask the library to notify our clients */
|
||||
pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op);
|
||||
rc = pmix2x_convert_rc(pstatus);
|
||||
|
||||
release:
|
||||
/* release the caller */
|
||||
if (NULL != cd->opcbfunc) {
|
||||
cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata);
|
||||
cd->opcbfunc(rc, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
return;
|
||||
}
|
||||
|
||||
static int notify_event(int status,
|
||||
@ -1324,7 +1134,7 @@ static int notify_event(int status,
|
||||
{
|
||||
/* we must threadshift this request as we might not be in an event
|
||||
* and we are going to access framework-global lists/objects */
|
||||
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata);
|
||||
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1401,47 +1211,14 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static void mtevcon(opal_pmix2x_multi_event_t *p)
|
||||
static void evcon(opal_pmix2x_event_t *p)
|
||||
{
|
||||
p->codes = NULL;
|
||||
p->ncodes = 0;
|
||||
p->handler = NULL;
|
||||
p->cbdata = NULL;
|
||||
}
|
||||
static void mtevdes(opal_pmix2x_multi_event_t *p)
|
||||
{
|
||||
if (NULL != p->codes) {
|
||||
free(p->codes);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t,
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_event_t,
|
||||
opal_list_item_t,
|
||||
mtevcon, mtevdes);
|
||||
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static void chcon(opal_pmix2x_event_chain_t *p)
|
||||
{
|
||||
p->nondefault = false;
|
||||
p->info = NULL;
|
||||
OBJ_CONSTRUCT(&p->results, opal_list_t);
|
||||
p->sing = NULL;
|
||||
p->multi = NULL;
|
||||
p->def = NULL;
|
||||
p->final_cbfunc = NULL;
|
||||
p->final_cbdata = NULL;
|
||||
}
|
||||
static void chdes(opal_pmix2x_event_chain_t *p)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&p->results);
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t,
|
||||
opal_list_item_t,
|
||||
chcon, chdes);
|
||||
evcon, NULL);
|
||||
|
||||
static void opcon(pmix2x_opcaddy_t *p)
|
||||
{
|
||||
@ -1455,11 +1232,15 @@ static void opcon(pmix2x_opcaddy_t *p)
|
||||
p->apps = NULL;
|
||||
p->sz = 0;
|
||||
p->active = false;
|
||||
p->codes = NULL;
|
||||
p->pcodes = NULL;
|
||||
p->event = NULL;
|
||||
p->opcbfunc = NULL;
|
||||
p->mdxcbfunc = NULL;
|
||||
p->valcbfunc = NULL;
|
||||
p->lkcbfunc = NULL;
|
||||
p->spcbfunc = NULL;
|
||||
p->evregcbfunc = NULL;
|
||||
p->cbdata = NULL;
|
||||
}
|
||||
static void opdes(pmix2x_opcaddy_t *p)
|
||||
@ -1476,6 +1257,9 @@ static void opdes(pmix2x_opcaddy_t *p)
|
||||
if (NULL != p->apps) {
|
||||
PMIX_APP_FREE(p->apps, p->sz);
|
||||
}
|
||||
if (NULL != p->pcodes) {
|
||||
free(p->pcodes);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t,
|
||||
opal_object_t,
|
||||
@ -1513,12 +1297,17 @@ static void tscon(pmix2x_threadshift_t *p)
|
||||
p->source = NULL;
|
||||
p->event_codes = NULL;
|
||||
p->info = NULL;
|
||||
OBJ_CONSTRUCT(&p->results, opal_list_t);
|
||||
p->evhandler = NULL;
|
||||
p->nondefault = false;
|
||||
p->cbfunc = NULL;
|
||||
p->opcbfunc = NULL;
|
||||
p->cbdata = NULL;
|
||||
}
|
||||
static void tsdes(pmix2x_threadshift_t *p)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&p->results);
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(pmix2x_threadshift_t,
|
||||
opal_object_t,
|
||||
tscon, NULL);
|
||||
tscon, tsdes);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
@ -39,9 +39,7 @@ typedef struct {
|
||||
opal_list_t jobids;
|
||||
bool native_launch;
|
||||
size_t evindex;
|
||||
opal_list_t single_events;
|
||||
opal_list_t multi_events;
|
||||
opal_list_t default_events;
|
||||
opal_list_t events;
|
||||
int cache_size;
|
||||
opal_list_t cache;
|
||||
} mca_pmix_pmix2x_component_t;
|
||||
@ -61,42 +59,10 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t);
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
size_t index;
|
||||
int code;
|
||||
opal_pmix_notification_fn_t handler;
|
||||
} opal_pmix2x_single_event_t;
|
||||
OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t);
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
size_t index;
|
||||
int *codes;
|
||||
size_t ncodes;
|
||||
opal_pmix_notification_fn_t handler;
|
||||
} opal_pmix2x_multi_event_t;
|
||||
OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t);
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
size_t index;
|
||||
opal_pmix_notification_fn_t handler;
|
||||
} opal_pmix2x_default_event_t;
|
||||
OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t);
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
int status;
|
||||
bool nondefault;
|
||||
opal_process_name_t source;
|
||||
pmix_data_range_t range;
|
||||
opal_list_t *info;
|
||||
opal_list_t results;
|
||||
opal_pmix2x_single_event_t *sing;
|
||||
opal_pmix2x_multi_event_t *multi;
|
||||
opal_pmix2x_default_event_t *def;
|
||||
opal_pmix_op_cbfunc_t final_cbfunc;
|
||||
void *final_cbdata;
|
||||
} opal_pmix2x_event_chain_t;
|
||||
OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t);
|
||||
void *cbdata;
|
||||
} opal_pmix2x_event_t;
|
||||
OBJ_CLASS_DECLARATION(opal_pmix2x_event_t);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
@ -111,11 +77,16 @@ typedef struct {
|
||||
pmix_app_t *apps;
|
||||
size_t sz;
|
||||
volatile bool active;
|
||||
opal_list_t *codes;
|
||||
pmix_status_t *pcodes;
|
||||
size_t ncodes;
|
||||
opal_pmix2x_event_t *event;
|
||||
opal_pmix_op_cbfunc_t opcbfunc;
|
||||
opal_pmix_modex_cbfunc_t mdxcbfunc;
|
||||
opal_pmix_value_cbfunc_t valcbfunc;
|
||||
opal_pmix_lookup_cbfunc_t lkcbfunc;
|
||||
opal_pmix_spawn_cbfunc_t spcbfunc;
|
||||
opal_pmix_evhandler_reg_cbfunc_t evregcbfunc;
|
||||
void *cbdata;
|
||||
} pmix2x_opcaddy_t;
|
||||
OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t);
|
||||
@ -152,28 +123,15 @@ typedef struct {
|
||||
size_t handler;
|
||||
opal_list_t *event_codes;
|
||||
opal_list_t *info;
|
||||
opal_list_t results;
|
||||
opal_pmix_notification_fn_t evhandler;
|
||||
opal_pmix_evhandler_reg_cbfunc_t cbfunc;
|
||||
opal_pmix_op_cbfunc_t opcbfunc;
|
||||
pmix_event_notification_cbfunc_fn_t pmixcbfunc;
|
||||
void *cbdata;
|
||||
} pmix2x_threadshift_t;
|
||||
OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
|
||||
|
||||
#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \
|
||||
do { \
|
||||
pmix2x_opalcaddy_t *_cd; \
|
||||
_cd = OBJ_NEW(pmix2x_opalcaddy_t); \
|
||||
_cd->id = (i); \
|
||||
_cd->status = (s); \
|
||||
_cd->source = (sr); \
|
||||
_cd->info = (i); \
|
||||
_cd->evcbfunc = (cb); \
|
||||
_cd->cbdata = (cd); \
|
||||
event_assign(&((_cd)->ev), opal_pmix_base.evbase, \
|
||||
-1, EV_WRITE, (fn), (_cd)); \
|
||||
event_active(&((_cd)->ev), EV_WRITE, 1); \
|
||||
} while(0)
|
||||
|
||||
#define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \
|
||||
do { \
|
||||
pmix2x_threadshift_t *_cd; \
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
@ -36,7 +36,6 @@
|
||||
|
||||
static pmix_proc_t my_proc;
|
||||
static char *dbgvalue=NULL;
|
||||
static size_t errhdler_ref = 0;
|
||||
|
||||
#define PMIX_WAIT_FOR_COMPLETION(a) \
|
||||
do { \
|
||||
@ -50,7 +49,9 @@ static void errreg_cbfunc (pmix_status_t status,
|
||||
size_t errhandler_ref,
|
||||
void *cbdata)
|
||||
{
|
||||
errhdler_ref = errhandler_ref;
|
||||
opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata;
|
||||
|
||||
event->index = errhandler_ref;
|
||||
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
|
||||
"PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu",
|
||||
status, (unsigned long)errhandler_ref);
|
||||
@ -62,6 +63,7 @@ int pmix2x_client_init(void)
|
||||
pmix_status_t rc;
|
||||
int dbg;
|
||||
opal_pmix2x_jobid_trkr_t *job;
|
||||
opal_pmix2x_event_t *event;
|
||||
|
||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||
"PMIx_client init");
|
||||
@ -98,7 +100,9 @@ int pmix2x_client_init(void)
|
||||
opal_proc_set_name(&pname);
|
||||
|
||||
/* register the default event handler */
|
||||
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL);
|
||||
event = OBJ_NEW(opal_pmix2x_event_t);
|
||||
opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
|
||||
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event);
|
||||
return OPAL_SUCCESS;
|
||||
|
||||
}
|
||||
@ -106,12 +110,16 @@ int pmix2x_client_init(void)
|
||||
int pmix2x_client_finalize(void)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
opal_pmix2x_event_t *event;
|
||||
|
||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||
"PMIx_client finalize");
|
||||
|
||||
/* deregister the default event handler */
|
||||
PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL);
|
||||
/* deregister all event handlers */
|
||||
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
|
||||
PMIx_Deregister_event_handler(event->index, NULL, NULL);
|
||||
}
|
||||
/* the list will be destructed when the component is finalized */
|
||||
|
||||
rc = PMIx_Finalize(NULL, 0);
|
||||
return pmix2x_convert_rc(rc);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
@ -36,7 +36,6 @@ const char *opal_pmix_pmix2x_component_version_string =
|
||||
static int external_open(void);
|
||||
static int external_close(void);
|
||||
static int external_component_query(mca_base_module_t **module, int *priority);
|
||||
static int external_register(void);
|
||||
|
||||
|
||||
/*
|
||||
@ -66,7 +65,6 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
|
||||
.mca_open_component = external_open,
|
||||
.mca_close_component = external_close,
|
||||
.mca_query_component = external_component_query,
|
||||
.mca_register_component_params = external_register,
|
||||
},
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
.base_data = {
|
||||
@ -77,27 +75,11 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
|
||||
.native_launch = false
|
||||
};
|
||||
|
||||
static int external_register(void)
|
||||
{
|
||||
mca_pmix_pmix2x_component.cache_size = 256;
|
||||
mca_base_component_var_register(&mca_pmix_pmix2x_component.super.base_version,
|
||||
"cache_size", "Size of the ring buffer cache for events",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_CONSTANT,
|
||||
&mca_pmix_pmix2x_component.cache_size);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int external_open(void)
|
||||
{
|
||||
mca_pmix_pmix2x_component.evindex = 0;
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.single_events, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.multi_events, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.default_events, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.cache, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.events, opal_list_t);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -105,10 +87,7 @@ static int external_open(void)
|
||||
static int external_close(void)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids);
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.single_events);
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.multi_events);
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.default_events);
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.cache);
|
||||
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.events);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -88,6 +88,7 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
|
||||
|
||||
/* information about relative ranks as assigned by the RM */
|
||||
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
|
||||
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
|
||||
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
|
||||
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
|
||||
@ -117,6 +118,8 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
|
||||
#define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and
|
||||
// thus cannot be prefixed with "pmix"
|
||||
#define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon
|
||||
#define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes
|
||||
|
||||
/* size info */
|
||||
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
|
||||
@ -220,6 +223,8 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform"
|
||||
#define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
|
||||
#define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
|
||||
#define OPAL_PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
|
||||
#define OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
|
||||
|
||||
/* log attributes */
|
||||
#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr
|
||||
|
@ -4,7 +4,7 @@
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Inria. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
@ -200,10 +200,9 @@ char* opal_get_proc_hostname(const opal_proc_t *proc)
|
||||
}
|
||||
|
||||
/* if we don't already have it, then try to get it */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
|
||||
(char**)&(proc->proc_hostname), OPAL_STRING);
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
|
||||
(char**)&(proc->proc_hostname), OPAL_STRING);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
OPAL_ERROR_LOG(ret);
|
||||
return "unknown"; // return something so the caller doesn't segfault
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -71,7 +72,7 @@ static int dfs_app_close(void)
|
||||
|
||||
static int dfs_app_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
if (ORTE_PROC_IS_APP && orte_staged_execution) {
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* set our priority high as we are the default for apps */
|
||||
*priority = 1000;
|
||||
*module = (mca_base_module_t *)&orte_dfs_app_module;
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -89,7 +89,6 @@ static int rte_init(void)
|
||||
char *envar, *ev1, *ev2;
|
||||
uint64_t unique_key[2];
|
||||
char *string_key;
|
||||
char *rmluri;
|
||||
opal_value_t *kv;
|
||||
char *val;
|
||||
int u32, *u32ptr;
|
||||
@ -399,19 +398,9 @@ static int rte_init(void)
|
||||
orte_process_info.max_procs = orte_process_info.num_procs;
|
||||
}
|
||||
|
||||
/*** PUSH DATA FOR OTHERS TO FIND ***/
|
||||
|
||||
/* push our RML URI in case others need to talk directly to us */
|
||||
rmluri = orte_rml.get_contact_info();
|
||||
/* push it out for others to use */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
error = "pmix put uri";
|
||||
goto error;
|
||||
}
|
||||
free(rmluri);
|
||||
|
||||
/* push our hostname so others can find us, if they need to */
|
||||
/* push our hostname so others can find us, if they need to - the
|
||||
* native PMIx component will ignore this request as the hostname
|
||||
* is provided by the system */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
error = "db store hostname";
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
@ -336,13 +336,6 @@ static int rte_init(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* push our hostname so others can find us, if they need to */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
error = "db store hostname";
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
|
@ -1341,9 +1341,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
||||
opal_argv_append(argc, argv, "1");
|
||||
}
|
||||
|
||||
if (orte_map_reduce) {
|
||||
opal_argv_append(argc, argv, "--mapreduce");
|
||||
}
|
||||
if (orte_map_stddiag_to_stderr) {
|
||||
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");
|
||||
|
@ -1101,11 +1101,6 @@ static int setup_child(orte_job_t *jdata,
|
||||
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env);
|
||||
}
|
||||
|
||||
/* if we are using staged execution, tell it */
|
||||
if (orte_staged_execution) {
|
||||
opal_setenv("OMPI_MCA_orte_staged_execution", "1", true, &app->env);
|
||||
}
|
||||
|
||||
/* if the proc isn't going to forward IO, then we need to flag that
|
||||
* it has "completed" iof termination as otherwise it will never fire
|
||||
*/
|
||||
|
@ -126,7 +126,6 @@ static struct {
|
||||
int uri_pipe;
|
||||
int singleton_died_pipe;
|
||||
bool abort;
|
||||
bool mapreduce;
|
||||
bool tree_spawn;
|
||||
char *hnp_topo_sig;
|
||||
bool test_suicide;
|
||||
@ -217,10 +216,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
&orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Topology signature of HNP" },
|
||||
|
||||
{ NULL, '\0', "mapreduce", "mapreduce", 0,
|
||||
&orted_globals.mapreduce, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Whether to report process bindings to stderr" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -335,11 +330,6 @@ int orte_daemon(int argc, char *argv[])
|
||||
free(tmp_env_var);
|
||||
#endif
|
||||
|
||||
/* if mapreduce set, flag it */
|
||||
if (orted_globals.mapreduce) {
|
||||
orte_map_reduce = true;
|
||||
}
|
||||
|
||||
/* detach from controlling terminal
|
||||
* otherwise, remain attached so output can get to us
|
||||
*/
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -137,6 +137,9 @@ static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jar
|
||||
static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
|
||||
static void orte_timeout_wakeup(int sd, short args, void *cbdata);
|
||||
static void orte_profile_wakeup(int sd, short args, void *cbdata);
|
||||
static void profile_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer, orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
static void launch_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
@ -896,20 +899,6 @@ int orte_submit_job(char *argv[], int *index,
|
||||
}
|
||||
}
|
||||
|
||||
/* check for debugger test envars and forward them if necessary */
|
||||
if (NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
|
||||
char *evar;
|
||||
evar = getenv("ORTE_TEST_DEBUGGER_SLEEP");
|
||||
for (i=0; i < (orte_app_idx_t)jdata->num_apps; i++) {
|
||||
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||
opal_setenv("ORTE_TEST_DEBUGGER_ATTACH", "1", true, &app->env);
|
||||
if (NULL != evar) {
|
||||
opal_setenv("ORTE_TEST_DEBUGGER_SLEEP", evar, true, &app->env);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for suicide test directives */
|
||||
if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") ||
|
||||
NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
|
||||
@ -956,6 +945,8 @@ int orte_submit_job(char *argv[], int *index,
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
//goto DONE;
|
||||
}
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
|
||||
ORTE_RML_PERSISTENT, profile_recv, NULL);
|
||||
orte_memprofile_timeout->tv.tv_sec = timeout_seconds;
|
||||
orte_memprofile_timeout->tv.tv_usec = 0;
|
||||
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
|
||||
@ -2212,10 +2203,9 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
|
||||
|
||||
static bool mpir_breakpoint_fired = false;
|
||||
|
||||
static void _send_notification(void)
|
||||
static void _send_notification(int status)
|
||||
{
|
||||
opal_buffer_t buf;
|
||||
int status = OPAL_ERR_DEBUGGER_RELEASE;
|
||||
orte_grpcomm_signature_t sig;
|
||||
int rc;
|
||||
opal_value_t kv, *kvptr;
|
||||
@ -2448,7 +2438,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
"%s NOTIFYING DEBUGGER RELEASE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
|
||||
}
|
||||
}
|
||||
return;
|
||||
@ -2547,7 +2537,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
"%s NOTIFYING DEBUGGER RELEASE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
|
||||
} else if (!orte_debugger_test_attach) {
|
||||
/* if I am launching debugger daemons, then I need to do so now
|
||||
* that the job has been started and I know which nodes have
|
||||
@ -3133,6 +3123,16 @@ void orte_timeout_wakeup(int sd, short args, void *cbdata)
|
||||
|
||||
static int nreports = 0;
|
||||
static orte_timer_t profile_timer;
|
||||
static int nchecks = 0;
|
||||
|
||||
static void profile_timeout(int sd, short args, void *cbdata)
|
||||
{
|
||||
/* abort the job */
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
|
||||
/* set the global abnormal exit flag */
|
||||
orte_abnormal_term_ordered = true;
|
||||
}
|
||||
|
||||
|
||||
static void profile_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer, orte_rml_tag_t tag,
|
||||
@ -3167,26 +3167,33 @@ static void profile_recv(int status, orte_process_name_t* sender,
|
||||
done:
|
||||
--nreports;
|
||||
if (nreports == 0) {
|
||||
++nchecks;
|
||||
/* cancel the timeout */
|
||||
OBJ_DESTRUCT(&profile_timer);
|
||||
/* abort the job */
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
|
||||
/* set the global abnormal exit flag */
|
||||
orte_abnormal_term_ordered = true;
|
||||
/* notify to release */
|
||||
_send_notification(12345);
|
||||
/* if this was the first measurement, then we need to
|
||||
* let the probe move along */
|
||||
if (2 > nchecks) {
|
||||
/* reset the event */
|
||||
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
|
||||
orte_profile_wakeup, NULL);
|
||||
opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
|
||||
opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
|
||||
/* reset the timer */
|
||||
OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
|
||||
opal_event_evtimer_set(orte_event_base,
|
||||
profile_timer.ev, profile_timeout, NULL);
|
||||
opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
|
||||
profile_timer.tv.tv_sec = 30;
|
||||
opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void profile_timeout(int sd, short args, void *cbdata)
|
||||
{
|
||||
/* abort the job */
|
||||
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
|
||||
/* set the global abnormal exit flag */
|
||||
orte_abnormal_term_ordered = true;
|
||||
}
|
||||
|
||||
void orte_profile_wakeup(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_job_t *jdata = (orte_job_t*)cbdata;
|
||||
orte_job_t *dmns;
|
||||
orte_proc_t *dmn;
|
||||
int i;
|
||||
@ -3202,8 +3209,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
|
||||
|
||||
/* set the recv */
|
||||
nreports = 1; // always get a report from ourselves
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
|
||||
ORTE_RML_PERSISTENT, profile_recv, NULL);
|
||||
|
||||
/* setup the buffer */
|
||||
buffer = OBJ_NEW(opal_buffer_t);
|
||||
@ -3213,13 +3218,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
|
||||
OBJ_RELEASE(buffer);
|
||||
goto giveup;
|
||||
}
|
||||
/* pack the jobid in question */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &jdata->jobid, 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buffer);
|
||||
goto giveup;
|
||||
}
|
||||
|
||||
/* goes to just the first daemon beyond ourselves - no need to get it from everyone */
|
||||
dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||
if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) {
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -35,6 +35,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/pstat/pstat.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rmaps/rmaps_types.h"
|
||||
@ -449,13 +450,18 @@ static void _query(int sd, short args, void *cbdata)
|
||||
opal_pmix_query_t *q;
|
||||
opal_value_t *kv;
|
||||
orte_job_t *jdata;
|
||||
int rc;
|
||||
opal_list_t *results;
|
||||
orte_proc_t *proct;
|
||||
int rc, i, num_replies;
|
||||
opal_list_t *results, targets, *array;
|
||||
size_t n;
|
||||
uint32_t key;
|
||||
void *nptr;
|
||||
char **nspaces=NULL, nspace[512];
|
||||
char **ans = NULL;
|
||||
bool local_only;
|
||||
orte_namelist_t *nm;
|
||||
opal_pstats_t pstat;
|
||||
float pss;
|
||||
|
||||
opal_output_verbose(2, orte_pmix_server_globals.output,
|
||||
"%s processing query",
|
||||
@ -522,6 +528,84 @@ static void _query(int sd, short args, void *cbdata)
|
||||
opal_list_append(results, &kv->super);
|
||||
opal_argv_free(ans);
|
||||
ans = NULL;
|
||||
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_MEMORY_USAGE)) {
|
||||
/* check the qualifiers to find the procs they want to
|
||||
* know about - if qualifiers are NULL, then get it for
|
||||
* the daemons + all active jobs */
|
||||
if (0 == opal_list_get_size(&q->qualifiers)) {
|
||||
/* create a request tracker */
|
||||
/* xcast a request for all memory usage */
|
||||
/* return success - the callback will be done
|
||||
* once we get the results */
|
||||
return;
|
||||
}
|
||||
|
||||
/* scan the qualifiers */
|
||||
OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) {
|
||||
if (0 == strcmp(kv->key, OPAL_PMIX_QUERY_LOCAL_ONLY)) {
|
||||
if (OPAL_UNDEF == kv->type || kv->data.flag) {
|
||||
local_only = true;
|
||||
} else {
|
||||
local_only = false;
|
||||
}
|
||||
} else if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) {
|
||||
/* save this directive on our list of targets */
|
||||
nm = OBJ_NEW(orte_namelist_t);
|
||||
}
|
||||
}
|
||||
|
||||
/* if they have asked for only our local procs or daemon,
|
||||
* then we can just get the data directly */
|
||||
if (local_only) {
|
||||
if (0 == opal_list_get_size(&targets)) {
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_QUERY_MEMORY_USAGE);
|
||||
kv->type = OPAL_PTR;
|
||||
array = OBJ_NEW(opal_list_t);
|
||||
kv->data.ptr = array;
|
||||
opal_list_append(results, &kv->super);
|
||||
/* collect my memory usage */
|
||||
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
|
||||
opal_pstat.query(orte_process_info.pid, &pstat, NULL);
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_DAEMON_MEMORY);
|
||||
kv->type = OPAL_FLOAT;
|
||||
kv->data.fval = pstat.pss;
|
||||
opal_list_append(array, &kv->super);
|
||||
OBJ_DESTRUCT(&pstat);
|
||||
/* collect the memory usage of all my children */
|
||||
pss = 0.0;
|
||||
num_replies = 0;
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
|
||||
/* collect the stats on this proc */
|
||||
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
|
||||
if (OPAL_SUCCESS == opal_pstat.query(proct->pid, &pstat, NULL)) {
|
||||
pss += pstat.pss;
|
||||
++num_replies;
|
||||
}
|
||||
OBJ_DESTRUCT(&pstat);
|
||||
}
|
||||
}
|
||||
/* compute the average value */
|
||||
if (0 < num_replies) {
|
||||
pss /= (float)num_replies;
|
||||
}
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_CLIENT_AVG_MEMORY);
|
||||
kv->type = OPAL_FLOAT;
|
||||
kv->data.fval = pss;
|
||||
opal_list_append(array, &kv->super);
|
||||
} else {
|
||||
|
||||
}
|
||||
} else {
|
||||
/* if they want it for remote procs, see who is hosting them
|
||||
* and ask directly for the info - if rank=wildcard, then
|
||||
* we need to xcast the request and collect the results */
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
@ -407,6 +407,14 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
|
||||
kv->type = OPAL_UINT32;
|
||||
kv->data.uint32 = pptr->node->index;
|
||||
opal_list_append(pmap, &kv->super);
|
||||
|
||||
if (map->num_nodes < orte_hostname_cutoff) {
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_HOSTNAME);
|
||||
kv->type = OPAL_STRING;
|
||||
kv->data.string = strdup(pptr->node->name);
|
||||
opal_list_append(pmap, &kv->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -86,6 +86,7 @@ bool orte_have_fqdn_allocation = false;
|
||||
bool orte_show_resolved_nodenames = false;
|
||||
bool orte_retain_aliases = false;
|
||||
int orte_use_hostname_alias = -1;
|
||||
int orte_hostname_cutoff = 1000;
|
||||
|
||||
int orted_debug_failure = -1;
|
||||
int orted_debug_failure_delay = -1;
|
||||
@ -183,10 +184,6 @@ int orte_stat_history_size = -1;
|
||||
/* envars to forward */
|
||||
char **orte_forwarded_envars = NULL;
|
||||
|
||||
/* map-reduce mode */
|
||||
bool orte_map_reduce = false;
|
||||
bool orte_staged_execution = false;
|
||||
|
||||
/* map stddiag output to stderr so it isn't forwarded to mpirun */
|
||||
bool orte_map_stddiag_to_stderr = false;
|
||||
|
||||
@ -196,9 +193,6 @@ int orte_max_vm_size = -1;
|
||||
/* user debugger */
|
||||
char *orte_base_user_debugger = NULL;
|
||||
|
||||
/* modex cutoff */
|
||||
uint32_t orte_direct_modex_cutoff = UINT32_MAX;
|
||||
|
||||
int orte_debug_output = -1;
|
||||
bool orte_debug_daemons_flag = false;
|
||||
bool orte_xml_output = false;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -466,6 +466,7 @@ ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
|
||||
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
|
||||
ORTE_DECLSPEC extern bool orte_retain_aliases;
|
||||
ORTE_DECLSPEC extern int orte_use_hostname_alias;
|
||||
ORTE_DECLSPEC extern int orte_hostname_cutoff;
|
||||
|
||||
/* debug flags */
|
||||
ORTE_DECLSPEC extern int orted_debug_failure;
|
||||
@ -564,10 +565,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
|
||||
/* envars to forward */
|
||||
ORTE_DECLSPEC extern char **orte_forwarded_envars;
|
||||
|
||||
/* map-reduce mode */
|
||||
ORTE_DECLSPEC extern bool orte_map_reduce;
|
||||
ORTE_DECLSPEC extern bool orte_staged_execution;
|
||||
|
||||
/* map stddiag output to stderr so it isn't forwarded to mpirun */
|
||||
ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
|
||||
|
||||
@ -582,9 +579,6 @@ ORTE_DECLSPEC extern char *orte_base_user_debugger;
|
||||
*/
|
||||
ORTE_DECLSPEC extern char *orte_daemon_cores;
|
||||
|
||||
/* cutoff for collective modex */
|
||||
ORTE_DECLSPEC extern uint32_t orte_direct_modex_cutoff;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -429,6 +429,13 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_retain_aliases);
|
||||
|
||||
orte_hostname_cutoff = 1000;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
|
||||
"Pass hostnames to all procs when #nodes is less than cutoff [default:1000]",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_hostname_cutoff);
|
||||
|
||||
/* which alias to use in MPIR_proctab */
|
||||
orte_use_hostname_alias = 1;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
|
||||
@ -659,13 +666,6 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_allowed_exit_without_sync);
|
||||
|
||||
orte_staged_execution = false;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
|
||||
"Staged execution is being used",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_staged_execution);
|
||||
|
||||
orte_report_child_jobs_separately = false;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
|
||||
"Return the exit status of the primary job only",
|
||||
@ -754,17 +754,6 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_daemon_cores);
|
||||
|
||||
/* cutoff for full modex */
|
||||
orte_direct_modex_cutoff = UINT32_MAX;
|
||||
id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
|
||||
"If the number of processes in the application exceeds the provided value,"
|
||||
"modex will be done upon demand [default: UINT32_MAX]",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_direct_modex_cutoff);
|
||||
/* register a synonym for old name */
|
||||
mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
|
||||
|
||||
/* get the conduit params */
|
||||
orte_coll_transport = "fabric,ethernet";
|
||||
(void) mca_base_var_register("orte", "orte", "coll", "transports",
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user