1
1
Revamp the event notification integration to rely on the PMIx event chaining and remove the duplicate chaining in OPAL. This ensures we get system-level events that target non-default handlers.

Restore the hostname entries for MPI-level error messages, but provide an MCA param (orte_hostname_cutoff) to remove them for large clusters where the memory footprint is problematic. Set the default at 1000 nodes in the job (not the allocation).

Begin first cut at memory profiler

Some minor cleanups of memprobe

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2016-12-31 07:37:24 -08:00
родитель 5f68d655d6
Коммит 9eab9a1ed3
25 изменённых файлов: 554 добавлений и 668 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -111,6 +111,7 @@ contrib/platform/intel/bend/*orcm*
contrib/scaling/orte_no_op
contrib/scaling/mpi_no_op
contrib/scaling/mpi_barrier
contrib/scaling/mpi_memprobe
examples/hello_c
examples/hello_cxx

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = orte_no_op mpi_no_op
PROGS = orte_no_op mpi_no_op mpi_memprobe
all: $(PROGS)
@ -10,5 +10,8 @@ orte_no_op:
mpi_no_op:
mpicc -o mpi_no_op mpi_no_op.c
mpi_memprobe:
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
clean:
rm -f $(PROGS) *~

98
contrib/scaling/mpi_memprobe.c Обычный файл
Просмотреть файл

@ -0,0 +1,98 @@
/* -*- C -*-
*
* $HEADER$
*
* The most basic of MPI applications
*/
#include "orte_config.h"
#include <stdio.h>
#include "mpi.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
static volatile int active;
static volatile bool wait_for_release = true;
#define MEMPROBE_RELEASE 12345
static void _release_fn(int status,
const opal_process_name_t *source,
opal_list_t *info, opal_list_t *results,
opal_pmix_notification_complete_fn_t cbfunc,
void *cbdata)
{
/* must let the notifier know we are done */
if (NULL != cbfunc) {
cbfunc(0, NULL, NULL, NULL, cbdata);
}
/* flag that the debugger is complete so we can exit */
wait_for_release = false;
}
static void _register_fn(int status,
size_t evhandler_ref,
void *cbdata)
{
volatile int *active = (volatile int*)cbdata;
if (0 != status) {
fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
status, (unsigned long)evhandler_ref);
}
*active = status;
}
int main(int argc, char* argv[])
{
int rank, size;
opal_list_t *codes;
opal_value_t *kv;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (0 == rank) {
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
}
codes = OBJ_NEW(opal_list_t);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup("errorcode");
kv->type = OPAL_INT;
kv->data.integer = MEMPROBE_RELEASE;
opal_list_append(codes, &kv->super);
active = -1;
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
while (-1 == active) {
usleep(10);
}
/* now wait for notification */
while (wait_for_release) {
usleep(10);
}
wait_for_release = true;
/* perform a barrier so some communication will occur, thus
* requiring exchange of endpoint info */
MPI_Barrier(MPI_COMM_WORLD);
if (0 == rank) {
fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
}
/* wait again while memory is sampled */
while (wait_for_release) {
usleep(10);
}
MPI_Finalize();
return 0;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -157,6 +157,7 @@ typedef uint32_t pmix_rank_t;
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */
#define PMIX_PROCID "pmix.procid" // (pmix_proc_t) process identifier
#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -282,6 +283,8 @@ typedef uint32_t pmix_rank_t;
#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform
#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */
#define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -553,11 +553,18 @@ static void _putfn(int sd, short args, void *cbdata)
{
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
pmix_status_t rc;
pmix_kval_t *kv;
pmix_kval_t *kv = NULL;
pmix_nspace_t *ns;
uint8_t *tmp;
size_t len;
/* no need to push info that starts with "pmix" as that is
* info we would have been provided at startup */
if (0 == strncmp(cb->key, "pmix", 4)) {
rc = PMIX_SUCCESS;
goto done;
}
/* setup to xfer the data */
kv = PMIX_NEW(pmix_kval_t);
kv->key = strdup(cb->key); // need to copy as the input belongs to the user
@ -622,7 +629,9 @@ static void _putfn(int sd, short args, void *cbdata)
}
done:
PMIX_RELEASE(kv); // maintain accounting
if (NULL != kv) {
PMIX_RELEASE(kv); // maintain accounting
}
cb->pstatus = rc;
cb->active = false;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -763,7 +763,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
* us to attempt to retrieve it from the server */
for (n=0; n < cb->ninfo; n++) {
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
cb->info[n].value.data.flag) {
(PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
/* they don't want us to try and retrieve it */
pmix_output_verbose(2, pmix_globals.debug_output,
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -356,6 +356,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
sing->code = cd->codes[0];
index = pmix_globals.events.nhdlrs;
sing->index = index;
sing->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs;
sing->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super,
@ -365,17 +366,17 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */
--pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack;
}
/* must be a multi-code registration */
multi = PMIX_NEW(pmix_multi_event_t);
@ -387,6 +388,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t));
index = pmix_globals.events.nhdlrs;
multi->index = index;
multi->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs;
multi->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super,
@ -396,9 +398,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */
--pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -145,121 +145,52 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super);
}
static void completion_handler(int status, void *cbdata)
static void event_hdlr_complete(pmix_status_t status, void *cbdata)
{
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata;
if (NULL != chain->info) {
OPAL_LIST_RELEASE(chain->info);
}
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
OBJ_RELEASE(op);
}
static void progress_local_event_hdlr(int status,
opal_list_t *results,
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
void *notification_cbdata)
static void return_local_event_hdlr(int status, opal_list_t *results,
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
void *notification_cbdata)
{
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata;
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_status_t pstatus;
size_t n;
opal_list_item_t *nxt;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
/* if the caller indicates that the chain is completed, then stop here */
if (OPAL_ERR_HANDLERS_COMPLETE == status) {
goto complete;
}
if (NULL != cd->pmixcbfunc) {
op = OBJ_NEW(pmix2x_opcaddy_t);
/* if any results were provided, then add them here */
if (NULL != results) {
while (NULL != (nxt = opal_list_remove_first(results))) {
opal_list_append(results, nxt);
}
}
/* see if we need to continue, starting with the single code events */
if (NULL != chain->sing) {
/* the last handler was for a single code - see if there are
* any others that match this event */
while (opal_list_get_end(&mca_pmix_pmix2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) {
sing = (opal_pmix2x_single_event_t*)nxt;
if (sing->code == chain->status) {
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
}
}
/* if we get here, then there are no more single code
* events that match */
chain->sing = NULL;
/* pickup the beginning of the multi-code event list */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.multi_events)) {
chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.multi_events);
}
}
/* see if we need to continue with the multi code events */
if (NULL != chain->multi) {
while (opal_list_get_end(&mca_pmix_pmix2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) {
multi = (opal_pmix2x_multi_event_t*)nxt;
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
if (NULL != results) {
/* convert the list of results to an array of info */
op->ninfo = opal_list_get_size(results);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* if we get here, then there are no more multi-mode
* events that match */
chain->multi = NULL;
/* pickup the beginning of the default event list */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.default_events);
}
/* convert the status */
pstatus = pmix2x_convert_opalrc(status);
/* call the library's callback function */
cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata);
}
/* if they didn't want it to go to a default handler, then we are done */
if (chain->nondefault) {
goto complete;
/* release the threadshift object */
if (NULL != cd->info) {
OPAL_LIST_RELEASE(cd->info);
}
OBJ_RELEASE(cd);
if (NULL != chain->def) {
if (opal_list_get_end(&mca_pmix_pmix2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) {
def = (opal_pmix2x_default_event_t*)nxt;
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
}
}
complete:
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata);
}
/* maintain acctng */
OBJ_RELEASE(chain);
/* let the caller know that we are done with their callback */
/* release the caller */
if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, thiscbdata);
}
@ -268,92 +199,29 @@ static void progress_local_event_hdlr(int status,
static void _event_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
size_t n;
opal_pmix2x_event_chain_t *chain;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_t *event;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status);
"%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status);
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
/* point it at our final callback */
chain->final_cbfunc = completion_handler;
chain->final_cbdata = chain;
/* carry across provided info */
chain->status = cd->status;
chain->source = cd->pname;
chain->info = cd->info;
chain->nondefault = cd->nondefault;
/* cycle thru the single-event registrations first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (sing->code == chain->status) {
/* cycle thru the registrations */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (cd->id == event->index) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->sing = sing;
* callback function to our callback function */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING SINGLE EVHDLR",
"%s _EVENT_HDLR CALLING EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
event->handler(cd->status, &cd->pname,
cd->info, &cd->results,
return_local_event_hdlr, (void*)cd);
return;
}
}
/* if we didn't find any match in the single-event registrations,
* then cycle thru the multi-event registrations next */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
}
}
/* if they didn't want it to go to a default handler, then we are done */
if (chain->nondefault) {
/* if we get here, then we need to cache this event in case they
* register for it later - we cannot lose individual events */
opal_list_append(&mca_pmix_pmix2x_component.cache, &chain->super);
return;
}
/* we are done with the threadshift caddy */
OBJ_RELEASE(cd);
/* finally, pass it to any default handlers */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
/* if we didn't find a match, we still have to call their final callback */
if (NULL != cd->pmixcbfunc) {
cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata);
}
return;
}
@ -385,6 +253,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status);
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->id = evhdlr_registration_id;
cd->pmixcbfunc = cbfunc;
cd->cbdata = cbdata;
/* convert the incoming status */
cd->status = pmix2x_convert_rc(status);
@ -409,9 +280,6 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
if (NULL != info) {
cd->info = OBJ_NEW(opal_list_t);
for (n=0; n < ninfo; n++) {
if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
cd->nondefault = true;
}
iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(info[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) {
@ -422,17 +290,25 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
opal_list_append(cd->info, &iptr->super);
}
}
/* convert the array of prior results */
if (NULL != results) {
for (n=0; n < nresults; n++) {
iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(results[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(iptr);
continue;
}
opal_list_append(&cd->results, &iptr->super);
}
}
/* now push it into the local thread */
event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _event_hdlr, cd);
event_active(&cd->ev, EV_WRITE, 1);
/* we don't need any of the data they provided,
* so let them go - also tell them that we will handle
* everything from this point forward */
if (NULL != cbfunc) {
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
}
}
opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank)
@ -536,7 +412,7 @@ pmix_status_t pmix2x_convert_opalrc(int rc)
case OPAL_SUCCESS:
return PMIX_SUCCESS;
default:
return PMIX_ERROR;
return rc;
}
}
@ -620,7 +496,7 @@ int pmix2x_convert_rc(pmix_status_t rc)
case PMIX_SUCCESS:
return OPAL_SUCCESS;
default:
return OPAL_ERROR;
return rc;
}
}
@ -735,6 +611,10 @@ void pmix2x_value_load(pmix_value_t *v,
{
opal_pmix2x_jobid_trkr_t *job;
bool found;
opal_list_t *list;
opal_value_t *val;
pmix_info_t *info;
size_t n;
switch(kv->type) {
case OPAL_UNDEF:
@ -876,8 +756,22 @@ void pmix2x_value_load(pmix_value_t *v,
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
break;
case OPAL_PTR:
v->type = PMIX_POINTER;
v->data.ptr = kv->data.ptr;
/* if someone returned a pointer, it must be to a list of
* opal_value_t's that we need to convert to a pmix_data_array
* of pmix_info_t structures */
list = (opal_list_t*)kv->data.ptr;
v->type = PMIX_DATA_ARRAY;
v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
v->data.darray->type = PMIX_INFO;
v->data.darray->size = opal_list_get_size(list);
PMIX_INFO_CREATE(info, v->data.darray->size);
v->data.darray->array = info;
n=0;
OPAL_LIST_FOREACH(val, list, opal_value_t) {
(void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&info[n].value, val);
++n;
}
break;
default:
/* silence warnings */
@ -1041,16 +935,27 @@ int pmix2x_value_unload(opal_value_t *kv,
return rc;
}
static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
op->event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
if (NULL != op->evregcbfunc) {
op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata);
}
OBJ_RELEASE(op);
}
static void _reg_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_event_chain_t *chain;
opal_pmix2x_single_event_t *sing = NULL;
opal_pmix2x_multi_event_t *multi = NULL;
opal_pmix2x_default_event_t *def = NULL;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
int i;
bool prepend = false;
size_t n;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
@ -1058,116 +963,46 @@ static void _reg_hdlr(int sd, short args, void *cbdata)
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == cd->event_codes) ? "NULL" : "NON-NULL");
if (NULL != cd->info) {
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) {
prepend = true;
break;
}
}
}
op = OBJ_NEW(pmix2x_opcaddy_t);
op->evregcbfunc = cd->cbfunc;
op->cbdata = cd->cbdata;
if (NULL == cd->event_codes) {
/* this is a default handler */
def = OBJ_NEW(opal_pmix2x_default_event_t);
def->handler = cd->evhandler;
def->index = mca_pmix_pmix2x_component.evindex;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.default_events, &def->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.default_events, &def->super);
}
} else if (1 == opal_list_get_size(cd->event_codes)) {
/* single handler */
sing = OBJ_NEW(opal_pmix2x_single_event_t);
kv = (opal_value_t*)opal_list_get_first(cd->event_codes);
sing->code = kv->data.integer;
sing->index = mca_pmix_pmix2x_component.evindex;
sing->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_prepend(&mca_pmix_pmix2x_component.single_events, &sing->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_append(&mca_pmix_pmix2x_component.single_events, &sing->super);
}
} else {
multi = OBJ_NEW(opal_pmix2x_multi_event_t);
multi->ncodes = opal_list_get_size(cd->event_codes);
multi->codes = (int*)malloc(multi->ncodes * sizeof(int));
i=0;
/* convert the event codes */
if (NULL != cd->event_codes) {
op->ncodes = opal_list_get_size(cd->event_codes);
op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t));
n=0;
OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) {
multi->codes[i] = kv->data.integer;
++i;
}
multi->index = mca_pmix_pmix2x_component.evindex;
multi->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.multi_events, &multi->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.multi_events, &multi->super);
op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer);
}
}
/* release the caller */
if (NULL != cd->cbfunc) {
cd->cbfunc(OPAL_SUCCESS, mca_pmix_pmix2x_component.evindex, cd->cbdata);
}
mca_pmix_pmix2x_component.evindex++;
/* check if any matching notifications have been cached - only nondefault
* events will have been cached*/
if (NULL == def) {
/* check single code registrations */
if (NULL != sing) {
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
if (sing->code == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->sing = sing;
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
} else if (NULL != multi) {
/* check for multi code registrations */
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->multi = multi;
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
/* convert the list of info to an array of pmix_info_t */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* register the event */
op->event = OBJ_NEW(opal_pmix2x_event_t);
op->event->handler = cd->evhandler;
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super);
PMIx_Register_event_handler(op->pcodes, op->ncodes,
op->info, op->ninfo,
pmix2x_event_hdlr, errreg_cbfunc, op);
OBJ_RELEASE(cd);
return;
}
static void register_handler(opal_list_t *event_codes,
opal_list_t *info,
opal_pmix_notification_fn_t evhandler,
@ -1184,36 +1019,20 @@ static void register_handler(opal_list_t *event_codes,
static void _dereg_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_t *event;
/* check the single events first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (cd->handler == sing->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.single_events, &sing->super);
OBJ_RELEASE(sing);
goto release;
}
}
/* check multi events */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
if (cd->handler == multi->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.multi_events, &multi->super);
OBJ_RELEASE(multi);
goto release;
}
}
/* check default events */
OPAL_LIST_FOREACH(def, &mca_pmix_pmix2x_component.default_events, opal_pmix2x_default_event_t) {
if (cd->handler == def->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.default_events, &def->super);
OBJ_RELEASE(def);
/* look for this event */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (cd->handler == event->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
OBJ_RELEASE(event);
break;
}
}
/* tell the library to deregister this handler */
PMIx_Deregister_event_handler(cd->handler, NULL, NULL);
release:
/* release the caller */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_SUCCESS, cd->cbdata);
}
@ -1230,90 +1049,81 @@ static void deregister_handler(size_t evhandler,
return;
}
static void _notify_event(int sd, short args, void *cbdata)
static void notify_complete(pmix_status_t status, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
size_t i;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_chain_t *chain;
/* check the single events first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (cd->status == sing->code) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING SINGLE EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
if (NULL != op->opcbfunc) {
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
}
/* check multi events */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
for (i=0; i < multi->ncodes; i++) {
if (cd->status == multi->codes[i]) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING MULTI EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
OBJ_RELEASE(op);
}
static void _notify(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_proc_t p, *pptr;
pmix_status_t pstatus;
size_t n;
int rc=OPAL_SUCCESS;
pmix_data_range_t prange;
opal_pmix2x_jobid_trkr_t *job, *jptr;
op = OBJ_NEW(pmix2x_opcaddy_t);
/* convert the status */
pstatus = pmix2x_convert_opalrc(cd->status);
/* convert the source */
if (NULL == cd->source) {
pptr = NULL;
} else {
/* look thru our list of jobids and find the
* corresponding nspace */
job = NULL;
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->source->jobid) {
job = jptr;
break;
}
}
if (NULL == job) {
rc = OPAL_ERR_NOT_FOUND;
goto release;
}
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
pptr = &p;
}
/* convert the range */
prange = pmix2x_convert_opalrange(cd->range);
/* convert the list of info */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* check default events */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING DEFAULT EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
/* if we get here, then there are no registered event handlers */
/* ask the library to notify our clients */
pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op);
rc = pmix2x_convert_rc(pstatus);
release:
/* release the caller */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata);
cd->opcbfunc(rc, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
static int notify_event(int status,
@ -1324,7 +1134,7 @@ static int notify_event(int status,
{
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata);
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata);
return OPAL_SUCCESS;
}
@ -1401,47 +1211,14 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
opal_list_item_t,
NULL, NULL);
OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t,
opal_list_item_t,
NULL, NULL);
static void mtevcon(opal_pmix2x_multi_event_t *p)
static void evcon(opal_pmix2x_event_t *p)
{
p->codes = NULL;
p->ncodes = 0;
p->handler = NULL;
p->cbdata = NULL;
}
static void mtevdes(opal_pmix2x_multi_event_t *p)
{
if (NULL != p->codes) {
free(p->codes);
}
}
OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t,
OBJ_CLASS_INSTANCE(opal_pmix2x_event_t,
opal_list_item_t,
mtevcon, mtevdes);
OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t,
opal_list_item_t,
NULL, NULL);
static void chcon(opal_pmix2x_event_chain_t *p)
{
p->nondefault = false;
p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->sing = NULL;
p->multi = NULL;
p->def = NULL;
p->final_cbfunc = NULL;
p->final_cbdata = NULL;
}
static void chdes(opal_pmix2x_event_chain_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t,
opal_list_item_t,
chcon, chdes);
evcon, NULL);
static void opcon(pmix2x_opcaddy_t *p)
{
@ -1455,11 +1232,15 @@ static void opcon(pmix2x_opcaddy_t *p)
p->apps = NULL;
p->sz = 0;
p->active = false;
p->codes = NULL;
p->pcodes = NULL;
p->event = NULL;
p->opcbfunc = NULL;
p->mdxcbfunc = NULL;
p->valcbfunc = NULL;
p->lkcbfunc = NULL;
p->spcbfunc = NULL;
p->evregcbfunc = NULL;
p->cbdata = NULL;
}
static void opdes(pmix2x_opcaddy_t *p)
@ -1476,6 +1257,9 @@ static void opdes(pmix2x_opcaddy_t *p)
if (NULL != p->apps) {
PMIX_APP_FREE(p->apps, p->sz);
}
if (NULL != p->pcodes) {
free(p->pcodes);
}
}
OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t,
opal_object_t,
@ -1513,12 +1297,17 @@ static void tscon(pmix2x_threadshift_t *p)
p->source = NULL;
p->event_codes = NULL;
p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->evhandler = NULL;
p->nondefault = false;
p->cbfunc = NULL;
p->opcbfunc = NULL;
p->cbdata = NULL;
}
static void tsdes(pmix2x_threadshift_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(pmix2x_threadshift_t,
opal_object_t,
tscon, NULL);
tscon, tsdes);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -39,9 +39,7 @@ typedef struct {
opal_list_t jobids;
bool native_launch;
size_t evindex;
opal_list_t single_events;
opal_list_t multi_events;
opal_list_t default_events;
opal_list_t events;
int cache_size;
opal_list_t cache;
} mca_pmix_pmix2x_component_t;
@ -61,42 +59,10 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t);
typedef struct {
opal_list_item_t super;
size_t index;
int code;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_single_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
int *codes;
size_t ncodes;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_multi_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_default_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t);
typedef struct {
opal_list_item_t super;
int status;
bool nondefault;
opal_process_name_t source;
pmix_data_range_t range;
opal_list_t *info;
opal_list_t results;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix_op_cbfunc_t final_cbfunc;
void *final_cbdata;
} opal_pmix2x_event_chain_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t);
void *cbdata;
} opal_pmix2x_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_t);
typedef struct {
opal_object_t super;
@ -111,11 +77,16 @@ typedef struct {
pmix_app_t *apps;
size_t sz;
volatile bool active;
opal_list_t *codes;
pmix_status_t *pcodes;
size_t ncodes;
opal_pmix2x_event_t *event;
opal_pmix_op_cbfunc_t opcbfunc;
opal_pmix_modex_cbfunc_t mdxcbfunc;
opal_pmix_value_cbfunc_t valcbfunc;
opal_pmix_lookup_cbfunc_t lkcbfunc;
opal_pmix_spawn_cbfunc_t spcbfunc;
opal_pmix_evhandler_reg_cbfunc_t evregcbfunc;
void *cbdata;
} pmix2x_opcaddy_t;
OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t);
@ -152,28 +123,15 @@ typedef struct {
size_t handler;
opal_list_t *event_codes;
opal_list_t *info;
opal_list_t results;
opal_pmix_notification_fn_t evhandler;
opal_pmix_evhandler_reg_cbfunc_t cbfunc;
opal_pmix_op_cbfunc_t opcbfunc;
pmix_event_notification_cbfunc_fn_t pmixcbfunc;
void *cbdata;
} pmix2x_threadshift_t;
OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \
do { \
pmix2x_opalcaddy_t *_cd; \
_cd = OBJ_NEW(pmix2x_opalcaddy_t); \
_cd->id = (i); \
_cd->status = (s); \
_cd->source = (sr); \
_cd->info = (i); \
_cd->evcbfunc = (cb); \
_cd->cbdata = (cd); \
event_assign(&((_cd)->ev), opal_pmix_base.evbase, \
-1, EV_WRITE, (fn), (_cd)); \
event_active(&((_cd)->ev), EV_WRITE, 1); \
} while(0)
#define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \
do { \
pmix2x_threadshift_t *_cd; \

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -36,7 +36,6 @@
static pmix_proc_t my_proc;
static char *dbgvalue=NULL;
static size_t errhdler_ref = 0;
#define PMIX_WAIT_FOR_COMPLETION(a) \
do { \
@ -50,7 +49,9 @@ static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
errhdler_ref = errhandler_ref;
opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata;
event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
@ -62,6 +63,7 @@ int pmix2x_client_init(void)
pmix_status_t rc;
int dbg;
opal_pmix2x_jobid_trkr_t *job;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client init");
@ -98,7 +100,9 @@ int pmix2x_client_init(void)
opal_proc_set_name(&pname);
/* register the default event handler */
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL);
event = OBJ_NEW(opal_pmix2x_event_t);
opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event);
return OPAL_SUCCESS;
}
@ -106,12 +110,16 @@ int pmix2x_client_init(void)
int pmix2x_client_finalize(void)
{
pmix_status_t rc;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client finalize");
/* deregister the default event handler */
PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL);
/* deregister all event handlers */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
PMIx_Deregister_event_handler(event->index, NULL, NULL);
}
/* the list will be destructed when the component is finalized */
rc = PMIx_Finalize(NULL, 0);
return pmix2x_convert_rc(rc);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
@ -36,7 +36,6 @@ const char *opal_pmix_pmix2x_component_version_string =
static int external_open(void);
static int external_close(void);
static int external_component_query(mca_base_module_t **module, int *priority);
static int external_register(void);
/*
@ -66,7 +65,6 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.mca_open_component = external_open,
.mca_close_component = external_close,
.mca_query_component = external_component_query,
.mca_register_component_params = external_register,
},
/* Next the MCA v1.0.0 component meta data */
.base_data = {
@ -77,27 +75,11 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.native_launch = false
};
static int external_register(void)
{
mca_pmix_pmix2x_component.cache_size = 256;
mca_base_component_var_register(&mca_pmix_pmix2x_component.super.base_version,
"cache_size", "Size of the ring buffer cache for events",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&mca_pmix_pmix2x_component.cache_size);
return OPAL_SUCCESS;
}
static int external_open(void)
{
mca_pmix_pmix2x_component.evindex = 0;
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.single_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.multi_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.default_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.cache, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.events, opal_list_t);
return OPAL_SUCCESS;
}
@ -105,10 +87,7 @@ static int external_open(void)
static int external_close(void)
{
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.single_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.multi_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.default_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.cache);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.events);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -88,6 +88,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -117,6 +118,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
#define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and
// thus cannot be prefixed with "pmix"
#define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon
#define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes
/* size info */
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
@ -220,6 +223,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform"
#define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define OPAL_PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */
#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -4,7 +4,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2013 Inria. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
@ -200,10 +200,9 @@ char* opal_get_proc_hostname(const opal_proc_t *proc)
}
/* if we don't already have it, then try to get it */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
(char**)&(proc->proc_hostname), OPAL_STRING);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
(char**)&(proc->proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
OPAL_ERROR_LOG(ret);
return "unknown"; // return something so the caller doesn't segfault
}

Просмотреть файл

@ -3,6 +3,7 @@
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -71,7 +72,7 @@ static int dfs_app_close(void)
static int dfs_app_component_query(mca_base_module_t **module, int *priority)
{
if (ORTE_PROC_IS_APP && orte_staged_execution) {
if (ORTE_PROC_IS_APP) {
/* set our priority high as we are the default for apps */
*priority = 1000;
*module = (mca_base_module_t *)&orte_dfs_app_module;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -89,7 +89,6 @@ static int rte_init(void)
char *envar, *ev1, *ev2;
uint64_t unique_key[2];
char *string_key;
char *rmluri;
opal_value_t *kv;
char *val;
int u32, *u32ptr;
@ -399,19 +398,9 @@ static int rte_init(void)
orte_process_info.max_procs = orte_process_info.num_procs;
}
/*** PUSH DATA FOR OTHERS TO FIND ***/
/* push our RML URI in case others need to talk directly to us */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "pmix put uri";
goto error;
}
free(rmluri);
/* push our hostname so others can find us, if they need to */
/* push our hostname so others can find us, if they need to - the
* native PMIx component will ignore this request as the hostname
* is provided by the system */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "db store hostname";

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -336,13 +336,6 @@ static int rte_init(void)
return rc;
}
/* push our hostname so others can find us, if they need to */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "db store hostname";
goto error;
}
return ORTE_SUCCESS;
error:

Просмотреть файл

@ -1341,9 +1341,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, "1");
}
if (orte_map_reduce) {
opal_argv_append(argc, argv, "--mapreduce");
}
if (orte_map_stddiag_to_stderr) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");

Просмотреть файл

@ -1101,11 +1101,6 @@ static int setup_child(orte_job_t *jdata,
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env);
}
/* if we are using staged execution, tell it */
if (orte_staged_execution) {
opal_setenv("OMPI_MCA_orte_staged_execution", "1", true, &app->env);
}
/* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire
*/

Просмотреть файл

@ -126,7 +126,6 @@ static struct {
int uri_pipe;
int singleton_died_pipe;
bool abort;
bool mapreduce;
bool tree_spawn;
char *hnp_topo_sig;
bool test_suicide;
@ -217,10 +216,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING,
"Topology signature of HNP" },
{ NULL, '\0', "mapreduce", "mapreduce", 0,
&orted_globals.mapreduce, OPAL_CMD_LINE_TYPE_BOOL,
"Whether to report process bindings to stderr" },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -335,11 +330,6 @@ int orte_daemon(int argc, char *argv[])
free(tmp_env_var);
#endif
/* if mapreduce set, flag it */
if (orted_globals.mapreduce) {
orte_map_reduce = true;
}
/* detach from controlling terminal
* otherwise, remain attached so output can get to us
*/

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -137,6 +137,9 @@ static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jar
static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
static void orte_timeout_wakeup(int sd, short args, void *cbdata);
static void orte_profile_wakeup(int sd, short args, void *cbdata);
static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata);
static void launch_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata);
@ -896,20 +899,6 @@ int orte_submit_job(char *argv[], int *index,
}
}
/* check for debugger test envars and forward them if necessary */
if (NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
char *evar;
evar = getenv("ORTE_TEST_DEBUGGER_SLEEP");
for (i=0; i < (orte_app_idx_t)jdata->num_apps; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
opal_setenv("ORTE_TEST_DEBUGGER_ATTACH", "1", true, &app->env);
if (NULL != evar) {
opal_setenv("ORTE_TEST_DEBUGGER_SLEEP", evar, true, &app->env);
}
}
}
}
/* check for suicide test directives */
if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") ||
NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
@ -956,6 +945,8 @@ int orte_submit_job(char *argv[], int *index,
ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
//goto DONE;
}
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
orte_memprofile_timeout->tv.tv_sec = timeout_seconds;
orte_memprofile_timeout->tv.tv_usec = 0;
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
@ -2212,10 +2203,9 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
static bool mpir_breakpoint_fired = false;
static void _send_notification(void)
static void _send_notification(int status)
{
opal_buffer_t buf;
int status = OPAL_ERR_DEBUGGER_RELEASE;
orte_grpcomm_signature_t sig;
int rc;
opal_value_t kv, *kvptr;
@ -2448,7 +2438,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */
_send_notification();
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
}
}
return;
@ -2547,7 +2537,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */
_send_notification();
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
} else if (!orte_debugger_test_attach) {
/* if I am launching debugger daemons, then I need to do so now
* that the job has been started and I know which nodes have
@ -3133,6 +3123,16 @@ void orte_timeout_wakeup(int sd, short args, void *cbdata)
static int nreports = 0;
static orte_timer_t profile_timer;
static int nchecks = 0;
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag,
@ -3167,26 +3167,33 @@ static void profile_recv(int status, orte_process_name_t* sender,
done:
--nreports;
if (nreports == 0) {
++nchecks;
/* cancel the timeout */
OBJ_DESTRUCT(&profile_timer);
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
/* notify to release */
_send_notification(12345);
/* if this was the first measurement, then we need to
* let the probe move along */
if (2 > nchecks) {
/* reset the event */
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
orte_profile_wakeup, NULL);
opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
/* reset the timer */
OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
opal_event_evtimer_set(orte_event_base,
profile_timer.ev, profile_timeout, NULL);
opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
profile_timer.tv.tv_sec = 30;
opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
return;
}
}
}
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
void orte_profile_wakeup(int sd, short args, void *cbdata)
{
orte_job_t *jdata = (orte_job_t*)cbdata;
orte_job_t *dmns;
orte_proc_t *dmn;
int i;
@ -3202,8 +3209,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
/* set the recv */
nreports = 1; // always get a report from ourselves
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
/* setup the buffer */
buffer = OBJ_NEW(opal_buffer_t);
@ -3213,13 +3218,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
OBJ_RELEASE(buffer);
goto giveup;
}
/* pack the jobid in question */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &jdata->jobid, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
goto giveup;
}
/* goes to just the first daemon beyond ourselves - no need to get it from everyone */
dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) {

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -35,6 +35,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/dss/dss.h"
#include "opal/mca/pstat/pstat.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps_types.h"
@ -449,13 +450,18 @@ static void _query(int sd, short args, void *cbdata)
opal_pmix_query_t *q;
opal_value_t *kv;
orte_job_t *jdata;
int rc;
opal_list_t *results;
orte_proc_t *proct;
int rc, i, num_replies;
opal_list_t *results, targets, *array;
size_t n;
uint32_t key;
void *nptr;
char **nspaces=NULL, nspace[512];
char **ans = NULL;
bool local_only;
orte_namelist_t *nm;
opal_pstats_t pstat;
float pss;
opal_output_verbose(2, orte_pmix_server_globals.output,
"%s processing query",
@ -522,6 +528,84 @@ static void _query(int sd, short args, void *cbdata)
opal_list_append(results, &kv->super);
opal_argv_free(ans);
ans = NULL;
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_MEMORY_USAGE)) {
/* check the qualifiers to find the procs they want to
* know about - if qualifiers are NULL, then get it for
* the daemons + all active jobs */
if (0 == opal_list_get_size(&q->qualifiers)) {
/* create a request tracker */
/* xcast a request for all memory usage */
/* return success - the callback will be done
* once we get the results */
return;
}
/* scan the qualifiers */
OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) {
if (0 == strcmp(kv->key, OPAL_PMIX_QUERY_LOCAL_ONLY)) {
if (OPAL_UNDEF == kv->type || kv->data.flag) {
local_only = true;
} else {
local_only = false;
}
} else if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) {
/* save this directive on our list of targets */
nm = OBJ_NEW(orte_namelist_t);
}
}
/* if they have asked for only our local procs or daemon,
* then we can just get the data directly */
if (local_only) {
if (0 == opal_list_get_size(&targets)) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_QUERY_MEMORY_USAGE);
kv->type = OPAL_PTR;
array = OBJ_NEW(opal_list_t);
kv->data.ptr = array;
opal_list_append(results, &kv->super);
/* collect my memory usage */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
opal_pstat.query(orte_process_info.pid, &pstat, NULL);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_DAEMON_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pstat.pss;
opal_list_append(array, &kv->super);
OBJ_DESTRUCT(&pstat);
/* collect the memory usage of all my children */
pss = 0.0;
num_replies = 0;
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
/* collect the stats on this proc */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
if (OPAL_SUCCESS == opal_pstat.query(proct->pid, &pstat, NULL)) {
pss += pstat.pss;
++num_replies;
}
OBJ_DESTRUCT(&pstat);
}
}
/* compute the average value */
if (0 < num_replies) {
pss /= (float)num_replies;
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_CLIENT_AVG_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pss;
opal_list_append(array, &kv->super);
} else {
}
} else {
/* if they want it for remote procs, see who is hosting them
* and ask directly for the info - if rank=wildcard, then
* we need to xcast the request and collect the results */
}
}
}
}

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -407,6 +407,14 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->type = OPAL_UINT32;
kv->data.uint32 = pptr->node->index;
opal_list_append(pmap, &kv->super);
if (map->num_nodes < orte_hostname_cutoff) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_HOSTNAME);
kv->type = OPAL_STRING;
kv->data.string = strdup(pptr->node->name);
opal_list_append(pmap, &kv->super);
}
}
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -86,6 +86,7 @@ bool orte_have_fqdn_allocation = false;
bool orte_show_resolved_nodenames = false;
bool orte_retain_aliases = false;
int orte_use_hostname_alias = -1;
int orte_hostname_cutoff = 1000;
int orted_debug_failure = -1;
int orted_debug_failure_delay = -1;
@ -183,10 +184,6 @@ int orte_stat_history_size = -1;
/* envars to forward */
char **orte_forwarded_envars = NULL;
/* map-reduce mode */
bool orte_map_reduce = false;
bool orte_staged_execution = false;
/* map stddiag output to stderr so it isn't forwarded to mpirun */
bool orte_map_stddiag_to_stderr = false;
@ -196,9 +193,6 @@ int orte_max_vm_size = -1;
/* user debugger */
char *orte_base_user_debugger = NULL;
/* modex cutoff */
uint32_t orte_direct_modex_cutoff = UINT32_MAX;
int orte_debug_output = -1;
bool orte_debug_daemons_flag = false;
bool orte_xml_output = false;

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -466,6 +466,7 @@ ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
ORTE_DECLSPEC extern bool orte_retain_aliases;
ORTE_DECLSPEC extern int orte_use_hostname_alias;
ORTE_DECLSPEC extern int orte_hostname_cutoff;
/* debug flags */
ORTE_DECLSPEC extern int orted_debug_failure;
@ -564,10 +565,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
/* envars to forward */
ORTE_DECLSPEC extern char **orte_forwarded_envars;
/* map-reduce mode */
ORTE_DECLSPEC extern bool orte_map_reduce;
ORTE_DECLSPEC extern bool orte_staged_execution;
/* map stddiag output to stderr so it isn't forwarded to mpirun */
ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
@ -582,9 +579,6 @@ ORTE_DECLSPEC extern char *orte_base_user_debugger;
*/
ORTE_DECLSPEC extern char *orte_daemon_cores;
/* cutoff for collective modex */
ORTE_DECLSPEC extern uint32_t orte_direct_modex_cutoff;
END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -429,6 +429,13 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_retain_aliases);
orte_hostname_cutoff = 1000;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
"Pass hostnames to all procs when #nodes is less than cutoff [default:1000]",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
&orte_hostname_cutoff);
/* which alias to use in MPIR_proctab */
orte_use_hostname_alias = 1;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
@ -659,13 +666,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_allowed_exit_without_sync);
orte_staged_execution = false;
(void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
"Staged execution is being used",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_staged_execution);
orte_report_child_jobs_separately = false;
(void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
"Return the exit status of the primary job only",
@ -754,17 +754,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&orte_daemon_cores);
/* cutoff for full modex */
orte_direct_modex_cutoff = UINT32_MAX;
id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
"If the number of processes in the application exceeds the provided value,"
"modex will be done upon demand [default: UINT32_MAX]",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_direct_modex_cutoff);
/* register a synonym for old name */
mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
/* get the conduit params */
orte_coll_transport = "fabric,ethernet";
(void) mca_base_var_register("orte", "orte", "coll", "transports",