1
1

Merge pull request #2654 from rhc54/topic/memory

Remove stale global variables
Этот коммит содержится в:
Ralph Castain 2017-01-02 15:09:09 -08:00 коммит произвёл GitHub
родитель 5f68d655d6 9eab9a1ed3
Коммит 218aed144d
25 изменённых файлов: 554 добавлений и 668 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -111,6 +111,7 @@ contrib/platform/intel/bend/*orcm*
contrib/scaling/orte_no_op
contrib/scaling/mpi_no_op
contrib/scaling/mpi_barrier
contrib/scaling/mpi_memprobe
examples/hello_c
examples/hello_cxx

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = orte_no_op mpi_no_op
PROGS = orte_no_op mpi_no_op mpi_memprobe
all: $(PROGS)
@ -10,5 +10,8 @@ orte_no_op:
mpi_no_op:
mpicc -o mpi_no_op mpi_no_op.c
mpi_memprobe:
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
clean:
rm -f $(PROGS) *~

98
contrib/scaling/mpi_memprobe.c Обычный файл
Просмотреть файл

@ -0,0 +1,98 @@
/* -*- C -*-
*
* $HEADER$
*
* The most basic of MPI applications
*/
#include "orte_config.h"
#include <stdio.h>
#include "mpi.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
static volatile int active;
static volatile bool wait_for_release = true;
#define MEMPROBE_RELEASE 12345
static void _release_fn(int status,
const opal_process_name_t *source,
opal_list_t *info, opal_list_t *results,
opal_pmix_notification_complete_fn_t cbfunc,
void *cbdata)
{
/* must let the notifier know we are done */
if (NULL != cbfunc) {
cbfunc(0, NULL, NULL, NULL, cbdata);
}
/* flag that the debugger is complete so we can exit */
wait_for_release = false;
}
static void _register_fn(int status,
size_t evhandler_ref,
void *cbdata)
{
volatile int *active = (volatile int*)cbdata;
if (0 != status) {
fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
status, (unsigned long)evhandler_ref);
}
*active = status;
}
int main(int argc, char* argv[])
{
int rank, size;
opal_list_t *codes;
opal_value_t *kv;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (0 == rank) {
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
}
codes = OBJ_NEW(opal_list_t);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup("errorcode");
kv->type = OPAL_INT;
kv->data.integer = MEMPROBE_RELEASE;
opal_list_append(codes, &kv->super);
active = -1;
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
while (-1 == active) {
usleep(10);
}
/* now wait for notification */
while (wait_for_release) {
usleep(10);
}
wait_for_release = true;
/* perform a barrier so some communication will occur, thus
* requiring exchange of endpoint info */
MPI_Barrier(MPI_COMM_WORLD);
if (0 == rank) {
fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
}
/* wait again while memory is sampled */
while (wait_for_release) {
usleep(10);
}
MPI_Finalize();
return 0;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -157,6 +157,7 @@ typedef uint32_t pmix_rank_t;
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */
#define PMIX_PROCID "pmix.procid" // (pmix_proc_t) process identifier
#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -282,6 +283,8 @@ typedef uint32_t pmix_rank_t;
#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform
#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */
#define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -553,11 +553,18 @@ static void _putfn(int sd, short args, void *cbdata)
{
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
pmix_status_t rc;
pmix_kval_t *kv;
pmix_kval_t *kv = NULL;
pmix_nspace_t *ns;
uint8_t *tmp;
size_t len;
/* no need to push info that starts with "pmix" as that is
* info we would have been provided at startup */
if (0 == strncmp(cb->key, "pmix", 4)) {
rc = PMIX_SUCCESS;
goto done;
}
/* setup to xfer the data */
kv = PMIX_NEW(pmix_kval_t);
kv->key = strdup(cb->key); // need to copy as the input belongs to the user
@ -622,7 +629,9 @@ static void _putfn(int sd, short args, void *cbdata)
}
done:
PMIX_RELEASE(kv); // maintain accounting
if (NULL != kv) {
PMIX_RELEASE(kv); // maintain accounting
}
cb->pstatus = rc;
cb->active = false;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -763,7 +763,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
* us to attempt to retrieve it from the server */
for (n=0; n < cb->ninfo; n++) {
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
cb->info[n].value.data.flag) {
(PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
/* they don't want us to try and retrieve it */
pmix_output_verbose(2, pmix_globals.debug_output,
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -356,6 +356,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
sing->code = cd->codes[0];
index = pmix_globals.events.nhdlrs;
sing->index = index;
sing->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs;
sing->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super,
@ -365,17 +366,17 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */
--pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack;
}
/* must be a multi-code registration */
multi = PMIX_NEW(pmix_multi_event_t);
@ -387,6 +388,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t));
index = pmix_globals.events.nhdlrs;
multi->index = index;
multi->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs;
multi->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super,
@ -396,9 +398,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */
--pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX;
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -145,121 +145,52 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super);
}
static void completion_handler(int status, void *cbdata)
static void event_hdlr_complete(pmix_status_t status, void *cbdata)
{
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata;
if (NULL != chain->info) {
OPAL_LIST_RELEASE(chain->info);
}
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
OBJ_RELEASE(op);
}
static void progress_local_event_hdlr(int status,
opal_list_t *results,
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
void *notification_cbdata)
static void return_local_event_hdlr(int status, opal_list_t *results,
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
void *notification_cbdata)
{
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata;
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_status_t pstatus;
size_t n;
opal_list_item_t *nxt;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
/* if the caller indicates that the chain is completed, then stop here */
if (OPAL_ERR_HANDLERS_COMPLETE == status) {
goto complete;
}
if (NULL != cd->pmixcbfunc) {
op = OBJ_NEW(pmix2x_opcaddy_t);
/* if any results were provided, then add them here */
if (NULL != results) {
while (NULL != (nxt = opal_list_remove_first(results))) {
opal_list_append(results, nxt);
}
}
/* see if we need to continue, starting with the single code events */
if (NULL != chain->sing) {
/* the last handler was for a single code - see if there are
* any others that match this event */
while (opal_list_get_end(&mca_pmix_pmix2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) {
sing = (opal_pmix2x_single_event_t*)nxt;
if (sing->code == chain->status) {
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
}
}
/* if we get here, then there are no more single code
* events that match */
chain->sing = NULL;
/* pickup the beginning of the multi-code event list */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.multi_events)) {
chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.multi_events);
}
}
/* see if we need to continue with the multi code events */
if (NULL != chain->multi) {
while (opal_list_get_end(&mca_pmix_pmix2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) {
multi = (opal_pmix2x_multi_event_t*)nxt;
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
if (NULL != results) {
/* convert the list of results to an array of info */
op->ninfo = opal_list_get_size(results);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* if we get here, then there are no more multi-mode
* events that match */
chain->multi = NULL;
/* pickup the beginning of the default event list */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.default_events);
}
/* convert the status */
pstatus = pmix2x_convert_opalrc(status);
/* call the library's callback function */
cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata);
}
/* if they didn't want it to go to a default handler, then we are done */
if (chain->nondefault) {
goto complete;
/* release the threadshift object */
if (NULL != cd->info) {
OPAL_LIST_RELEASE(cd->info);
}
OBJ_RELEASE(cd);
if (NULL != chain->def) {
if (opal_list_get_end(&mca_pmix_pmix2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) {
def = (opal_pmix2x_default_event_t*)nxt;
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
}
}
complete:
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata);
}
/* maintain acctng */
OBJ_RELEASE(chain);
/* let the caller know that we are done with their callback */
/* release the caller */
if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, thiscbdata);
}
@ -268,92 +199,29 @@ static void progress_local_event_hdlr(int status,
static void _event_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
size_t n;
opal_pmix2x_event_chain_t *chain;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_t *event;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status);
"%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status);
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
/* point it at our final callback */
chain->final_cbfunc = completion_handler;
chain->final_cbdata = chain;
/* carry across provided info */
chain->status = cd->status;
chain->source = cd->pname;
chain->info = cd->info;
chain->nondefault = cd->nondefault;
/* cycle thru the single-event registrations first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (sing->code == chain->status) {
/* cycle thru the registrations */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (cd->id == event->index) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->sing = sing;
* callback function to our callback function */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING SINGLE EVHDLR",
"%s _EVENT_HDLR CALLING EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
event->handler(cd->status, &cd->pname,
cd->info, &cd->results,
return_local_event_hdlr, (void*)cd);
return;
}
}
/* if we didn't find any match in the single-event registrations,
* then cycle thru the multi-event registrations next */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
}
}
/* if they didn't want it to go to a default handler, then we are done */
if (chain->nondefault) {
/* if we get here, then we need to cache this event in case they
* register for it later - we cannot lose individual events */
opal_list_append(&mca_pmix_pmix2x_component.cache, &chain->super);
return;
}
/* we are done with the threadshift caddy */
OBJ_RELEASE(cd);
/* finally, pass it to any default handlers */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
/* if we didn't find a match, we still have to call their final callback */
if (NULL != cd->pmixcbfunc) {
cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata);
}
return;
}
@ -385,6 +253,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status);
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->id = evhdlr_registration_id;
cd->pmixcbfunc = cbfunc;
cd->cbdata = cbdata;
/* convert the incoming status */
cd->status = pmix2x_convert_rc(status);
@ -409,9 +280,6 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
if (NULL != info) {
cd->info = OBJ_NEW(opal_list_t);
for (n=0; n < ninfo; n++) {
if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
cd->nondefault = true;
}
iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(info[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) {
@ -422,17 +290,25 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
opal_list_append(cd->info, &iptr->super);
}
}
/* convert the array of prior results */
if (NULL != results) {
for (n=0; n < nresults; n++) {
iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(results[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(iptr);
continue;
}
opal_list_append(&cd->results, &iptr->super);
}
}
/* now push it into the local thread */
event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _event_hdlr, cd);
event_active(&cd->ev, EV_WRITE, 1);
/* we don't need any of the data they provided,
* so let them go - also tell them that we will handle
* everything from this point forward */
if (NULL != cbfunc) {
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
}
}
opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank)
@ -536,7 +412,7 @@ pmix_status_t pmix2x_convert_opalrc(int rc)
case OPAL_SUCCESS:
return PMIX_SUCCESS;
default:
return PMIX_ERROR;
return rc;
}
}
@ -620,7 +496,7 @@ int pmix2x_convert_rc(pmix_status_t rc)
case PMIX_SUCCESS:
return OPAL_SUCCESS;
default:
return OPAL_ERROR;
return rc;
}
}
@ -735,6 +611,10 @@ void pmix2x_value_load(pmix_value_t *v,
{
opal_pmix2x_jobid_trkr_t *job;
bool found;
opal_list_t *list;
opal_value_t *val;
pmix_info_t *info;
size_t n;
switch(kv->type) {
case OPAL_UNDEF:
@ -876,8 +756,22 @@ void pmix2x_value_load(pmix_value_t *v,
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
break;
case OPAL_PTR:
v->type = PMIX_POINTER;
v->data.ptr = kv->data.ptr;
/* if someone returned a pointer, it must be to a list of
* opal_value_t's that we need to convert to a pmix_data_array
* of pmix_info_t structures */
list = (opal_list_t*)kv->data.ptr;
v->type = PMIX_DATA_ARRAY;
v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
v->data.darray->type = PMIX_INFO;
v->data.darray->size = opal_list_get_size(list);
PMIX_INFO_CREATE(info, v->data.darray->size);
v->data.darray->array = info;
n=0;
OPAL_LIST_FOREACH(val, list, opal_value_t) {
(void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&info[n].value, val);
++n;
}
break;
default:
/* silence warnings */
@ -1041,16 +935,27 @@ int pmix2x_value_unload(opal_value_t *kv,
return rc;
}
static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
op->event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
if (NULL != op->evregcbfunc) {
op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata);
}
OBJ_RELEASE(op);
}
static void _reg_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_event_chain_t *chain;
opal_pmix2x_single_event_t *sing = NULL;
opal_pmix2x_multi_event_t *multi = NULL;
opal_pmix2x_default_event_t *def = NULL;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
int i;
bool prepend = false;
size_t n;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
@ -1058,116 +963,46 @@ static void _reg_hdlr(int sd, short args, void *cbdata)
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == cd->event_codes) ? "NULL" : "NON-NULL");
if (NULL != cd->info) {
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) {
prepend = true;
break;
}
}
}
op = OBJ_NEW(pmix2x_opcaddy_t);
op->evregcbfunc = cd->cbfunc;
op->cbdata = cd->cbdata;
if (NULL == cd->event_codes) {
/* this is a default handler */
def = OBJ_NEW(opal_pmix2x_default_event_t);
def->handler = cd->evhandler;
def->index = mca_pmix_pmix2x_component.evindex;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.default_events, &def->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.default_events, &def->super);
}
} else if (1 == opal_list_get_size(cd->event_codes)) {
/* single handler */
sing = OBJ_NEW(opal_pmix2x_single_event_t);
kv = (opal_value_t*)opal_list_get_first(cd->event_codes);
sing->code = kv->data.integer;
sing->index = mca_pmix_pmix2x_component.evindex;
sing->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_prepend(&mca_pmix_pmix2x_component.single_events, &sing->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_append(&mca_pmix_pmix2x_component.single_events, &sing->super);
}
} else {
multi = OBJ_NEW(opal_pmix2x_multi_event_t);
multi->ncodes = opal_list_get_size(cd->event_codes);
multi->codes = (int*)malloc(multi->ncodes * sizeof(int));
i=0;
/* convert the event codes */
if (NULL != cd->event_codes) {
op->ncodes = opal_list_get_size(cd->event_codes);
op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t));
n=0;
OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) {
multi->codes[i] = kv->data.integer;
++i;
}
multi->index = mca_pmix_pmix2x_component.evindex;
multi->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.multi_events, &multi->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.multi_events, &multi->super);
op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer);
}
}
/* release the caller */
if (NULL != cd->cbfunc) {
cd->cbfunc(OPAL_SUCCESS, mca_pmix_pmix2x_component.evindex, cd->cbdata);
}
mca_pmix_pmix2x_component.evindex++;
/* check if any matching notifications have been cached - only nondefault
* events will have been cached*/
if (NULL == def) {
/* check single code registrations */
if (NULL != sing) {
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
if (sing->code == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->sing = sing;
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
} else if (NULL != multi) {
/* check for multi code registrations */
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->multi = multi;
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
/* convert the list of info to an array of pmix_info_t */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* register the event */
op->event = OBJ_NEW(opal_pmix2x_event_t);
op->event->handler = cd->evhandler;
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super);
PMIx_Register_event_handler(op->pcodes, op->ncodes,
op->info, op->ninfo,
pmix2x_event_hdlr, errreg_cbfunc, op);
OBJ_RELEASE(cd);
return;
}
static void register_handler(opal_list_t *event_codes,
opal_list_t *info,
opal_pmix_notification_fn_t evhandler,
@ -1184,36 +1019,20 @@ static void register_handler(opal_list_t *event_codes,
static void _dereg_hdlr(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_t *event;
/* check the single events first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (cd->handler == sing->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.single_events, &sing->super);
OBJ_RELEASE(sing);
goto release;
}
}
/* check multi events */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
if (cd->handler == multi->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.multi_events, &multi->super);
OBJ_RELEASE(multi);
goto release;
}
}
/* check default events */
OPAL_LIST_FOREACH(def, &mca_pmix_pmix2x_component.default_events, opal_pmix2x_default_event_t) {
if (cd->handler == def->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.default_events, &def->super);
OBJ_RELEASE(def);
/* look for this event */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (cd->handler == event->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
OBJ_RELEASE(event);
break;
}
}
/* tell the library to deregister this handler */
PMIx_Deregister_event_handler(cd->handler, NULL, NULL);
release:
/* release the caller */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_SUCCESS, cd->cbdata);
}
@ -1230,90 +1049,81 @@ static void deregister_handler(size_t evhandler,
return;
}
static void _notify_event(int sd, short args, void *cbdata)
static void notify_complete(pmix_status_t status, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
size_t i;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_chain_t *chain;
/* check the single events first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (cd->status == sing->code) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING SINGLE EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
if (NULL != op->opcbfunc) {
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
}
/* check multi events */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
for (i=0; i < multi->ncodes; i++) {
if (cd->status == multi->codes[i]) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING MULTI EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
OBJ_RELEASE(op);
}
static void _notify(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_proc_t p, *pptr;
pmix_status_t pstatus;
size_t n;
int rc=OPAL_SUCCESS;
pmix_data_range_t prange;
opal_pmix2x_jobid_trkr_t *job, *jptr;
op = OBJ_NEW(pmix2x_opcaddy_t);
/* convert the status */
pstatus = pmix2x_convert_opalrc(cd->status);
/* convert the source */
if (NULL == cd->source) {
pptr = NULL;
} else {
/* look thru our list of jobids and find the
* corresponding nspace */
job = NULL;
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->source->jobid) {
job = jptr;
break;
}
}
if (NULL == job) {
rc = OPAL_ERR_NOT_FOUND;
goto release;
}
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
pptr = &p;
}
/* convert the range */
prange = pmix2x_convert_opalrange(cd->range);
/* convert the list of info */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
}
}
}
/* check default events */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING DEFAULT EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
/* if we get here, then there are no registered event handlers */
/* ask the library to notify our clients */
pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op);
rc = pmix2x_convert_rc(pstatus);
release:
/* release the caller */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata);
cd->opcbfunc(rc, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
static int notify_event(int status,
@ -1324,7 +1134,7 @@ static int notify_event(int status,
{
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata);
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata);
return OPAL_SUCCESS;
}
@ -1401,47 +1211,14 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
opal_list_item_t,
NULL, NULL);
OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t,
opal_list_item_t,
NULL, NULL);
static void mtevcon(opal_pmix2x_multi_event_t *p)
static void evcon(opal_pmix2x_event_t *p)
{
p->codes = NULL;
p->ncodes = 0;
p->handler = NULL;
p->cbdata = NULL;
}
static void mtevdes(opal_pmix2x_multi_event_t *p)
{
if (NULL != p->codes) {
free(p->codes);
}
}
OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t,
OBJ_CLASS_INSTANCE(opal_pmix2x_event_t,
opal_list_item_t,
mtevcon, mtevdes);
OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t,
opal_list_item_t,
NULL, NULL);
static void chcon(opal_pmix2x_event_chain_t *p)
{
p->nondefault = false;
p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->sing = NULL;
p->multi = NULL;
p->def = NULL;
p->final_cbfunc = NULL;
p->final_cbdata = NULL;
}
static void chdes(opal_pmix2x_event_chain_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t,
opal_list_item_t,
chcon, chdes);
evcon, NULL);
static void opcon(pmix2x_opcaddy_t *p)
{
@ -1455,11 +1232,15 @@ static void opcon(pmix2x_opcaddy_t *p)
p->apps = NULL;
p->sz = 0;
p->active = false;
p->codes = NULL;
p->pcodes = NULL;
p->event = NULL;
p->opcbfunc = NULL;
p->mdxcbfunc = NULL;
p->valcbfunc = NULL;
p->lkcbfunc = NULL;
p->spcbfunc = NULL;
p->evregcbfunc = NULL;
p->cbdata = NULL;
}
static void opdes(pmix2x_opcaddy_t *p)
@ -1476,6 +1257,9 @@ static void opdes(pmix2x_opcaddy_t *p)
if (NULL != p->apps) {
PMIX_APP_FREE(p->apps, p->sz);
}
if (NULL != p->pcodes) {
free(p->pcodes);
}
}
OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t,
opal_object_t,
@ -1513,12 +1297,17 @@ static void tscon(pmix2x_threadshift_t *p)
p->source = NULL;
p->event_codes = NULL;
p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->evhandler = NULL;
p->nondefault = false;
p->cbfunc = NULL;
p->opcbfunc = NULL;
p->cbdata = NULL;
}
static void tsdes(pmix2x_threadshift_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(pmix2x_threadshift_t,
opal_object_t,
tscon, NULL);
tscon, tsdes);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -39,9 +39,7 @@ typedef struct {
opal_list_t jobids;
bool native_launch;
size_t evindex;
opal_list_t single_events;
opal_list_t multi_events;
opal_list_t default_events;
opal_list_t events;
int cache_size;
opal_list_t cache;
} mca_pmix_pmix2x_component_t;
@ -61,42 +59,10 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t);
typedef struct {
opal_list_item_t super;
size_t index;
int code;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_single_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
int *codes;
size_t ncodes;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_multi_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_default_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t);
typedef struct {
opal_list_item_t super;
int status;
bool nondefault;
opal_process_name_t source;
pmix_data_range_t range;
opal_list_t *info;
opal_list_t results;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix_op_cbfunc_t final_cbfunc;
void *final_cbdata;
} opal_pmix2x_event_chain_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t);
void *cbdata;
} opal_pmix2x_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_t);
typedef struct {
opal_object_t super;
@ -111,11 +77,16 @@ typedef struct {
pmix_app_t *apps;
size_t sz;
volatile bool active;
opal_list_t *codes;
pmix_status_t *pcodes;
size_t ncodes;
opal_pmix2x_event_t *event;
opal_pmix_op_cbfunc_t opcbfunc;
opal_pmix_modex_cbfunc_t mdxcbfunc;
opal_pmix_value_cbfunc_t valcbfunc;
opal_pmix_lookup_cbfunc_t lkcbfunc;
opal_pmix_spawn_cbfunc_t spcbfunc;
opal_pmix_evhandler_reg_cbfunc_t evregcbfunc;
void *cbdata;
} pmix2x_opcaddy_t;
OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t);
@ -152,28 +123,15 @@ typedef struct {
size_t handler;
opal_list_t *event_codes;
opal_list_t *info;
opal_list_t results;
opal_pmix_notification_fn_t evhandler;
opal_pmix_evhandler_reg_cbfunc_t cbfunc;
opal_pmix_op_cbfunc_t opcbfunc;
pmix_event_notification_cbfunc_fn_t pmixcbfunc;
void *cbdata;
} pmix2x_threadshift_t;
OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \
do { \
pmix2x_opalcaddy_t *_cd; \
_cd = OBJ_NEW(pmix2x_opalcaddy_t); \
_cd->id = (i); \
_cd->status = (s); \
_cd->source = (sr); \
_cd->info = (i); \
_cd->evcbfunc = (cb); \
_cd->cbdata = (cd); \
event_assign(&((_cd)->ev), opal_pmix_base.evbase, \
-1, EV_WRITE, (fn), (_cd)); \
event_active(&((_cd)->ev), EV_WRITE, 1); \
} while(0)
#define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \
do { \
pmix2x_threadshift_t *_cd; \

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -36,7 +36,6 @@
static pmix_proc_t my_proc;
static char *dbgvalue=NULL;
static size_t errhdler_ref = 0;
#define PMIX_WAIT_FOR_COMPLETION(a) \
do { \
@ -50,7 +49,9 @@ static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
errhdler_ref = errhandler_ref;
opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata;
event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
@ -62,6 +63,7 @@ int pmix2x_client_init(void)
pmix_status_t rc;
int dbg;
opal_pmix2x_jobid_trkr_t *job;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client init");
@ -98,7 +100,9 @@ int pmix2x_client_init(void)
opal_proc_set_name(&pname);
/* register the default event handler */
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL);
event = OBJ_NEW(opal_pmix2x_event_t);
opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event);
return OPAL_SUCCESS;
}
@ -106,12 +110,16 @@ int pmix2x_client_init(void)
int pmix2x_client_finalize(void)
{
pmix_status_t rc;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client finalize");
/* deregister the default event handler */
PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL);
/* deregister all event handlers */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
PMIx_Deregister_event_handler(event->index, NULL, NULL);
}
/* the list will be destructed when the component is finalized */
rc = PMIx_Finalize(NULL, 0);
return pmix2x_convert_rc(rc);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
@ -36,7 +36,6 @@ const char *opal_pmix_pmix2x_component_version_string =
static int external_open(void);
static int external_close(void);
static int external_component_query(mca_base_module_t **module, int *priority);
static int external_register(void);
/*
@ -66,7 +65,6 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.mca_open_component = external_open,
.mca_close_component = external_close,
.mca_query_component = external_component_query,
.mca_register_component_params = external_register,
},
/* Next the MCA v1.0.0 component meta data */
.base_data = {
@ -77,27 +75,11 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.native_launch = false
};
static int external_register(void)
{
mca_pmix_pmix2x_component.cache_size = 256;
mca_base_component_var_register(&mca_pmix_pmix2x_component.super.base_version,
"cache_size", "Size of the ring buffer cache for events",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&mca_pmix_pmix2x_component.cache_size);
return OPAL_SUCCESS;
}
static int external_open(void)
{
mca_pmix_pmix2x_component.evindex = 0;
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.single_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.multi_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.default_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.cache, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.events, opal_list_t);
return OPAL_SUCCESS;
}
@ -105,10 +87,7 @@ static int external_open(void)
static int external_close(void)
{
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.single_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.multi_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.default_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.cache);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.events);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -88,6 +88,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -117,6 +118,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
#define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and
// thus cannot be prefixed with "pmix"
#define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon
#define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes
/* size info */
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
@ -220,6 +223,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform"
#define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define OPAL_PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */
#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -4,7 +4,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2013 Inria. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
@ -200,10 +200,9 @@ char* opal_get_proc_hostname(const opal_proc_t *proc)
}
/* if we don't already have it, then try to get it */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
(char**)&(proc->proc_hostname), OPAL_STRING);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
(char**)&(proc->proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
OPAL_ERROR_LOG(ret);
return "unknown"; // return something so the caller doesn't segfault
}

Просмотреть файл

@ -3,6 +3,7 @@
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -71,7 +72,7 @@ static int dfs_app_close(void)
static int dfs_app_component_query(mca_base_module_t **module, int *priority)
{
if (ORTE_PROC_IS_APP && orte_staged_execution) {
if (ORTE_PROC_IS_APP) {
/* set our priority high as we are the default for apps */
*priority = 1000;
*module = (mca_base_module_t *)&orte_dfs_app_module;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -89,7 +89,6 @@ static int rte_init(void)
char *envar, *ev1, *ev2;
uint64_t unique_key[2];
char *string_key;
char *rmluri;
opal_value_t *kv;
char *val;
int u32, *u32ptr;
@ -399,19 +398,9 @@ static int rte_init(void)
orte_process_info.max_procs = orte_process_info.num_procs;
}
/*** PUSH DATA FOR OTHERS TO FIND ***/
/* push our RML URI in case others need to talk directly to us */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "pmix put uri";
goto error;
}
free(rmluri);
/* push our hostname so others can find us, if they need to */
/* push our hostname so others can find us, if they need to - the
* native PMIx component will ignore this request as the hostname
* is provided by the system */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "db store hostname";

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -336,13 +336,6 @@ static int rte_init(void)
return rc;
}
/* push our hostname so others can find us, if they need to */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "db store hostname";
goto error;
}
return ORTE_SUCCESS;
error:

Просмотреть файл

@ -1341,9 +1341,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, "1");
}
if (orte_map_reduce) {
opal_argv_append(argc, argv, "--mapreduce");
}
if (orte_map_stddiag_to_stderr) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");

Просмотреть файл

@ -1101,11 +1101,6 @@ static int setup_child(orte_job_t *jdata,
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env);
}
/* if we are using staged execution, tell it */
if (orte_staged_execution) {
opal_setenv("OMPI_MCA_orte_staged_execution", "1", true, &app->env);
}
/* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire
*/

Просмотреть файл

@ -126,7 +126,6 @@ static struct {
int uri_pipe;
int singleton_died_pipe;
bool abort;
bool mapreduce;
bool tree_spawn;
char *hnp_topo_sig;
bool test_suicide;
@ -217,10 +216,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING,
"Topology signature of HNP" },
{ NULL, '\0', "mapreduce", "mapreduce", 0,
&orted_globals.mapreduce, OPAL_CMD_LINE_TYPE_BOOL,
"Whether to report process bindings to stderr" },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -335,11 +330,6 @@ int orte_daemon(int argc, char *argv[])
free(tmp_env_var);
#endif
/* if mapreduce set, flag it */
if (orted_globals.mapreduce) {
orte_map_reduce = true;
}
/* detach from controlling terminal
* otherwise, remain attached so output can get to us
*/

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -137,6 +137,9 @@ static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jar
static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
static void orte_timeout_wakeup(int sd, short args, void *cbdata);
static void orte_profile_wakeup(int sd, short args, void *cbdata);
static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata);
static void launch_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata);
@ -896,20 +899,6 @@ int orte_submit_job(char *argv[], int *index,
}
}
/* check for debugger test envars and forward them if necessary */
if (NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
char *evar;
evar = getenv("ORTE_TEST_DEBUGGER_SLEEP");
for (i=0; i < (orte_app_idx_t)jdata->num_apps; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
opal_setenv("ORTE_TEST_DEBUGGER_ATTACH", "1", true, &app->env);
if (NULL != evar) {
opal_setenv("ORTE_TEST_DEBUGGER_SLEEP", evar, true, &app->env);
}
}
}
}
/* check for suicide test directives */
if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") ||
NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
@ -956,6 +945,8 @@ int orte_submit_job(char *argv[], int *index,
ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
//goto DONE;
}
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
orte_memprofile_timeout->tv.tv_sec = timeout_seconds;
orte_memprofile_timeout->tv.tv_usec = 0;
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
@ -2212,10 +2203,9 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
static bool mpir_breakpoint_fired = false;
static void _send_notification(void)
static void _send_notification(int status)
{
opal_buffer_t buf;
int status = OPAL_ERR_DEBUGGER_RELEASE;
orte_grpcomm_signature_t sig;
int rc;
opal_value_t kv, *kvptr;
@ -2448,7 +2438,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */
_send_notification();
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
}
}
return;
@ -2547,7 +2537,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */
_send_notification();
_send_notification(OPAL_ERR_DEBUGGER_RELEASE);
} else if (!orte_debugger_test_attach) {
/* if I am launching debugger daemons, then I need to do so now
* that the job has been started and I know which nodes have
@ -3133,6 +3123,16 @@ void orte_timeout_wakeup(int sd, short args, void *cbdata)
static int nreports = 0;
static orte_timer_t profile_timer;
static int nchecks = 0;
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag,
@ -3167,26 +3167,33 @@ static void profile_recv(int status, orte_process_name_t* sender,
done:
--nreports;
if (nreports == 0) {
++nchecks;
/* cancel the timeout */
OBJ_DESTRUCT(&profile_timer);
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
/* notify to release */
_send_notification(12345);
/* if this was the first measurement, then we need to
* let the probe move along */
if (2 > nchecks) {
/* reset the event */
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
orte_profile_wakeup, NULL);
opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
/* reset the timer */
OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
opal_event_evtimer_set(orte_event_base,
profile_timer.ev, profile_timeout, NULL);
opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
profile_timer.tv.tv_sec = 30;
opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
return;
}
}
}
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
void orte_profile_wakeup(int sd, short args, void *cbdata)
{
orte_job_t *jdata = (orte_job_t*)cbdata;
orte_job_t *dmns;
orte_proc_t *dmn;
int i;
@ -3202,8 +3209,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
/* set the recv */
nreports = 1; // always get a report from ourselves
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
/* setup the buffer */
buffer = OBJ_NEW(opal_buffer_t);
@ -3213,13 +3218,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
OBJ_RELEASE(buffer);
goto giveup;
}
/* pack the jobid in question */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &jdata->jobid, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
goto giveup;
}
/* goes to just the first daemon beyond ourselves - no need to get it from everyone */
dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) {

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -35,6 +35,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/dss/dss.h"
#include "opal/mca/pstat/pstat.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps_types.h"
@ -449,13 +450,18 @@ static void _query(int sd, short args, void *cbdata)
opal_pmix_query_t *q;
opal_value_t *kv;
orte_job_t *jdata;
int rc;
opal_list_t *results;
orte_proc_t *proct;
int rc, i, num_replies;
opal_list_t *results, targets, *array;
size_t n;
uint32_t key;
void *nptr;
char **nspaces=NULL, nspace[512];
char **ans = NULL;
bool local_only;
orte_namelist_t *nm;
opal_pstats_t pstat;
float pss;
opal_output_verbose(2, orte_pmix_server_globals.output,
"%s processing query",
@ -522,6 +528,84 @@ static void _query(int sd, short args, void *cbdata)
opal_list_append(results, &kv->super);
opal_argv_free(ans);
ans = NULL;
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_MEMORY_USAGE)) {
/* check the qualifiers to find the procs they want to
* know about - if qualifiers are NULL, then get it for
* the daemons + all active jobs */
if (0 == opal_list_get_size(&q->qualifiers)) {
/* create a request tracker */
/* xcast a request for all memory usage */
/* return success - the callback will be done
* once we get the results */
return;
}
/* scan the qualifiers */
OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) {
if (0 == strcmp(kv->key, OPAL_PMIX_QUERY_LOCAL_ONLY)) {
if (OPAL_UNDEF == kv->type || kv->data.flag) {
local_only = true;
} else {
local_only = false;
}
} else if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) {
/* save this directive on our list of targets */
nm = OBJ_NEW(orte_namelist_t);
}
}
/* if they have asked for only our local procs or daemon,
* then we can just get the data directly */
if (local_only) {
if (0 == opal_list_get_size(&targets)) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_QUERY_MEMORY_USAGE);
kv->type = OPAL_PTR;
array = OBJ_NEW(opal_list_t);
kv->data.ptr = array;
opal_list_append(results, &kv->super);
/* collect my memory usage */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
opal_pstat.query(orte_process_info.pid, &pstat, NULL);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_DAEMON_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pstat.pss;
opal_list_append(array, &kv->super);
OBJ_DESTRUCT(&pstat);
/* collect the memory usage of all my children */
pss = 0.0;
num_replies = 0;
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
/* collect the stats on this proc */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
if (OPAL_SUCCESS == opal_pstat.query(proct->pid, &pstat, NULL)) {
pss += pstat.pss;
++num_replies;
}
OBJ_DESTRUCT(&pstat);
}
}
/* compute the average value */
if (0 < num_replies) {
pss /= (float)num_replies;
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_CLIENT_AVG_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pss;
opal_list_append(array, &kv->super);
} else {
}
} else {
/* if they want it for remote procs, see who is hosting them
* and ask directly for the info - if rank=wildcard, then
* we need to xcast the request and collect the results */
}
}
}
}

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -407,6 +407,14 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->type = OPAL_UINT32;
kv->data.uint32 = pptr->node->index;
opal_list_append(pmap, &kv->super);
if (map->num_nodes < orte_hostname_cutoff) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_HOSTNAME);
kv->type = OPAL_STRING;
kv->data.string = strdup(pptr->node->name);
opal_list_append(pmap, &kv->super);
}
}
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -86,6 +86,7 @@ bool orte_have_fqdn_allocation = false;
bool orte_show_resolved_nodenames = false;
bool orte_retain_aliases = false;
int orte_use_hostname_alias = -1;
int orte_hostname_cutoff = 1000;
int orted_debug_failure = -1;
int orted_debug_failure_delay = -1;
@ -183,10 +184,6 @@ int orte_stat_history_size = -1;
/* envars to forward */
char **orte_forwarded_envars = NULL;
/* map-reduce mode */
bool orte_map_reduce = false;
bool orte_staged_execution = false;
/* map stddiag output to stderr so it isn't forwarded to mpirun */
bool orte_map_stddiag_to_stderr = false;
@ -196,9 +193,6 @@ int orte_max_vm_size = -1;
/* user debugger */
char *orte_base_user_debugger = NULL;
/* modex cutoff */
uint32_t orte_direct_modex_cutoff = UINT32_MAX;
int orte_debug_output = -1;
bool orte_debug_daemons_flag = false;
bool orte_xml_output = false;

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -466,6 +466,7 @@ ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
ORTE_DECLSPEC extern bool orte_retain_aliases;
ORTE_DECLSPEC extern int orte_use_hostname_alias;
ORTE_DECLSPEC extern int orte_hostname_cutoff;
/* debug flags */
ORTE_DECLSPEC extern int orted_debug_failure;
@ -564,10 +565,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
/* envars to forward */
ORTE_DECLSPEC extern char **orte_forwarded_envars;
/* map-reduce mode */
ORTE_DECLSPEC extern bool orte_map_reduce;
ORTE_DECLSPEC extern bool orte_staged_execution;
/* map stddiag output to stderr so it isn't forwarded to mpirun */
ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
@ -582,9 +579,6 @@ ORTE_DECLSPEC extern char *orte_base_user_debugger;
*/
ORTE_DECLSPEC extern char *orte_daemon_cores;
/* cutoff for collective modex */
ORTE_DECLSPEC extern uint32_t orte_direct_modex_cutoff;
END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -429,6 +429,13 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_retain_aliases);
orte_hostname_cutoff = 1000;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
"Pass hostnames to all procs when #nodes is less than cutoff [default:1000]",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
&orte_hostname_cutoff);
/* which alias to use in MPIR_proctab */
orte_use_hostname_alias = 1;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
@ -659,13 +666,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_allowed_exit_without_sync);
orte_staged_execution = false;
(void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
"Staged execution is being used",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_staged_execution);
orte_report_child_jobs_separately = false;
(void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
"Return the exit status of the primary job only",
@ -754,17 +754,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&orte_daemon_cores);
/* cutoff for full modex */
orte_direct_modex_cutoff = UINT32_MAX;
id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
"If the number of processes in the application exceeds the provided value,"
"modex will be done upon demand [default: UINT32_MAX]",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_direct_modex_cutoff);
/* register a synonym for old name */
mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
/* get the conduit params */
orte_coll_transport = "fabric,ethernet";
(void) mca_base_var_register("orte", "orte", "coll", "transports",