1
1
Revamp the event notification integration to rely on the PMIx event chaining and remove the duplicate chaining in OPAL. This ensures we get system-level events that target non-default handlers.

Restore the hostname entries for MPI-level error messages, but provide an MCA param (orte_hostname_cutoff) to remove them for large clusters where the memory footprint is problematic. Set the default at 1000 nodes in the job (not the allocation).

Begin first cut at memory profiler

Some minor cleanups of memprobe

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2016-12-31 07:37:24 -08:00
родитель 5f68d655d6
Коммит 9eab9a1ed3
25 изменённых файлов: 554 добавлений и 668 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -111,6 +111,7 @@ contrib/platform/intel/bend/*orcm*
contrib/scaling/orte_no_op contrib/scaling/orte_no_op
contrib/scaling/mpi_no_op contrib/scaling/mpi_no_op
contrib/scaling/mpi_barrier contrib/scaling/mpi_barrier
contrib/scaling/mpi_memprobe
examples/hello_c examples/hello_c
examples/hello_cxx examples/hello_cxx

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = orte_no_op mpi_no_op PROGS = orte_no_op mpi_no_op mpi_memprobe
all: $(PROGS) all: $(PROGS)
@ -10,5 +10,8 @@ orte_no_op:
mpi_no_op: mpi_no_op:
mpicc -o mpi_no_op mpi_no_op.c mpicc -o mpi_no_op mpi_no_op.c
mpi_memprobe:
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
clean: clean:
rm -f $(PROGS) *~ rm -f $(PROGS) *~

98
contrib/scaling/mpi_memprobe.c Обычный файл
Просмотреть файл

@ -0,0 +1,98 @@
/* -*- C -*-
*
* $HEADER$
*
* MPI test program that pauses after MPI_Init and again after an
* MPI_Barrier so that memory usage can be sampled at each point
*/
#include "orte_config.h"
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "mpi.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
/* Registration status written by _register_fn through its cbdata pointer;
 * main() sets it to -1 before registering and polls until it changes. */
static volatile int active;
/* Cleared by _release_fn; main() polls it at each sampling point and sets
 * it back to true between the first and second wait. */
static volatile bool wait_for_release = true;
/* Event code that main() registers _release_fn for. */
#define MEMPROBE_RELEASE 12345
/*
 * Event handler that main() registers for the MEMPROBE_RELEASE code.
 *
 * The notification payload (status, source, info, results) is not
 * inspected. The handler first reports completion to the notifier via
 * cbfunc (when one was supplied) and then clears wait_for_release so
 * that the polling loop in main() can proceed.
 */
static void _release_fn(int status,
                        const opal_process_name_t *source,
                        opal_list_t *info, opal_list_t *results,
                        opal_pmix_notification_complete_fn_t cbfunc,
                        void *cbdata)
{
    /* none of the notification details are needed here */
    (void)status;
    (void)source;
    (void)info;
    (void)results;

    /* acknowledge the event before letting main() move on */
    if (cbfunc != NULL) {
        cbfunc(0, NULL, NULL, NULL, cbdata);
    }

    /* release the waiting loop in main() */
    wait_for_release = false;
}
/*
 * Completion callback for the event-handler registration.
 *
 * status        - result of the registration; non-zero is reported on stderr
 * evhandler_ref - reference assigned to the handler (only used in the message)
 * cbdata        - pointer to a volatile int that receives the status
 */
static void _register_fn(int status,
                         size_t evhandler_ref,
                         void *cbdata)
{
    volatile int *result = cbdata;

    if (status != 0) {
        fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
                status, (unsigned long)evhandler_ref);
    }

    /* publish the outcome last so the poller sees it after any message */
    *result = status;
}
/*
 * Memory-probe driver.
 *
 * After MPI_Init the process registers _release_fn for the
 * MEMPROBE_RELEASE event code and then idles until that handler clears
 * wait_for_release, so memory can be sampled while it waits. It then
 * performs an MPI_Barrier (forcing some communication), idles a second
 * time for another sample, and finalizes.
 *
 * Returns 0 on success. If the event handler cannot be registered the
 * job is aborted, because the release notification could never be
 * delivered and the waits below would otherwise spin forever.
 */
int main(int argc, char* argv[])
{
    int rank, size;
    opal_list_t *codes;
    opal_value_t *kv;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (0 == rank) {
        fprintf(stderr, "Sampling memory usage after MPI_Init\n");
    }

    /* build the one-entry list of event codes we want to be notified of */
    codes = OBJ_NEW(opal_list_t);
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup("errorcode");
    kv->type = OPAL_INT;
    kv->data.integer = MEMPROBE_RELEASE;
    opal_list_append(codes, &kv->super);

    /* register and wait for _register_fn to report the outcome
     * NOTE(review): -1 doubles as the "pending" sentinel; if a failed
     * registration can itself be reported with status -1 this loop
     * would never exit - confirm against the status codes in use */
    active = -1;
    opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
    while (-1 == active) {
        usleep(10);
    }

    /* _register_fn stores the registration status; anything other than
     * zero means the release handler was not installed, so waiting for
     * it would hang this process - take the whole job down instead */
    if (0 != active) {
        fprintf(stderr, "Unable to register the release event handler (status %d) - aborting\n",
                (int)active);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    /* now wait for notification */
    while (wait_for_release) {
        usleep(10);
    }
    /* re-arm the flag for the second sampling point */
    wait_for_release = true;

    /* perform a barrier so some communication will occur, thus
     * requiring exchange of endpoint info */
    MPI_Barrier(MPI_COMM_WORLD);

    if (0 == rank) {
        fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
    }

    /* wait again while memory is sampled */
    while (wait_for_release) {
        usleep(10);
    }

    MPI_Finalize();
    return 0;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -157,6 +157,7 @@ typedef uint32_t pmix_rank_t;
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories #define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */ /* information about relative ranks as assigned by the RM */
#define PMIX_PROCID "pmix.procid" // (pmix_proc_t) process identifier
#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job #define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler #define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job #define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -282,6 +283,8 @@ typedef uint32_t pmix_rank_t;
#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform #define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform
#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes #define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes #define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */ /* log attributes */
#define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr #define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>. * Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -553,11 +553,18 @@ static void _putfn(int sd, short args, void *cbdata)
{ {
pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_cb_t *cb = (pmix_cb_t*)cbdata;
pmix_status_t rc; pmix_status_t rc;
pmix_kval_t *kv; pmix_kval_t *kv = NULL;
pmix_nspace_t *ns; pmix_nspace_t *ns;
uint8_t *tmp; uint8_t *tmp;
size_t len; size_t len;
/* no need to push info that starts with "pmix" as that is
* info we would have been provided at startup */
if (0 == strncmp(cb->key, "pmix", 4)) {
rc = PMIX_SUCCESS;
goto done;
}
/* setup to xfer the data */ /* setup to xfer the data */
kv = PMIX_NEW(pmix_kval_t); kv = PMIX_NEW(pmix_kval_t);
kv->key = strdup(cb->key); // need to copy as the input belongs to the user kv->key = strdup(cb->key); // need to copy as the input belongs to the user
@ -622,7 +629,9 @@ static void _putfn(int sd, short args, void *cbdata)
} }
done: done:
PMIX_RELEASE(kv); // maintain accounting if (NULL != kv) {
PMIX_RELEASE(kv); // maintain accounting
}
cb->pstatus = rc; cb->pstatus = rc;
cb->active = false; cb->active = false;
} }

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>. * Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -763,7 +763,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
* us to attempt to retrieve it from the server */ * us to attempt to retrieve it from the server */
for (n=0; n < cb->ninfo; n++) { for (n=0; n < cb->ninfo; n++) {
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) && if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
cb->info[n].value.data.flag) { (PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
/* they don't want us to try and retrieve it */ /* they don't want us to try and retrieve it */
pmix_output_verbose(2, pmix_globals.debug_output, pmix_output_verbose(2, pmix_globals.debug_output,
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -356,6 +356,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
sing->code = cd->codes[0]; sing->code = cd->codes[0];
index = pmix_globals.events.nhdlrs; index = pmix_globals.events.nhdlrs;
sing->index = index; sing->index = index;
sing->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs; ++pmix_globals.events.nhdlrs;
sing->cbobject = cbobject; sing->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super, rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super,
@ -365,17 +366,17 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) { PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */ /* unable to register */
--pmix_globals.events.nhdlrs; --pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION; rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX; index = UINT_MAX;
goto ack;
}
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack; goto ack;
} }
if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */
PMIX_RELEASE(cd);
return;
}
goto ack;
}
/* must be a multi-code registration */ /* must be a multi-code registration */
multi = PMIX_NEW(pmix_multi_event_t); multi = PMIX_NEW(pmix_multi_event_t);
@ -387,6 +388,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t));
index = pmix_globals.events.nhdlrs; index = pmix_globals.events.nhdlrs;
multi->index = index; multi->index = index;
multi->evhdlr = cd->evhdlr;
++pmix_globals.events.nhdlrs; ++pmix_globals.events.nhdlrs;
multi->cbobject = cbobject; multi->cbobject = cbobject;
rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super, rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super,
@ -396,9 +398,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
PMIX_ERR_WOULD_BLOCK != rc) { PMIX_ERR_WOULD_BLOCK != rc) {
/* unable to register */ /* unable to register */
--pmix_globals.events.nhdlrs; --pmix_globals.events.nhdlrs;
rc = PMIX_ERR_EVENT_REGISTRATION; rc = PMIX_ERR_EVENT_REGISTRATION;
index = UINT_MAX; index = UINT_MAX;
goto ack; goto ack;
} }
if (PMIX_ERR_WOULD_BLOCK == rc) { if (PMIX_ERR_WOULD_BLOCK == rc) {
/* the callback will provide our response */ /* the callback will provide our response */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc. * Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -145,121 +145,52 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super); opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super);
} }
static void completion_handler(int status, void *cbdata) static void event_hdlr_complete(pmix_status_t status, void *cbdata)
{ {
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata; pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
if (NULL != chain->info) {
OPAL_LIST_RELEASE(chain->info); OBJ_RELEASE(op);
}
} }
static void progress_local_event_hdlr(int status, static void return_local_event_hdlr(int status, opal_list_t *results,
opal_list_t *results, opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, void *notification_cbdata)
void *notification_cbdata)
{ {
opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata; pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_status_t pstatus;
size_t n; size_t n;
opal_list_item_t *nxt;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
/* if the caller indicates that the chain is completed, then stop here */ if (NULL != cd->pmixcbfunc) {
if (OPAL_ERR_HANDLERS_COMPLETE == status) { op = OBJ_NEW(pmix2x_opcaddy_t);
goto complete;
}
/* if any results were provided, then add them here */ if (NULL != results) {
if (NULL != results) { /* convert the list of results to an array of info */
while (NULL != (nxt = opal_list_remove_first(results))) { op->ninfo = opal_list_get_size(results);
opal_list_append(results, nxt); if (0 < op->ninfo) {
} PMIX_INFO_CREATE(op->info, op->ninfo);
} n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
/* see if we need to continue, starting with the single code events */ (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
if (NULL != chain->sing) { pmix2x_value_load(&op->info[n].value, kv);
/* the last handler was for a single code - see if there are ++n;
* any others that match this event */
while (opal_list_get_end(&mca_pmix_pmix2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) {
sing = (opal_pmix2x_single_event_t*)nxt;
if (sing->code == chain->status) {
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
}
}
/* if we get here, then there are no more single code
* events that match */
chain->sing = NULL;
/* pickup the beginning of the multi-code event list */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.multi_events)) {
chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.multi_events);
}
}
/* see if we need to continue with the multi code events */
if (NULL != chain->multi) {
while (opal_list_get_end(&mca_pmix_pmix2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) {
multi = (opal_pmix2x_multi_event_t*)nxt;
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
goto complete;
} }
} }
} }
/* if we get here, then there are no more multi-mode /* convert the status */
* events that match */ pstatus = pmix2x_convert_opalrc(status);
chain->multi = NULL; /* call the library's callback function */
/* pickup the beginning of the default event list */ cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata);
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_pmix2x_component.default_events);
}
} }
/* if they didn't want it to go to a default handler, then we are done */ /* release the threadshift object */
if (chain->nondefault) { if (NULL != cd->info) {
goto complete; OPAL_LIST_RELEASE(cd->info);
} }
OBJ_RELEASE(cd);
if (NULL != chain->def) { /* release the caller */
if (opal_list_get_end(&mca_pmix_pmix2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) {
def = (opal_pmix2x_default_event_t*)nxt;
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
}
}
complete:
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata);
}
/* maintain acctng */
OBJ_RELEASE(chain);
/* let the caller know that we are done with their callback */
if (NULL != cbfunc) { if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, thiscbdata); cbfunc(OPAL_SUCCESS, thiscbdata);
} }
@ -268,92 +199,29 @@ static void progress_local_event_hdlr(int status,
static void _event_hdlr(int sd, short args, void *cbdata) static void _event_hdlr(int sd, short args, void *cbdata)
{ {
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
size_t n; opal_pmix2x_event_t *event;
opal_pmix2x_event_chain_t *chain;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output, opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d", "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status); OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status);
chain = OBJ_NEW(opal_pmix2x_event_chain_t); /* cycle thru the registrations */
/* point it at our final callback */ OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
chain->final_cbfunc = completion_handler; if (cd->id == event->index) {
chain->final_cbdata = chain;
/* carry across provided info */
chain->status = cd->status;
chain->source = cd->pname;
chain->info = cd->info;
chain->nondefault = cd->nondefault;
/* cycle thru the single-event registrations first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (sing->code == chain->status) {
/* found it - invoke the handler, pointing its /* found it - invoke the handler, pointing its
* callback function to our progression function */ * callback function to our callback function */
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output, opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING SINGLE EVHDLR", "%s _EVENT_HDLR CALLING EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source, event->handler(cd->status, &cd->pname,
chain->info, &chain->results, cd->info, &cd->results,
progress_local_event_hdlr, (void*)chain); return_local_event_hdlr, (void*)cd);
return; return;
} }
} }
/* if we didn't find a match, we still have to call their final callback */
/* if we didn't find any match in the single-event registrations, if (NULL != cd->pmixcbfunc) {
* then cycle thru the multi-event registrations next */ cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata);
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
}
}
/* if they didn't want it to go to a default handler, then we are done */
if (chain->nondefault) {
/* if we get here, then we need to cache this event in case they
* register for it later - we cannot lose individual events */
opal_list_append(&mca_pmix_pmix2x_component.cache, &chain->super);
return;
}
/* we are done with the threadshift caddy */
OBJ_RELEASE(cd);
/* finally, pass it to any default handlers */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
return;
}
/* we still have to call their final callback */
if (NULL != chain->final_cbfunc) {
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
} }
return; return;
} }
@ -385,6 +253,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status);
cd = OBJ_NEW(pmix2x_threadshift_t); cd = OBJ_NEW(pmix2x_threadshift_t);
cd->id = evhdlr_registration_id;
cd->pmixcbfunc = cbfunc;
cd->cbdata = cbdata;
/* convert the incoming status */ /* convert the incoming status */
cd->status = pmix2x_convert_rc(status); cd->status = pmix2x_convert_rc(status);
@ -409,9 +280,6 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
if (NULL != info) { if (NULL != info) {
cd->info = OBJ_NEW(opal_list_t); cd->info = OBJ_NEW(opal_list_t);
for (n=0; n < ninfo; n++) { for (n=0; n < ninfo; n++) {
if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
cd->nondefault = true;
}
iptr = OBJ_NEW(opal_value_t); iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(info[n].key); iptr->key = strdup(info[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) {
@ -422,17 +290,25 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
opal_list_append(cd->info, &iptr->super); opal_list_append(cd->info, &iptr->super);
} }
} }
/* convert the array of prior results */
if (NULL != results) {
for (n=0; n < nresults; n++) {
iptr = OBJ_NEW(opal_value_t);
iptr->key = strdup(results[n].key);
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(iptr);
continue;
}
opal_list_append(&cd->results, &iptr->super);
}
}
/* now push it into the local thread */ /* now push it into the local thread */
event_assign(&cd->ev, opal_pmix_base.evbase, event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _event_hdlr, cd); -1, EV_WRITE, _event_hdlr, cd);
event_active(&cd->ev, EV_WRITE, 1); event_active(&cd->ev, EV_WRITE, 1);
/* we don't need any of the data they provided,
* so let them go - also tell them that we will handle
* everything from this point forward */
if (NULL != cbfunc) {
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
}
} }
opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank)
@ -536,7 +412,7 @@ pmix_status_t pmix2x_convert_opalrc(int rc)
case OPAL_SUCCESS: case OPAL_SUCCESS:
return PMIX_SUCCESS; return PMIX_SUCCESS;
default: default:
return PMIX_ERROR; return rc;
} }
} }
@ -620,7 +496,7 @@ int pmix2x_convert_rc(pmix_status_t rc)
case PMIX_SUCCESS: case PMIX_SUCCESS:
return OPAL_SUCCESS; return OPAL_SUCCESS;
default: default:
return OPAL_ERROR; return rc;
} }
} }
@ -735,6 +611,10 @@ void pmix2x_value_load(pmix_value_t *v,
{ {
opal_pmix2x_jobid_trkr_t *job; opal_pmix2x_jobid_trkr_t *job;
bool found; bool found;
opal_list_t *list;
opal_value_t *val;
pmix_info_t *info;
size_t n;
switch(kv->type) { switch(kv->type) {
case OPAL_UNDEF: case OPAL_UNDEF:
@ -876,8 +756,22 @@ void pmix2x_value_load(pmix_value_t *v,
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t)); memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
break; break;
case OPAL_PTR: case OPAL_PTR:
v->type = PMIX_POINTER; /* if someone returned a pointer, it must be to a list of
v->data.ptr = kv->data.ptr; * opal_value_t's that we need to convert to a pmix_data_array
* of pmix_info_t structures */
list = (opal_list_t*)kv->data.ptr;
v->type = PMIX_DATA_ARRAY;
v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
v->data.darray->type = PMIX_INFO;
v->data.darray->size = opal_list_get_size(list);
PMIX_INFO_CREATE(info, v->data.darray->size);
v->data.darray->array = info;
n=0;
OPAL_LIST_FOREACH(val, list, opal_value_t) {
(void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&info[n].value, val);
++n;
}
break; break;
default: default:
/* silence warnings */ /* silence warnings */
@ -1041,16 +935,27 @@ int pmix2x_value_unload(opal_value_t *kv,
return rc; return rc;
} }
static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
op->event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
if (NULL != op->evregcbfunc) {
op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata);
}
OBJ_RELEASE(op);
}
static void _reg_hdlr(int sd, short args, void *cbdata) static void _reg_hdlr(int sd, short args, void *cbdata)
{ {
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_event_chain_t *chain; pmix2x_opcaddy_t *op;
opal_pmix2x_single_event_t *sing = NULL;
opal_pmix2x_multi_event_t *multi = NULL;
opal_pmix2x_default_event_t *def = NULL;
opal_value_t *kv; opal_value_t *kv;
int i;
bool prepend = false;
size_t n; size_t n;
opal_output_verbose(2, opal_pmix_base_framework.framework_output, opal_output_verbose(2, opal_pmix_base_framework.framework_output,
@ -1058,116 +963,46 @@ static void _reg_hdlr(int sd, short args, void *cbdata)
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == cd->event_codes) ? "NULL" : "NON-NULL"); (NULL == cd->event_codes) ? "NULL" : "NON-NULL");
if (NULL != cd->info) { op = OBJ_NEW(pmix2x_opcaddy_t);
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { op->evregcbfunc = cd->cbfunc;
if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) { op->cbdata = cd->cbdata;
prepend = true;
break;
}
}
}
if (NULL == cd->event_codes) { /* convert the event codes */
/* this is a default handler */ if (NULL != cd->event_codes) {
def = OBJ_NEW(opal_pmix2x_default_event_t); op->ncodes = opal_list_get_size(cd->event_codes);
def->handler = cd->evhandler; op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t));
def->index = mca_pmix_pmix2x_component.evindex; n=0;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.default_events, &def->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO DEFAULT EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.default_events, &def->super);
}
} else if (1 == opal_list_get_size(cd->event_codes)) {
/* single handler */
sing = OBJ_NEW(opal_pmix2x_single_event_t);
kv = (opal_value_t*)opal_list_get_first(cd->event_codes);
sing->code = kv->data.integer;
sing->index = mca_pmix_pmix2x_component.evindex;
sing->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_prepend(&mca_pmix_pmix2x_component.single_events, &sing->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO SINGLE EVENTS WITH CODE %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code);
opal_list_append(&mca_pmix_pmix2x_component.single_events, &sing->super);
}
} else {
multi = OBJ_NEW(opal_pmix2x_multi_event_t);
multi->ncodes = opal_list_get_size(cd->event_codes);
multi->codes = (int*)malloc(multi->ncodes * sizeof(int));
i=0;
OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) {
multi->codes[i] = kv->data.integer; op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer);
++i;
}
multi->index = mca_pmix_pmix2x_component.evindex;
multi->handler = cd->evhandler;
if (prepend) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PREPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_prepend(&mca_pmix_pmix2x_component.multi_events, &multi->super);
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s APPENDING TO MULTI EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_list_append(&mca_pmix_pmix2x_component.multi_events, &multi->super);
} }
} }
/* release the caller */ /* convert the list of info to an array of pmix_info_t */
if (NULL != cd->cbfunc) { if (NULL != cd->info) {
cd->cbfunc(OPAL_SUCCESS, mca_pmix_pmix2x_component.evindex, cd->cbdata); op->ninfo = opal_list_get_size(cd->info);
} if (0 < op->ninfo) {
mca_pmix_pmix2x_component.evindex++; PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
/* check if any matching notifications have been cached - only nondefault OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
* events will have been cached*/ (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
if (NULL == def) { pmix2x_value_load(&op->info[n].value, kv);
/* check single code registrations */ ++n;
if (NULL != sing) {
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
if (sing->code == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->sing = sing;
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
} else if (NULL != multi) {
/* check for multi code registrations */
OPAL_LIST_FOREACH(chain, &mca_pmix_pmix2x_component.cache, opal_pmix2x_event_chain_t) {
for (n=0; n < multi->ncodes; n++) {
if (multi->codes[n] == chain->status) {
opal_list_remove_item(&mca_pmix_pmix2x_component.cache, &chain->super);
chain->multi = multi;
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
}
} }
} }
} }
/* register the event */
op->event = OBJ_NEW(opal_pmix2x_event_t);
op->event->handler = cd->evhandler;
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super);
PMIx_Register_event_handler(op->pcodes, op->ncodes,
op->info, op->ninfo,
pmix2x_event_hdlr, errreg_cbfunc, op);
OBJ_RELEASE(cd); OBJ_RELEASE(cd);
return; return;
} }
static void register_handler(opal_list_t *event_codes, static void register_handler(opal_list_t *event_codes,
opal_list_t *info, opal_list_t *info,
opal_pmix_notification_fn_t evhandler, opal_pmix_notification_fn_t evhandler,
@ -1184,36 +1019,20 @@ static void register_handler(opal_list_t *event_codes,
static void _dereg_hdlr(int sd, short args, void *cbdata) static void _dereg_hdlr(int sd, short args, void *cbdata)
{ {
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_single_event_t *sing; opal_pmix2x_event_t *event;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
/* check the single events first */ /* look for this event */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) { OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (cd->handler == sing->index) { if (cd->handler == event->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.single_events, &sing->super); opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
OBJ_RELEASE(sing); OBJ_RELEASE(event);
goto release;
}
}
/* check multi events */
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) {
if (cd->handler == multi->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.multi_events, &multi->super);
OBJ_RELEASE(multi);
goto release;
}
}
/* check default events */
OPAL_LIST_FOREACH(def, &mca_pmix_pmix2x_component.default_events, opal_pmix2x_default_event_t) {
if (cd->handler == def->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.default_events, &def->super);
OBJ_RELEASE(def);
break; break;
} }
} }
/* tell the library to deregister this handler */
PMIx_Deregister_event_handler(cd->handler, NULL, NULL);
release: /* release the caller */
if (NULL != cd->opcbfunc) { if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); cd->opcbfunc(OPAL_SUCCESS, cd->cbdata);
} }
@ -1230,90 +1049,81 @@ static void deregister_handler(size_t evhandler,
return; return;
} }
static void _notify_event(int sd, short args, void *cbdata) static void notify_complete(pmix_status_t status, void *cbdata)
{ {
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
size_t i; if (NULL != op->opcbfunc) {
opal_pmix2x_single_event_t *sing; op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix2x_event_chain_t *chain;
/* check the single events first */
OPAL_LIST_FOREACH(sing, &mca_pmix_pmix2x_component.single_events, opal_pmix2x_single_event_t) {
if (cd->status == sing->code) {
/* found it - invoke the handler, pointing its
* callback function to our progression function */
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING SINGLE EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
} }
/* check multi events */ OBJ_RELEASE(op);
OPAL_LIST_FOREACH(multi, &mca_pmix_pmix2x_component.multi_events, opal_pmix2x_multi_event_t) { }
for (i=0; i < multi->ncodes; i++) {
if (cd->status == multi->codes[i]) { static void _notify(int sd, short args, void *cbdata)
/* found it - invoke the handler, pointing its {
* callback function to our progression function */ pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata;
chain = OBJ_NEW(opal_pmix2x_event_chain_t); pmix2x_opcaddy_t *op;
chain->status = cd->status; opal_value_t *kv;
chain->range = pmix2x_convert_opalrange(cd->range); pmix_proc_t p, *pptr;
chain->source = *(cd->source); pmix_status_t pstatus;
chain->info = cd->info; size_t n;
chain->final_cbfunc = cd->opcbfunc; int rc=OPAL_SUCCESS;
chain->final_cbdata = cd->cbdata; pmix_data_range_t prange;
chain->multi = multi; opal_pmix2x_jobid_trkr_t *job, *jptr;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING MULTI EVHDLR FOR STATUS %d", op = OBJ_NEW(pmix2x_opcaddy_t);
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
multi->handler(chain->status, &chain->source, /* convert the status */
chain->info, &chain->results, pstatus = pmix2x_convert_opalrc(cd->status);
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd); /* convert the source */
return; if (NULL == cd->source) {
pptr = NULL;
} else {
/* look thru our list of jobids and find the
* corresponding nspace */
job = NULL;
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->source->jobid) {
job = jptr;
break;
}
}
if (NULL == job) {
rc = OPAL_ERR_NOT_FOUND;
goto release;
}
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
pptr = &p;
}
/* convert the range */
prange = pmix2x_convert_opalrange(cd->range);
/* convert the list of info */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
} }
} }
} }
/* check default events */
if (0 < opal_list_get_size(&mca_pmix_pmix2x_component.default_events)) {
def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_pmix2x_component.default_events);
chain = OBJ_NEW(opal_pmix2x_event_chain_t);
chain->status = cd->status;
chain->range = pmix2x_convert_opalrange(cd->range);
chain->source = *(cd->source);
chain->info = cd->info;
chain->final_cbfunc = cd->opcbfunc;
chain->final_cbdata = cd->cbdata;
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"[%s] CALLING DEFAULT EVHDLR FOR STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status);
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
OBJ_RELEASE(cd);
return;
}
/* if we get here, then there are no registered event handlers */ /* ask the library to notify our clients */
pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op);
rc = pmix2x_convert_rc(pstatus);
release:
/* release the caller */
if (NULL != cd->opcbfunc) { if (NULL != cd->opcbfunc) {
cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata); cd->opcbfunc(rc, cd->cbdata);
} }
OBJ_RELEASE(cd); OBJ_RELEASE(cd);
return;
} }
static int notify_event(int status, static int notify_event(int status,
@ -1324,7 +1134,7 @@ static int notify_event(int status,
{ {
/* we must threadshift this request as we might not be in an event /* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */ * and we are going to access framework-global lists/objects */
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata); OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -1401,47 +1211,14 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
opal_list_item_t, opal_list_item_t,
NULL, NULL); NULL, NULL);
OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t, static void evcon(opal_pmix2x_event_t *p)
opal_list_item_t,
NULL, NULL);
static void mtevcon(opal_pmix2x_multi_event_t *p)
{ {
p->codes = NULL; p->handler = NULL;
p->ncodes = 0; p->cbdata = NULL;
} }
static void mtevdes(opal_pmix2x_multi_event_t *p) OBJ_CLASS_INSTANCE(opal_pmix2x_event_t,
{
if (NULL != p->codes) {
free(p->codes);
}
}
OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t,
opal_list_item_t, opal_list_item_t,
mtevcon, mtevdes); evcon, NULL);
OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t,
opal_list_item_t,
NULL, NULL);
static void chcon(opal_pmix2x_event_chain_t *p)
{
p->nondefault = false;
p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->sing = NULL;
p->multi = NULL;
p->def = NULL;
p->final_cbfunc = NULL;
p->final_cbdata = NULL;
}
static void chdes(opal_pmix2x_event_chain_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t,
opal_list_item_t,
chcon, chdes);
static void opcon(pmix2x_opcaddy_t *p) static void opcon(pmix2x_opcaddy_t *p)
{ {
@ -1455,11 +1232,15 @@ static void opcon(pmix2x_opcaddy_t *p)
p->apps = NULL; p->apps = NULL;
p->sz = 0; p->sz = 0;
p->active = false; p->active = false;
p->codes = NULL;
p->pcodes = NULL;
p->event = NULL;
p->opcbfunc = NULL; p->opcbfunc = NULL;
p->mdxcbfunc = NULL; p->mdxcbfunc = NULL;
p->valcbfunc = NULL; p->valcbfunc = NULL;
p->lkcbfunc = NULL; p->lkcbfunc = NULL;
p->spcbfunc = NULL; p->spcbfunc = NULL;
p->evregcbfunc = NULL;
p->cbdata = NULL; p->cbdata = NULL;
} }
static void opdes(pmix2x_opcaddy_t *p) static void opdes(pmix2x_opcaddy_t *p)
@ -1476,6 +1257,9 @@ static void opdes(pmix2x_opcaddy_t *p)
if (NULL != p->apps) { if (NULL != p->apps) {
PMIX_APP_FREE(p->apps, p->sz); PMIX_APP_FREE(p->apps, p->sz);
} }
if (NULL != p->pcodes) {
free(p->pcodes);
}
} }
OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t, OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t,
opal_object_t, opal_object_t,
@ -1513,12 +1297,17 @@ static void tscon(pmix2x_threadshift_t *p)
p->source = NULL; p->source = NULL;
p->event_codes = NULL; p->event_codes = NULL;
p->info = NULL; p->info = NULL;
OBJ_CONSTRUCT(&p->results, opal_list_t);
p->evhandler = NULL; p->evhandler = NULL;
p->nondefault = false; p->nondefault = false;
p->cbfunc = NULL; p->cbfunc = NULL;
p->opcbfunc = NULL; p->opcbfunc = NULL;
p->cbdata = NULL; p->cbdata = NULL;
} }
static void tsdes(pmix2x_threadshift_t *p)
{
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, OBJ_CLASS_INSTANCE(pmix2x_threadshift_t,
opal_object_t, opal_object_t,
tscon, NULL); tscon, tsdes);

Просмотреть файл

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc. * Copyright (c) 2014-2015 Mellanox Technologies, Inc.
* All rights reserved. * All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
@ -39,9 +39,7 @@ typedef struct {
opal_list_t jobids; opal_list_t jobids;
bool native_launch; bool native_launch;
size_t evindex; size_t evindex;
opal_list_t single_events; opal_list_t events;
opal_list_t multi_events;
opal_list_t default_events;
int cache_size; int cache_size;
opal_list_t cache; opal_list_t cache;
} mca_pmix_pmix2x_component_t; } mca_pmix_pmix2x_component_t;
@ -61,42 +59,10 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t);
typedef struct { typedef struct {
opal_list_item_t super; opal_list_item_t super;
size_t index; size_t index;
int code;
opal_pmix_notification_fn_t handler; opal_pmix_notification_fn_t handler;
} opal_pmix2x_single_event_t; void *cbdata;
OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t); } opal_pmix2x_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
int *codes;
size_t ncodes;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_multi_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t);
typedef struct {
opal_list_item_t super;
size_t index;
opal_pmix_notification_fn_t handler;
} opal_pmix2x_default_event_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t);
typedef struct {
opal_list_item_t super;
int status;
bool nondefault;
opal_process_name_t source;
pmix_data_range_t range;
opal_list_t *info;
opal_list_t results;
opal_pmix2x_single_event_t *sing;
opal_pmix2x_multi_event_t *multi;
opal_pmix2x_default_event_t *def;
opal_pmix_op_cbfunc_t final_cbfunc;
void *final_cbdata;
} opal_pmix2x_event_chain_t;
OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t);
typedef struct { typedef struct {
opal_object_t super; opal_object_t super;
@ -111,11 +77,16 @@ typedef struct {
pmix_app_t *apps; pmix_app_t *apps;
size_t sz; size_t sz;
volatile bool active; volatile bool active;
opal_list_t *codes;
pmix_status_t *pcodes;
size_t ncodes;
opal_pmix2x_event_t *event;
opal_pmix_op_cbfunc_t opcbfunc; opal_pmix_op_cbfunc_t opcbfunc;
opal_pmix_modex_cbfunc_t mdxcbfunc; opal_pmix_modex_cbfunc_t mdxcbfunc;
opal_pmix_value_cbfunc_t valcbfunc; opal_pmix_value_cbfunc_t valcbfunc;
opal_pmix_lookup_cbfunc_t lkcbfunc; opal_pmix_lookup_cbfunc_t lkcbfunc;
opal_pmix_spawn_cbfunc_t spcbfunc; opal_pmix_spawn_cbfunc_t spcbfunc;
opal_pmix_evhandler_reg_cbfunc_t evregcbfunc;
void *cbdata; void *cbdata;
} pmix2x_opcaddy_t; } pmix2x_opcaddy_t;
OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t); OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t);
@ -152,28 +123,15 @@ typedef struct {
size_t handler; size_t handler;
opal_list_t *event_codes; opal_list_t *event_codes;
opal_list_t *info; opal_list_t *info;
opal_list_t results;
opal_pmix_notification_fn_t evhandler; opal_pmix_notification_fn_t evhandler;
opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_evhandler_reg_cbfunc_t cbfunc;
opal_pmix_op_cbfunc_t opcbfunc; opal_pmix_op_cbfunc_t opcbfunc;
pmix_event_notification_cbfunc_fn_t pmixcbfunc;
void *cbdata; void *cbdata;
} pmix2x_threadshift_t; } pmix2x_threadshift_t;
OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \
do { \
pmix2x_opalcaddy_t *_cd; \
_cd = OBJ_NEW(pmix2x_opalcaddy_t); \
_cd->id = (i); \
_cd->status = (s); \
_cd->source = (sr); \
_cd->info = (i); \
_cd->evcbfunc = (cb); \
_cd->cbdata = (cd); \
event_assign(&((_cd)->ev), opal_pmix_base.evbase, \
-1, EV_WRITE, (fn), (_cd)); \
event_active(&((_cd)->ev), EV_WRITE, 1); \
} while(0)
#define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \ #define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \
do { \ do { \
pmix2x_threadshift_t *_cd; \ pmix2x_threadshift_t *_cd; \

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc. * Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -36,7 +36,6 @@
static pmix_proc_t my_proc; static pmix_proc_t my_proc;
static char *dbgvalue=NULL; static char *dbgvalue=NULL;
static size_t errhdler_ref = 0;
#define PMIX_WAIT_FOR_COMPLETION(a) \ #define PMIX_WAIT_FOR_COMPLETION(a) \
do { \ do { \
@ -50,7 +49,9 @@ static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref, size_t errhandler_ref,
void *cbdata) void *cbdata)
{ {
errhdler_ref = errhandler_ref; opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata;
event->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output, opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref); status, (unsigned long)errhandler_ref);
@ -62,6 +63,7 @@ int pmix2x_client_init(void)
pmix_status_t rc; pmix_status_t rc;
int dbg; int dbg;
opal_pmix2x_jobid_trkr_t *job; opal_pmix2x_jobid_trkr_t *job;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output, opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client init"); "PMIx_client init");
@ -98,7 +100,9 @@ int pmix2x_client_init(void)
opal_proc_set_name(&pname); opal_proc_set_name(&pname);
/* register the default event handler */ /* register the default event handler */
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL); event = OBJ_NEW(opal_pmix2x_event_t);
opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -106,12 +110,16 @@ int pmix2x_client_init(void)
int pmix2x_client_finalize(void) int pmix2x_client_finalize(void)
{ {
pmix_status_t rc; pmix_status_t rc;
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output, opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client finalize"); "PMIx_client finalize");
/* deregister the default event handler */ /* deregister all event handlers */
PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL); OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
PMIx_Deregister_event_handler(event->index, NULL, NULL);
}
/* the list will be destructed when the component is finalized */
rc = PMIx_Finalize(NULL, 0); rc = PMIx_Finalize(NULL, 0);
return pmix2x_convert_rc(rc); return pmix2x_convert_rc(rc);

Просмотреть файл

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
@ -36,7 +36,6 @@ const char *opal_pmix_pmix2x_component_version_string =
static int external_open(void); static int external_open(void);
static int external_close(void); static int external_close(void);
static int external_component_query(mca_base_module_t **module, int *priority); static int external_component_query(mca_base_module_t **module, int *priority);
static int external_register(void);
/* /*
@ -66,7 +65,6 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.mca_open_component = external_open, .mca_open_component = external_open,
.mca_close_component = external_close, .mca_close_component = external_close,
.mca_query_component = external_component_query, .mca_query_component = external_component_query,
.mca_register_component_params = external_register,
}, },
/* Next the MCA v1.0.0 component meta data */ /* Next the MCA v1.0.0 component meta data */
.base_data = { .base_data = {
@ -77,27 +75,11 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.native_launch = false .native_launch = false
}; };
static int external_register(void)
{
mca_pmix_pmix2x_component.cache_size = 256;
mca_base_component_var_register(&mca_pmix_pmix2x_component.super.base_version,
"cache_size", "Size of the ring buffer cache for events",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&mca_pmix_pmix2x_component.cache_size);
return OPAL_SUCCESS;
}
static int external_open(void) static int external_open(void)
{ {
mca_pmix_pmix2x_component.evindex = 0; mca_pmix_pmix2x_component.evindex = 0;
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t); OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.single_events, opal_list_t); OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.multi_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.default_events, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.cache, opal_list_t);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -105,10 +87,7 @@ static int external_open(void)
static int external_close(void) static int external_close(void)
{ {
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids); OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.single_events); OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.multi_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.default_events);
OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.cache);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -88,6 +88,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories #define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */ /* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job #define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler #define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job #define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -117,6 +118,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace #define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
#define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and #define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and
// thus cannot be prefixed with "pmix" // thus cannot be prefixed with "pmix"
#define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon
#define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes
/* size info */ /* size info */
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace #define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
@ -220,6 +223,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform" #define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform"
#define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes #define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
#define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes #define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
#define OPAL_PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
#define OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
/* log attributes */ /* log attributes */
#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr #define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

Просмотреть файл

@ -4,7 +4,7 @@
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2013 Inria. All rights reserved. * Copyright (c) 2013 Inria. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
@ -200,10 +200,9 @@ char* opal_get_proc_hostname(const opal_proc_t *proc)
} }
/* if we don't already have it, then try to get it */ /* if we don't already have it, then try to get it */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name, OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name,
(char**)&(proc->proc_hostname), OPAL_STRING); (char**)&(proc->proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) { if (OPAL_SUCCESS != ret) {
OPAL_ERROR_LOG(ret);
return "unknown"; // return something so the caller doesn't segfault return "unknown"; // return something so the caller doesn't segfault
} }

Просмотреть файл

@ -3,6 +3,7 @@
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* *
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -71,7 +72,7 @@ static int dfs_app_close(void)
static int dfs_app_component_query(mca_base_module_t **module, int *priority) static int dfs_app_component_query(mca_base_module_t **module, int *priority)
{ {
if (ORTE_PROC_IS_APP && orte_staged_execution) { if (ORTE_PROC_IS_APP) {
/* set our priority high as we are the default for apps */ /* set our priority high as we are the default for apps */
*priority = 1000; *priority = 1000;
*module = (mca_base_module_t *)&orte_dfs_app_module; *module = (mca_base_module_t *)&orte_dfs_app_module;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -89,7 +89,6 @@ static int rte_init(void)
char *envar, *ev1, *ev2; char *envar, *ev1, *ev2;
uint64_t unique_key[2]; uint64_t unique_key[2];
char *string_key; char *string_key;
char *rmluri;
opal_value_t *kv; opal_value_t *kv;
char *val; char *val;
int u32, *u32ptr; int u32, *u32ptr;
@ -399,19 +398,9 @@ static int rte_init(void)
orte_process_info.max_procs = orte_process_info.num_procs; orte_process_info.max_procs = orte_process_info.num_procs;
} }
/*** PUSH DATA FOR OTHERS TO FIND ***/ /* push our hostname so others can find us, if they need to - the
* native PMIx component will ignore this request as the hostname
/* push our RML URI in case others need to talk directly to us */ * is provided by the system */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "pmix put uri";
goto error;
}
free(rmluri);
/* push our hostname so others can find us, if they need to */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) { if (ORTE_SUCCESS != ret) {
error = "db store hostname"; error = "db store hostname";

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2016 Research Organization for Information Science * Copyright (c) 2016 Research Organization for Information Science
@ -336,13 +336,6 @@ static int rte_init(void)
return rc; return rc;
} }
/* push our hostname so others can find us, if they need to */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "db store hostname";
goto error;
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
error: error:

Просмотреть файл

@ -1341,9 +1341,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, "1"); opal_argv_append(argc, argv, "1");
} }
if (orte_map_reduce) {
opal_argv_append(argc, argv, "--mapreduce");
}
if (orte_map_stddiag_to_stderr) { if (orte_map_stddiag_to_stderr) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr"); opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");

Просмотреть файл

@ -1101,11 +1101,6 @@ static int setup_child(orte_job_t *jdata,
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env); opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env);
} }
/* if we are using staged execution, tell it */
if (orte_staged_execution) {
opal_setenv("OMPI_MCA_orte_staged_execution", "1", true, &app->env);
}
/* if the proc isn't going to forward IO, then we need to flag that /* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire * it has "completed" iof termination as otherwise it will never fire
*/ */

Просмотреть файл

@ -126,7 +126,6 @@ static struct {
int uri_pipe; int uri_pipe;
int singleton_died_pipe; int singleton_died_pipe;
bool abort; bool abort;
bool mapreduce;
bool tree_spawn; bool tree_spawn;
char *hnp_topo_sig; char *hnp_topo_sig;
bool test_suicide; bool test_suicide;
@ -217,10 +216,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING, &orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING,
"Topology signature of HNP" }, "Topology signature of HNP" },
{ NULL, '\0', "mapreduce", "mapreduce", 0,
&orted_globals.mapreduce, OPAL_CMD_LINE_TYPE_BOOL,
"Whether to report process bindings to stderr" },
/* End of list */ /* End of list */
{ NULL, '\0', NULL, NULL, 0, { NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -335,11 +330,6 @@ int orte_daemon(int argc, char *argv[])
free(tmp_env_var); free(tmp_env_var);
#endif #endif
/* if mapreduce set, flag it */
if (orted_globals.mapreduce) {
orte_map_reduce = true;
}
/* detach from controlling terminal /* detach from controlling terminal
* otherwise, remain attached so output can get to us * otherwise, remain attached so output can get to us
*/ */

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science * Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -137,6 +137,9 @@ static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jar
static int parse_appfile(orte_job_t *jdata, char *filename, char ***env); static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
static void orte_timeout_wakeup(int sd, short args, void *cbdata); static void orte_timeout_wakeup(int sd, short args, void *cbdata);
static void orte_profile_wakeup(int sd, short args, void *cbdata); static void orte_profile_wakeup(int sd, short args, void *cbdata);
static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata);
static void launch_recv(int status, orte_process_name_t* sender, static void launch_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata); orte_rml_tag_t tag, void *cbdata);
@ -896,20 +899,6 @@ int orte_submit_job(char *argv[], int *index,
} }
} }
/* check for debugger test envars and forward them if necessary */
if (NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
char *evar;
evar = getenv("ORTE_TEST_DEBUGGER_SLEEP");
for (i=0; i < (orte_app_idx_t)jdata->num_apps; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
opal_setenv("ORTE_TEST_DEBUGGER_ATTACH", "1", true, &app->env);
if (NULL != evar) {
opal_setenv("ORTE_TEST_DEBUGGER_SLEEP", evar, true, &app->env);
}
}
}
}
/* check for suicide test directives */ /* check for suicide test directives */
if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") || if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") ||
NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) { NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
@ -956,6 +945,8 @@ int orte_submit_job(char *argv[], int *index,
ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE); ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
//goto DONE; //goto DONE;
} }
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
orte_memprofile_timeout->tv.tv_sec = timeout_seconds; orte_memprofile_timeout->tv.tv_sec = timeout_seconds;
orte_memprofile_timeout->tv.tv_usec = 0; orte_memprofile_timeout->tv.tv_usec = 0;
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev, opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
@ -2212,10 +2203,9 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
static bool mpir_breakpoint_fired = false; static bool mpir_breakpoint_fired = false;
static void _send_notification(void) static void _send_notification(int status)
{ {
opal_buffer_t buf; opal_buffer_t buf;
int status = OPAL_ERR_DEBUGGER_RELEASE;
orte_grpcomm_signature_t sig; orte_grpcomm_signature_t sig;
int rc; int rc;
opal_value_t kv, *kvptr; opal_value_t kv, *kvptr;
@ -2448,7 +2438,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE", "%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */ /* notify all procs that the debugger is ready */
_send_notification(); _send_notification(OPAL_ERR_DEBUGGER_RELEASE);
} }
} }
return; return;
@ -2547,7 +2537,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
"%s NOTIFYING DEBUGGER RELEASE", "%s NOTIFYING DEBUGGER RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* notify all procs that the debugger is ready */ /* notify all procs that the debugger is ready */
_send_notification(); _send_notification(OPAL_ERR_DEBUGGER_RELEASE);
} else if (!orte_debugger_test_attach) { } else if (!orte_debugger_test_attach) {
/* if I am launching debugger daemons, then I need to do so now /* if I am launching debugger daemons, then I need to do so now
* that the job has been started and I know which nodes have * that the job has been started and I know which nodes have
@ -3133,6 +3123,16 @@ void orte_timeout_wakeup(int sd, short args, void *cbdata)
static int nreports = 0; static int nreports = 0;
static orte_timer_t profile_timer; static orte_timer_t profile_timer;
static int nchecks = 0;
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
static void profile_recv(int status, orte_process_name_t* sender, static void profile_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag, opal_buffer_t *buffer, orte_rml_tag_t tag,
@ -3167,26 +3167,33 @@ static void profile_recv(int status, orte_process_name_t* sender,
done: done:
--nreports; --nreports;
if (nreports == 0) { if (nreports == 0) {
++nchecks;
/* cancel the timeout */ /* cancel the timeout */
OBJ_DESTRUCT(&profile_timer); OBJ_DESTRUCT(&profile_timer);
/* abort the job */ /* notify to release */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE); _send_notification(12345);
/* set the global abnormal exit flag */ /* if this was the first measurement, then we need to
orte_abnormal_term_ordered = true; * let the probe move along */
if (2 > nchecks) {
/* reset the event */
opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
orte_profile_wakeup, NULL);
opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
/* reset the timer */
OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
opal_event_evtimer_set(orte_event_base,
profile_timer.ev, profile_timeout, NULL);
opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
profile_timer.tv.tv_sec = 30;
opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
return;
}
} }
} }
static void profile_timeout(int sd, short args, void *cbdata)
{
/* abort the job */
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
/* set the global abnormal exit flag */
orte_abnormal_term_ordered = true;
}
void orte_profile_wakeup(int sd, short args, void *cbdata) void orte_profile_wakeup(int sd, short args, void *cbdata)
{ {
orte_job_t *jdata = (orte_job_t*)cbdata;
orte_job_t *dmns; orte_job_t *dmns;
orte_proc_t *dmn; orte_proc_t *dmn;
int i; int i;
@ -3202,8 +3209,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
/* set the recv */ /* set the recv */
nreports = 1; // always get a report from ourselves nreports = 1; // always get a report from ourselves
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
ORTE_RML_PERSISTENT, profile_recv, NULL);
/* setup the buffer */ /* setup the buffer */
buffer = OBJ_NEW(opal_buffer_t); buffer = OBJ_NEW(opal_buffer_t);
@ -3213,13 +3218,6 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
OBJ_RELEASE(buffer); OBJ_RELEASE(buffer);
goto giveup; goto giveup;
} }
/* pack the jobid in question */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &jdata->jobid, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
goto giveup;
}
/* goes to just the first daemon beyond ourselves - no need to get it from everyone */ /* goes to just the first daemon beyond ourselves - no need to get it from everyone */
dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) { if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) {

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc. * Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
@ -35,6 +35,7 @@
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/dss/dss.h" #include "opal/dss/dss.h"
#include "opal/mca/pstat/pstat.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rmaps/rmaps_types.h"
@ -449,13 +450,18 @@ static void _query(int sd, short args, void *cbdata)
opal_pmix_query_t *q; opal_pmix_query_t *q;
opal_value_t *kv; opal_value_t *kv;
orte_job_t *jdata; orte_job_t *jdata;
int rc; orte_proc_t *proct;
opal_list_t *results; int rc, i, num_replies;
opal_list_t *results, targets, *array;
size_t n; size_t n;
uint32_t key; uint32_t key;
void *nptr; void *nptr;
char **nspaces=NULL, nspace[512]; char **nspaces=NULL, nspace[512];
char **ans = NULL; char **ans = NULL;
bool local_only;
orte_namelist_t *nm;
opal_pstats_t pstat;
float pss;
opal_output_verbose(2, orte_pmix_server_globals.output, opal_output_verbose(2, orte_pmix_server_globals.output,
"%s processing query", "%s processing query",
@ -522,6 +528,84 @@ static void _query(int sd, short args, void *cbdata)
opal_list_append(results, &kv->super); opal_list_append(results, &kv->super);
opal_argv_free(ans); opal_argv_free(ans);
ans = NULL; ans = NULL;
} else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_MEMORY_USAGE)) {
/* check the qualifiers to find the procs they want to
* know about - if qualifiers are NULL, then get it for
* the daemons + all active jobs */
if (0 == opal_list_get_size(&q->qualifiers)) {
/* create a request tracker */
/* xcast a request for all memory usage */
/* return success - the callback will be done
* once we get the results */
return;
}
/* scan the qualifiers */
OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) {
if (0 == strcmp(kv->key, OPAL_PMIX_QUERY_LOCAL_ONLY)) {
if (OPAL_UNDEF == kv->type || kv->data.flag) {
local_only = true;
} else {
local_only = false;
}
} else if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) {
/* save this directive on our list of targets */
nm = OBJ_NEW(orte_namelist_t);
}
}
/* if they have asked for only our local procs or daemon,
* then we can just get the data directly */
if (local_only) {
if (0 == opal_list_get_size(&targets)) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_QUERY_MEMORY_USAGE);
kv->type = OPAL_PTR;
array = OBJ_NEW(opal_list_t);
kv->data.ptr = array;
opal_list_append(results, &kv->super);
/* collect my memory usage */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
opal_pstat.query(orte_process_info.pid, &pstat, NULL);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_DAEMON_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pstat.pss;
opal_list_append(array, &kv->super);
OBJ_DESTRUCT(&pstat);
/* collect the memory usage of all my children */
pss = 0.0;
num_replies = 0;
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
/* collect the stats on this proc */
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
if (OPAL_SUCCESS == opal_pstat.query(proct->pid, &pstat, NULL)) {
pss += pstat.pss;
++num_replies;
}
OBJ_DESTRUCT(&pstat);
}
}
/* compute the average value */
if (0 < num_replies) {
pss /= (float)num_replies;
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_CLIENT_AVG_MEMORY);
kv->type = OPAL_FLOAT;
kv->data.fval = pss;
opal_list_append(array, &kv->super);
} else {
}
} else {
/* if they want it for remote procs, see who is hosting them
* and ask directly for the info - if rank=wildcard, then
* we need to xcast the request and collect the results */
}
} }
} }
} }

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc. * Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
@ -407,6 +407,14 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->type = OPAL_UINT32; kv->type = OPAL_UINT32;
kv->data.uint32 = pptr->node->index; kv->data.uint32 = pptr->node->index;
opal_list_append(pmap, &kv->super); opal_list_append(pmap, &kv->super);
if (map->num_nodes < orte_hostname_cutoff) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_HOSTNAME);
kv->type = OPAL_STRING;
kv->data.string = strdup(pptr->node->name);
opal_list_append(pmap, &kv->super);
}
} }
} }

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -86,6 +86,7 @@ bool orte_have_fqdn_allocation = false;
bool orte_show_resolved_nodenames = false; bool orte_show_resolved_nodenames = false;
bool orte_retain_aliases = false; bool orte_retain_aliases = false;
int orte_use_hostname_alias = -1; int orte_use_hostname_alias = -1;
int orte_hostname_cutoff = 1000;
int orted_debug_failure = -1; int orted_debug_failure = -1;
int orted_debug_failure_delay = -1; int orted_debug_failure_delay = -1;
@ -183,10 +184,6 @@ int orte_stat_history_size = -1;
/* envars to forward */ /* envars to forward */
char **orte_forwarded_envars = NULL; char **orte_forwarded_envars = NULL;
/* map-reduce mode */
bool orte_map_reduce = false;
bool orte_staged_execution = false;
/* map stddiag output to stderr so it isn't forwarded to mpirun */ /* map stddiag output to stderr so it isn't forwarded to mpirun */
bool orte_map_stddiag_to_stderr = false; bool orte_map_stddiag_to_stderr = false;
@ -196,9 +193,6 @@ int orte_max_vm_size = -1;
/* user debugger */ /* user debugger */
char *orte_base_user_debugger = NULL; char *orte_base_user_debugger = NULL;
/* modex cutoff */
uint32_t orte_direct_modex_cutoff = UINT32_MAX;
int orte_debug_output = -1; int orte_debug_output = -1;
bool orte_debug_daemons_flag = false; bool orte_debug_daemons_flag = false;
bool orte_xml_output = false; bool orte_xml_output = false;

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -466,6 +466,7 @@ ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames; ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
ORTE_DECLSPEC extern bool orte_retain_aliases; ORTE_DECLSPEC extern bool orte_retain_aliases;
ORTE_DECLSPEC extern int orte_use_hostname_alias; ORTE_DECLSPEC extern int orte_use_hostname_alias;
ORTE_DECLSPEC extern int orte_hostname_cutoff;
/* debug flags */ /* debug flags */
ORTE_DECLSPEC extern int orted_debug_failure; ORTE_DECLSPEC extern int orted_debug_failure;
@ -564,10 +565,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
/* envars to forward */ /* envars to forward */
ORTE_DECLSPEC extern char **orte_forwarded_envars; ORTE_DECLSPEC extern char **orte_forwarded_envars;
/* map-reduce mode */
ORTE_DECLSPEC extern bool orte_map_reduce;
ORTE_DECLSPEC extern bool orte_staged_execution;
/* map stddiag output to stderr so it isn't forwarded to mpirun */ /* map stddiag output to stderr so it isn't forwarded to mpirun */
ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr; ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
@ -582,9 +579,6 @@ ORTE_DECLSPEC extern char *orte_base_user_debugger;
*/ */
ORTE_DECLSPEC extern char *orte_daemon_cores; ORTE_DECLSPEC extern char *orte_daemon_cores;
/* cutoff for collective modex */
ORTE_DECLSPEC extern uint32_t orte_direct_modex_cutoff;
END_C_DECLS END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */ #endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved * All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -429,6 +429,13 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_retain_aliases); &orte_retain_aliases);
orte_hostname_cutoff = 1000;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
"Pass hostnames to all procs when #nodes is less than cutoff [default:1000]",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
&orte_hostname_cutoff);
/* which alias to use in MPIR_proctab */ /* which alias to use in MPIR_proctab */
orte_use_hostname_alias = 1; orte_use_hostname_alias = 1;
(void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index", (void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
@ -659,13 +666,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_allowed_exit_without_sync); &orte_allowed_exit_without_sync);
orte_staged_execution = false;
(void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
"Staged execution is being used",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_staged_execution);
orte_report_child_jobs_separately = false; orte_report_child_jobs_separately = false;
(void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately", (void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
"Return the exit status of the primary job only", "Return the exit status of the primary job only",
@ -754,17 +754,6 @@ int orte_register_params(void)
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&orte_daemon_cores); &orte_daemon_cores);
/* cutoff for full modex */
orte_direct_modex_cutoff = UINT32_MAX;
id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
"If the number of processes in the application exceeds the provided value,"
"modex will be done upon demand [default: UINT32_MAX]",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_direct_modex_cutoff);
/* register a synonym for old name */
mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
/* get the conduit params */ /* get the conduit params */
orte_coll_transport = "fabric,ethernet"; orte_coll_transport = "fabric,ethernet";
(void) mca_base_var_register("orte", "orte", "coll", "transports", (void) mca_base_var_register("orte", "orte", "coll", "transports",