Roll to PMIx 1.1.4rc2 - restores some code that was incorrectly removed in a prior update
Этот коммит содержится в:
родитель
8cce6df688
Коммит
b009e58d25
@ -23,14 +23,14 @@ release=4
|
||||
# The only requirement is that it must be entirely printable ASCII
|
||||
# characters and have no white space.
|
||||
|
||||
greek=rc1
|
||||
greek=rc2
|
||||
|
||||
# If repo_rev is empty, then the repository version number will be
|
||||
# obtained during "make dist" via the "git describe --tags --always"
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=gitb363c5d
|
||||
repo_rev=gitd9fd3da
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Apr 15, 2016"
|
||||
date="Apr 16, 2016"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
|
@ -702,6 +702,10 @@ PMIX_EXPORT pmix_status_t PMIx_Commit(void)
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* if we are a server, or we aren't connected, don't attempt to send */
|
||||
if (pmix_globals.server) {
|
||||
return PMIX_SUCCESS; // not an error
|
||||
@ -784,6 +788,10 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* create a callback object */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
@ -843,6 +851,10 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* create a callback object */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
|
@ -31,6 +31,10 @@ PMIX_EXPORT void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* common err handler registration */
|
||||
if (pmix_globals.server) {
|
||||
/* PMIX server: store the error handler, process info keys and call
|
||||
@ -57,6 +61,10 @@ PMIX_EXPORT void PMIx_Deregister_errhandler(int errhandler_ref,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* common err handler registration */
|
||||
if (pmix_globals.server) {
|
||||
/* PMIX server: store the error handler, process info keys and call
|
||||
@ -82,6 +90,10 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_error(pmix_status_t status,
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
if (pmix_globals.server) {
|
||||
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -586,17 +586,35 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n
|
||||
static void _deregister_nspace(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
|
||||
pmix_nspace_t *tmp;
|
||||
pmix_nspace_t *nptr;
|
||||
int i;
|
||||
pmix_peer_t *peer;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix:server _deregister_nspace %s",
|
||||
cd->proc.nspace);
|
||||
|
||||
/* see if we already have this nspace */
|
||||
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(tmp->nspace, cd->proc.nspace)) {
|
||||
pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super);
|
||||
PMIX_RELEASE(tmp);
|
||||
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(nptr->nspace, cd->proc.nspace)) {
|
||||
/* find and remove this client from our array of local
|
||||
* peers - remember that it can occur multiple times
|
||||
* if the peer called fork/exec and its children called
|
||||
* PMIx_Init! We have to rely on none of those children
|
||||
* living beyond our child as we otherwise cannot
|
||||
* track them */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr == peer->info->nptr) {
|
||||
/* remove this entry */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
|
||||
PMIX_RELEASE(nptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -620,8 +638,7 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[])
|
||||
PMIX_THREADSHIFT(cd, _deregister_nspace);
|
||||
}
|
||||
|
||||
static void _execute_collective(int sd, short args, void *cbdata)
|
||||
{
|
||||
void pmix_server_execute_collective(int sd, short args, void *cbdata) {
|
||||
pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata;
|
||||
pmix_server_trkr_t *trk = tcd->trk;
|
||||
char *data = NULL;
|
||||
@ -759,7 +776,7 @@ static void _register_client(int sd, short args, void *cbdata)
|
||||
* we don't want to block someone
|
||||
* here, so kick any completed trackers into a
|
||||
* new event for processing */
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, _execute_collective);
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
|
||||
}
|
||||
}
|
||||
/* also check any pending local modex requests to see if
|
||||
@ -803,8 +820,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc,
|
||||
static void _deregister_client(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
|
||||
pmix_rank_info_t *info;
|
||||
pmix_nspace_t *nptr, *tmp;
|
||||
int i;
|
||||
pmix_peer_t *peer;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix:server _deregister_client for nspace %s rank %d",
|
||||
@ -822,15 +840,27 @@ static void _deregister_client(int sd, short args, void *cbdata)
|
||||
/* nothing to do */
|
||||
goto cleanup;
|
||||
}
|
||||
/* find an remove this client */
|
||||
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
|
||||
if (info->rank == cd->proc.rank) {
|
||||
pmix_list_remove_item(&nptr->server->ranks, &info->super);
|
||||
PMIX_RELEASE(info);
|
||||
break;
|
||||
/* find and remove this client from our array of local
|
||||
* peers - remember that it can occur multiple times
|
||||
* if the peer called fork/exec and its children called
|
||||
* PMIx_Init! We have to rely on none of those children
|
||||
* living beyond our child as we otherwise cannot
|
||||
* track them */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr != peer->info->nptr) {
|
||||
continue;
|
||||
}
|
||||
if (cd->proc.rank == peer->info->rank) {
|
||||
/* remove this entry */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cleanup:
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
@ -2139,8 +2169,9 @@ void regevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
}
|
||||
}
|
||||
reply = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
|
@ -91,7 +91,7 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
/* set the mode as required */
|
||||
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
|
||||
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH)) {
|
||||
pmix_output(0, "CANNOT CHMOD %s\n", address->sun_path);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
@ -234,6 +234,8 @@ void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata);
|
||||
|
||||
void pmix_server_execute_collective(int sd, short args, void *cbdata);
|
||||
|
||||
extern pmix_server_module_t pmix_host_server;
|
||||
extern pmix_server_globals_t pmix_server_globals;
|
||||
|
||||
|
@ -50,6 +50,10 @@ static uint32_t current_tag = 1; // 0 is reserved for system purposes
|
||||
|
||||
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
|
||||
{
|
||||
pmix_server_trkr_t *trk;
|
||||
pmix_rank_info_t *rinfo, *rnext;
|
||||
pmix_trkr_caddy_t *tcd;
|
||||
|
||||
/* stop all events */
|
||||
if (peer->recv_ev_active) {
|
||||
event_del(&peer->recv_event);
|
||||
@ -65,9 +69,42 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
|
||||
}
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
if (pmix_globals.server) {
|
||||
/* if I am a server, then we need to
|
||||
* do some cleanup as the client has
|
||||
* left us */
|
||||
/* if I am a server, then we need to ensure that
|
||||
* we properly account for the loss of this client
|
||||
* from any local collectives in which it was
|
||||
* participating - note that the proc would not
|
||||
* have been added to any collective tracker until
|
||||
* after it successfully connected */
|
||||
PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) {
|
||||
/* see if this proc is participating in this tracker */
|
||||
PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->ranks, pmix_rank_info_t) {
|
||||
if (0 != strncmp(rinfo->nptr->nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (rinfo->rank != peer->info->rank) {
|
||||
continue;
|
||||
}
|
||||
/* it is - adjust the count */
|
||||
--trk->nlocal;
|
||||
/* remove it from the list */
|
||||
pmix_list_remove_item(&trk->ranks, &rinfo->super);
|
||||
PMIX_RELEASE(rinfo);
|
||||
/* check for completion */
|
||||
if (pmix_list_get_size(&trk->local_cbs) == trk->nlocal) {
|
||||
/* complete, so now we need to process it
|
||||
* we don't want to block someone
|
||||
* here, so kick any completed trackers into a
|
||||
* new event for processing */
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* remove this proc from the list of ranks for this nspace */
|
||||
pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super));
|
||||
PMIX_RELEASE(peer->info);
|
||||
/* reduce the number of local procs */
|
||||
--peer->info->nptr->server->nlocalprocs;
|
||||
/* do some cleanup as the client has left us */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients,
|
||||
peer->index, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -157,9 +157,8 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
/* We need to parse thru each registered handler and determine
|
||||
* which one to call for the specific error */
|
||||
int i, idflt;
|
||||
size_t j, k;
|
||||
size_t j;
|
||||
bool fired = false;
|
||||
bool exact_match;
|
||||
pmix_error_reg_info_t *errreg, *errdflt=NULL;
|
||||
pmix_info_t *iptr;
|
||||
|
||||
@ -184,14 +183,12 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
}
|
||||
iptr[0].value.data.integer = i;
|
||||
/* match error name key first */
|
||||
exact_match = false;
|
||||
for (j = 0; j < errreg->ninfo; j++) {
|
||||
if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) &&
|
||||
(status == errreg->info[j].value.data.int32)) {
|
||||
iptr[0].value.data.integer = i;
|
||||
errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
|
||||
fired = true;
|
||||
exact_match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -305,7 +305,7 @@ static int test_item5(void)
|
||||
const char **ptr = tkeys;
|
||||
|
||||
if (_legacy || !_legacy) {
|
||||
log_error("PMIx and SLURM/PMI1 do not set 'PMI_process_mapping' (Do not mark test as failed)\n");
|
||||
log_error("PMIx and SLURM/PMI1 do not set 'PMI_process_mapping' %s\n", "(Do not mark test as failed)");
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user