1
1

Roll to PMIx 1.1.4rc2 - restores some code that was incorrectly removed in the prior update

Этот коммит содержится в:
Ralph Castain 2016-04-16 18:23:41 -07:00
родитель 8cce6df688
Коммит b009e58d25
9 изменённых файлов: 122 добавлений и 31 удалений

Просмотреть файл

@ -23,14 +23,14 @@ release=4
# The only requirement is that it must be entirely printable ASCII
# characters and have no white space.
greek=rc1
greek=rc2
# If repo_rev is empty, then the repository version number will be
# obtained during "make dist" via the "git describe --tags --always"
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=gitb363c5d
repo_rev=gitd9fd3da
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Apr 15, 2016"
date="Apr 16, 2016"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -702,6 +702,10 @@ PMIX_EXPORT pmix_status_t PMIx_Commit(void)
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* if we are a server, or we aren't connected, don't attempt to send */
if (pmix_globals.server) {
return PMIX_SUCCESS; // not an error
@ -784,6 +788,10 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
@ -843,6 +851,10 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;

Просмотреть файл

@ -31,6 +31,10 @@ PMIX_EXPORT void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
pmix_errhandler_reg_cbfunc_t cbfunc,
void *cbdata)
{
if (pmix_globals.init_cntr <= 0) {
return;
}
/* common err handler registration */
if (pmix_globals.server) {
/* PMIX server: store the error handler, process info keys and call
@ -57,6 +61,10 @@ PMIX_EXPORT void PMIx_Deregister_errhandler(int errhandler_ref,
pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
if (pmix_globals.init_cntr <= 0) {
return;
}
/* common err handler registration */
if (pmix_globals.server) {
/* PMIX server: store the error handler, process info keys and call
@ -82,6 +90,10 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_error(pmix_status_t status,
{
int rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
if (pmix_globals.server) {
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
error_nprocs, info, ninfo,

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
@ -586,17 +586,35 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n
static void _deregister_nspace(int sd, short args, void *cbdata)
{
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_nspace_t *tmp;
pmix_nspace_t *nptr;
int i;
pmix_peer_t *peer;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server _deregister_nspace %s",
cd->proc.nspace);
/* see if we already have this nspace */
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, cd->proc.nspace)) {
pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super);
PMIX_RELEASE(tmp);
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(nptr->nspace, cd->proc.nspace)) {
/* find and remove this client from our array of local
* peers - remember that it can occur multiple times
* if the peer called fork/exec and its children called
* PMIx_Init! We have to rely on none of those children
* living beyond our child as we otherwise cannot
* track them */
for (i=0; i < pmix_server_globals.clients.size; i++) {
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
continue;
}
if (nptr == peer->info->nptr) {
/* remove this entry */
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
PMIX_RELEASE(peer);
}
}
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
PMIX_RELEASE(nptr);
break;
}
}
@ -620,8 +638,7 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[])
PMIX_THREADSHIFT(cd, _deregister_nspace);
}
static void _execute_collective(int sd, short args, void *cbdata)
{
void pmix_server_execute_collective(int sd, short args, void *cbdata) {
pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata;
pmix_server_trkr_t *trk = tcd->trk;
char *data = NULL;
@ -759,7 +776,7 @@ static void _register_client(int sd, short args, void *cbdata)
* we don't want to block someone
* here, so kick any completed trackers into a
* new event for processing */
PMIX_EXECUTE_COLLECTIVE(tcd, trk, _execute_collective);
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
}
}
/* also check any pending local modex requests to see if
@ -803,8 +820,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc,
static void _deregister_client(int sd, short args, void *cbdata)
{
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_rank_info_t *info;
pmix_nspace_t *nptr, *tmp;
int i;
pmix_peer_t *peer;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server _deregister_client for nspace %s rank %d",
@ -822,15 +840,27 @@ static void _deregister_client(int sd, short args, void *cbdata)
/* nothing to do */
goto cleanup;
}
/* find and remove this client */
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == cd->proc.rank) {
pmix_list_remove_item(&nptr->server->ranks, &info->super);
PMIX_RELEASE(info);
break;
/* find and remove this client from our array of local
* peers - remember that it can occur multiple times
* if the peer called fork/exec and its children called
* PMIx_Init! We have to rely on none of those children
* living beyond our child as we otherwise cannot
* track them */
for (i=0; i < pmix_server_globals.clients.size; i++) {
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
continue;
}
if (nptr != peer->info->nptr) {
continue;
}
if (cd->proc.rank == peer->info->rank) {
/* remove this entry */
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
PMIX_RELEASE(peer);
}
}
cleanup:
PMIX_RELEASE(cd);
}
@ -2139,8 +2169,9 @@ void regevents_cbfunc (pmix_status_t status, void *cbdata)
}
}
reply = PMIX_NEW(pmix_buffer_t);
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
PMIX_ERROR_LOG(rc);
}
// send reply
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
PMIX_RELEASE(cd);

Просмотреть файл

@ -91,7 +91,7 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
return PMIX_ERROR;
}
/* set the mode as required */
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH)) {
pmix_output(0, "CANNOT CHMOD %s\n", address->sun_path);
return PMIX_ERROR;
}

Просмотреть файл

@ -234,6 +234,8 @@ void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
void regevents_cbfunc (pmix_status_t status, void *cbdata);
void pmix_server_execute_collective(int sd, short args, void *cbdata);
extern pmix_server_module_t pmix_host_server;
extern pmix_server_globals_t pmix_server_globals;

Просмотреть файл

@ -50,6 +50,10 @@ static uint32_t current_tag = 1; // 0 is reserved for system purposes
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
{
pmix_server_trkr_t *trk;
pmix_rank_info_t *rinfo, *rnext;
pmix_trkr_caddy_t *tcd;
/* stop all events */
if (peer->recv_ev_active) {
event_del(&peer->recv_event);
@ -65,9 +69,42 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
}
CLOSE_THE_SOCKET(peer->sd);
if (pmix_globals.server) {
/* if I am a server, then we need to
* do some cleanup as the client has
* left us */
/* if I am a server, then we need to ensure that
* we properly account for the loss of this client
* from any local collectives in which it was
* participating - note that the proc would not
* have been added to any collective tracker until
* after it successfully connected */
PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) {
/* see if this proc is participating in this tracker */
PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->ranks, pmix_rank_info_t) {
if (0 != strncmp(rinfo->nptr->nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN)) {
continue;
}
if (rinfo->rank != peer->info->rank) {
continue;
}
/* it is - adjust the count */
--trk->nlocal;
/* remove it from the list */
pmix_list_remove_item(&trk->ranks, &rinfo->super);
PMIX_RELEASE(rinfo);
/* check for completion */
if (pmix_list_get_size(&trk->local_cbs) == trk->nlocal) {
/* complete, so now we need to process it -
* we don't want to block someone
* here, so kick any completed trackers into a
* new event for processing */
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
}
}
}
/* remove this proc from the list of ranks for this nspace */
pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super));
PMIX_RELEASE(peer->info);
/* reduce the number of local procs */
--peer->info->nptr->server->nlocalprocs;
/* do some cleanup as the client has left us */
pmix_pointer_array_set_item(&pmix_server_globals.clients,
peer->index, NULL);
PMIX_RELEASE(peer);

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -157,9 +157,8 @@ void pmix_errhandler_invoke(pmix_status_t status,
/* We need to parse through each registered handler and determine
* which one to call for the specific error */
int i, idflt;
size_t j, k;
size_t j;
bool fired = false;
bool exact_match;
pmix_error_reg_info_t *errreg, *errdflt=NULL;
pmix_info_t *iptr;
@ -184,14 +183,12 @@ void pmix_errhandler_invoke(pmix_status_t status,
}
iptr[0].value.data.integer = i;
/* match error name key first */
exact_match = false;
for (j = 0; j < errreg->ninfo; j++) {
if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) &&
(status == errreg->info[j].value.data.int32)) {
iptr[0].value.data.integer = i;
errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
fired = true;
exact_match = true;
break;
}
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
@ -305,7 +305,7 @@ static int test_item5(void)
const char **ptr = tkeys;
if (_legacy || !_legacy) {
log_error("PMIx and SLURM/PMI1 do not set 'PMI_process_mapping' (Do not mark test as failed)\n");
log_error("PMIx and SLURM/PMI1 do not set 'PMI_process_mapping' %s\n", "(Do not mark test as failed)");
return rc;
}