Update to official PMIx 1.1.4rc1
Этот коммит содержится в:
родитель
fde5183f90
Коммит
8ff114e668
@ -23,6 +23,27 @@ current release as well as the "stable" bug fix release branch.
|
||||
Master (not on release branches yet)
|
||||
------------------------------------
|
||||
|
||||
1.1.4
|
||||
-----
|
||||
- Properly increment the reference count for PMIx_Init
|
||||
- Fix examples so all run properly
|
||||
- Fix/complete PMI2 backward compatibility support to handle
|
||||
keys that are not associated with a specific rank
|
||||
- Do a better job of hiding non-API symbols
|
||||
- Correct handling of semicolon terminations on macros.
|
||||
Thanks to Ashley Pittman for the patch
|
||||
- Add more man pages
|
||||
- Improve error checking and messages for connection
|
||||
attempts from client to server
|
||||
- If the tmpdir name is too long, provide an appropriate
|
||||
help message to the user (particularly relevant on
|
||||
Mac OS X). Thanks to Rainer Keller for the patch.
|
||||
- Fix some C++ compatibility issues
|
||||
- Fix/complete PMI-1 backward compatibility support
|
||||
- Do not install internal headers unless specifically
|
||||
requested to do so
|
||||
- Add support for multiple calls to Put/Commit
|
||||
|
||||
|
||||
1.1.3
|
||||
-----
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
major=1
|
||||
minor=1
|
||||
release=3
|
||||
release=4
|
||||
|
||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||
# it will be appended to the version number. It does not have to be
|
||||
@ -30,7 +30,7 @@ greek=rc1
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=git17fb9c5
|
||||
repo_rev=gitb363c5d
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Apr 14, 2016"
|
||||
date="Apr 15, 2016"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
@ -75,4 +75,4 @@ date="Apr 14, 2016"
|
||||
# Version numbers are described in the Libtool current:revision:age
|
||||
# format.
|
||||
|
||||
libpmix_so_version=2:2:0
|
||||
libpmix_so_version=2:3:0
|
||||
|
@ -702,10 +702,6 @@ PMIX_EXPORT pmix_status_t PMIx_Commit(void)
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* if we are a server, or we aren't connected, don't attempt to send */
|
||||
if (pmix_globals.server) {
|
||||
return PMIX_SUCCESS; // not an error
|
||||
@ -788,10 +784,6 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* create a callback object */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
@ -851,10 +843,6 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* create a callback object */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
@ -1213,10 +1201,17 @@ void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr)
|
||||
"connection to server aborted by OS - retrying");
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
continue;
|
||||
} else {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"Connect failed: %s (%d)", strerror(pmix_socket_errno),
|
||||
pmix_socket_errno);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
/* otherwise, the connect succeeded - so break out of the loop */
|
||||
break;
|
||||
}
|
||||
/* otherwise, the connect succeeded - so break out of the loop */
|
||||
break;
|
||||
}
|
||||
|
||||
if (retries == PMIX_MAX_RETRIES || sd < 0){
|
||||
@ -1410,7 +1405,8 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(errhandler_ref);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
@ -1421,9 +1417,9 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
|
||||
}
|
||||
} else {
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
|
||||
}
|
||||
else
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
|
||||
}
|
||||
|
||||
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
|
@ -31,10 +31,6 @@ PMIX_EXPORT void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* common err handler registration */
|
||||
if (pmix_globals.server) {
|
||||
/* PMIX server: store the error handler, process info keys and call
|
||||
@ -61,10 +57,6 @@ PMIX_EXPORT void PMIx_Deregister_errhandler(int errhandler_ref,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* common err handler registration */
|
||||
if (pmix_globals.server) {
|
||||
/* PMIX server: store the error handler, process info keys and call
|
||||
@ -90,10 +82,6 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_error(pmix_status_t status,
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
if (pmix_globals.server) {
|
||||
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -144,6 +144,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
int debug_level;
|
||||
char *tdir, *evar;
|
||||
pid_t pid;
|
||||
char * pmix_pid;
|
||||
|
||||
/* initialize the output system */
|
||||
if (!pmix_output_init()) {
|
||||
@ -217,7 +218,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
/* now set the address - we use the pid here to reduce collisions */
|
||||
memset(&myaddress, 0, sizeof(struct sockaddr_un));
|
||||
myaddress.sun_family = AF_UNIX;
|
||||
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
|
||||
asprintf(&pmix_pid, "pmix-%d", pid);
|
||||
// If the temporary directory name set above, plus the pmix-PID string
|
||||
// and the '/' separator, is too long, just fail so the caller
|
||||
// can give the user a proper help message... *Cough*, *Cough* OSX...
|
||||
if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) {
|
||||
free(pmix_pid);
|
||||
return PMIX_ERR_INVALID_LENGTH;
|
||||
}
|
||||
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid);
|
||||
free(pmix_pid);
|
||||
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);
|
||||
|
||||
|
||||
@ -576,35 +586,17 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n
|
||||
static void _deregister_nspace(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
|
||||
pmix_nspace_t *nptr;
|
||||
int i;
|
||||
pmix_peer_t *peer;
|
||||
pmix_nspace_t *tmp;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix:server _deregister_nspace %s",
|
||||
cd->proc.nspace);
|
||||
|
||||
/* see if we already have this nspace */
|
||||
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(nptr->nspace, cd->proc.nspace)) {
|
||||
/* find and remove this client from our array of local
|
||||
* peers - remember that it can occur multiple times
|
||||
* if the peer called fork/exec and its children called
|
||||
* PMIx_Init! We have to rely on none of those children
|
||||
* living beyond our child as we otherwise cannot
|
||||
* track them */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr == peer->info->nptr) {
|
||||
/* remove this entry */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
|
||||
PMIX_RELEASE(nptr);
|
||||
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(tmp->nspace, cd->proc.nspace)) {
|
||||
pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super);
|
||||
PMIX_RELEASE(tmp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -628,7 +620,7 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[])
|
||||
PMIX_THREADSHIFT(cd, _deregister_nspace);
|
||||
}
|
||||
|
||||
void pmix_server_execute_collective(int sd, short args, void *cbdata)
|
||||
static void _execute_collective(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata;
|
||||
pmix_server_trkr_t *trk = tcd->trk;
|
||||
@ -767,7 +759,7 @@ static void _register_client(int sd, short args, void *cbdata)
|
||||
* we don't want to block someone
|
||||
* here, so kick any completed trackers into a
|
||||
* new event for processing */
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, _execute_collective);
|
||||
}
|
||||
}
|
||||
/* also check any pending local modex requests to see if
|
||||
@ -813,8 +805,6 @@ static void _deregister_client(int sd, short args, void *cbdata)
|
||||
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
|
||||
pmix_rank_info_t *info;
|
||||
pmix_nspace_t *nptr, *tmp;
|
||||
int i;
|
||||
pmix_peer_t *peer;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix:server _deregister_client for nspace %s rank %d",
|
||||
@ -832,27 +822,7 @@ static void _deregister_client(int sd, short args, void *cbdata)
|
||||
/* nothing to do */
|
||||
goto cleanup;
|
||||
}
|
||||
/* find and remove this client from our array of local
|
||||
* peers - remember that it can occur multiple times
|
||||
* if the peer called fork/exec and its children called
|
||||
* PMIx_Init! We have to rely on none of those children
|
||||
* living beyond our child as we otherwise cannot
|
||||
* track them */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr != peer->info->nptr) {
|
||||
continue;
|
||||
}
|
||||
if (cd->proc.rank == peer->info->rank) {
|
||||
/* remove this entry */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
|
||||
/* find and remove this client from its nspace */
|
||||
/* find and remove this client */
|
||||
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
|
||||
if (info->rank == cd->proc.rank) {
|
||||
pmix_list_remove_item(&nptr->server->ranks, &info->super);
|
||||
@ -2183,9 +2153,8 @@ static void deregevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:deregevents_cbfunc called status = %d", status);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
@ -2289,7 +2258,6 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
proc.rank = peer->info->rank;
|
||||
if (PMIX_SUCCESS != (rc = pmix_host_server.client_finalized(&proc, peer->info->server_object,
|
||||
op_cbfunc, cd))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -75,41 +75,41 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
|
||||
{
|
||||
int flags;
|
||||
pmix_status_t rc;
|
||||
unsigned int addrlen;
|
||||
socklen_t addrlen;
|
||||
char *ptr;
|
||||
|
||||
/* create a listen socket for incoming connection attempts */
|
||||
pmix_server_globals.listen_socket = socket(PF_UNIX, SOCK_STREAM, 0);
|
||||
if (pmix_server_globals.listen_socket < 0) {
|
||||
printf("%s:%d socket() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d socket() failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
addrlen = sizeof(struct sockaddr_un);
|
||||
if (bind(pmix_server_globals.listen_socket, (struct sockaddr*)address, addrlen) < 0) {
|
||||
printf("%s:%d bind() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d bind() failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
/* set the mode as required */
|
||||
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
|
||||
pmix_output(0, "CANNOT CHMOD %s", address->sun_path);
|
||||
pmix_output(0, "CANNOT CHMOD %s\n", address->sun_path);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
/* setup listen backlog to maximum allowed by kernel */
|
||||
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {
|
||||
printf("%s:%d listen() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d listen() failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
/* set socket up to be non-blocking, otherwise accept could block */
|
||||
if ((flags = fcntl(pmix_server_globals.listen_socket, F_GETFL, 0)) < 0) {
|
||||
printf("%s:%d fcntl(F_GETFL) failed", __FILE__, __LINE__);
|
||||
printf("%s:%d fcntl(F_GETFL) failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
flags |= O_NONBLOCK;
|
||||
if (fcntl(pmix_server_globals.listen_socket, F_SETFL, flags) < 0) {
|
||||
printf("%s:%d fcntl(F_SETFL) failed", __FILE__, __LINE__);
|
||||
printf("%s:%d fcntl(F_SETFL) failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
|
@ -234,8 +234,6 @@ void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata);
|
||||
|
||||
void pmix_server_execute_collective(int sd, short args, void *cbdata);
|
||||
|
||||
extern pmix_server_module_t pmix_host_server;
|
||||
extern pmix_server_globals_t pmix_server_globals;
|
||||
|
||||
|
@ -50,10 +50,6 @@ static uint32_t current_tag = 1; // 0 is reserved for system purposes
|
||||
|
||||
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
|
||||
{
|
||||
pmix_server_trkr_t *trk;
|
||||
pmix_rank_info_t *rinfo, *rnext;
|
||||
pmix_trkr_caddy_t *tcd;
|
||||
|
||||
/* stop all events */
|
||||
if (peer->recv_ev_active) {
|
||||
event_del(&peer->recv_event);
|
||||
@ -69,42 +65,9 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
|
||||
}
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
if (pmix_globals.server) {
|
||||
/* if I am a server, then we need to ensure that
|
||||
* we properly account for the loss of this client
|
||||
* from any local collectives in which it was
|
||||
* participating - note that the proc would not
|
||||
* have been added to any collective tracker until
|
||||
* after it successfully connected */
|
||||
PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) {
|
||||
/* see if this proc is participating in this tracker */
|
||||
PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->ranks, pmix_rank_info_t) {
|
||||
if (0 != strncmp(rinfo->nptr->nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (rinfo->rank != peer->info->rank) {
|
||||
continue;
|
||||
}
|
||||
/* it is - adjust the count */
|
||||
--trk->nlocal;
|
||||
/* remove it from the list */
|
||||
pmix_list_remove_item(&trk->ranks, &rinfo->super);
|
||||
PMIX_RELEASE(rinfo);
|
||||
/* check for completion */
|
||||
if (pmix_list_get_size(&trk->local_cbs) == trk->nlocal) {
|
||||
/* complete, so now we need to process it
|
||||
* we don't want to block someone
|
||||
* here, so kick any completed trackers into a
|
||||
* new event for processing */
|
||||
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* remove this proc from the list of ranks for this nspace */
|
||||
pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super));
|
||||
PMIX_RELEASE(peer->info);
|
||||
/* reduce the number of local procs */
|
||||
--peer->info->nptr->server->nlocalprocs;
|
||||
/* do some cleanup as the client has left us */
|
||||
/* if I am a server, then we need to
|
||||
* do some cleanup as the client has
|
||||
* left us */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients,
|
||||
peer->index, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
|
@ -195,18 +195,6 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!exact_match) {
|
||||
/* if no exact match was found, then we will fire the errhandler
|
||||
* for any matching info key. This may be too lax and need to be adjusted
|
||||
* later */
|
||||
for (k = 0; k < errreg->ninfo; k++) {
|
||||
if ((0 == strcmp(errreg->info[j].key, info[k].key)) &&
|
||||
(pmix_value_cmp(&errreg->info[j].value, &info[k].value))) {
|
||||
errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
|
||||
fired = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if nothing fired and we found a general err handler, then fire it */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user