1
1

Update to official PMIx 1.1.4rc1

Этот коммит содержится в:
Ralph Castain 2016-04-15 18:40:25 -07:00
родитель fde5183f90
Коммит 8ff114e668
9 изменённых файлов: 70 добавлений и 148 удалений

Просмотреть файл

@ -23,6 +23,27 @@ current release as well as the "stable" bug fix release branch.
Master (not on release branches yet)
------------------------------------
1.1.4
-----
- Properly increment the reference count for PMIx_Init
- Fix examples so all run properly
- Fix/complete PMI2 backward compatibility support to handle
keys that are not associated with a specific rank
- Do a better job of hiding non-API symbols
- Correct handling of semi-colon terminations on macros.
Thanks to Ashley Pittman for the patch
- Add more man pages
- Improve error checking and messages for connection
attempts from client to server
- If the tmpdir name is too long, provide an appropriate
help message to the user (particularly relevant on
Mac OSX). Thanks to Rainer Keller for the patch.
- Fix some C++ compatibility issues
- Fix/complete PMI-1 backward compatibility support
- Do not install internal headers unless specifically
requested to do so
- Add support for multiple calls to Put/Commit
1.1.3
-----

Просмотреть файл

@ -15,7 +15,7 @@
major=1
minor=1
release=3
release=4
# greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be
@ -30,7 +30,7 @@ greek=rc1
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=git17fb9c5
repo_rev=gitb363c5d
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Apr 14, 2016"
date="Apr 15, 2016"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library
@ -75,4 +75,4 @@ date="Apr 14, 2016"
# Version numbers are described in the Libtool current:revision:age
# format.
libpmix_so_version=2:2:0
libpmix_so_version=2:3:0

Просмотреть файл

@ -702,10 +702,6 @@ PMIX_EXPORT pmix_status_t PMIx_Commit(void)
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* if we are a server, or we aren't connected, don't attempt to send */
if (pmix_globals.server) {
return PMIX_SUCCESS; // not an error
@ -788,10 +784,6 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
@ -851,10 +843,6 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
pmix_cb_t *cb;
pmix_status_t rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
@ -1213,10 +1201,17 @@ void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr)
"connection to server aborted by OS - retrying");
CLOSE_THE_SOCKET(sd);
continue;
} else {
pmix_output_verbose(2, pmix_globals.debug_output,
"Connect failed: %s (%d)", strerror(pmix_socket_errno),
pmix_socket_errno);
CLOSE_THE_SOCKET(sd);
continue;
}
} else {
/* otherwise, the connect succeeded - so break out of the loop */
break;
}
/* otherwise, the connect succeeded - so break out of the loop */
break;
}
if (retries == PMIX_MAX_RETRIES || sd < 0){
@ -1410,7 +1405,8 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
PMIX_RELEASE(msg);
pmix_remove_errhandler(errhandler_ref);
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
} else {
}
else {
/* create a callback object as we need to pass it to the
 * recv routine so we know which callback to use when
 * the server acks/nacks the deregister events request */
@ -1421,9 +1417,9 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
/* push the message into our event base to send to the server */
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
}
} else {
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
}
else
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
}
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,

Просмотреть файл

@ -31,10 +31,6 @@ PMIX_EXPORT void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
pmix_errhandler_reg_cbfunc_t cbfunc,
void *cbdata)
{
if (pmix_globals.init_cntr <= 0) {
return;
}
/* common err handler registration */
if (pmix_globals.server) {
/* PMIX server: store the error handler, process info keys and call
@ -61,10 +57,6 @@ PMIX_EXPORT void PMIx_Deregister_errhandler(int errhandler_ref,
pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
if (pmix_globals.init_cntr <= 0) {
return;
}
/* common err handler registration */
if (pmix_globals.server) {
/* PMIX server: store the error handler, process info keys and call
@ -90,10 +82,6 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_error(pmix_status_t status,
{
int rc;
if (pmix_globals.init_cntr <= 0) {
return PMIX_ERR_INIT;
}
if (pmix_globals.server) {
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
error_nprocs, info, ninfo,

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
@ -144,6 +144,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
int debug_level;
char *tdir, *evar;
pid_t pid;
char * pmix_pid;
/* initialize the output system */
if (!pmix_output_init()) {
@ -217,7 +218,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
/* now set the address - we use the pid here to reduce collisions */
memset(&myaddress, 0, sizeof(struct sockaddr_un));
myaddress.sun_family = AF_UNIX;
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
asprintf(&pmix_pid, "pmix-%d", pid);
// If the temporary directory name set above, plus the '/' separator
// and the "pmix-<pid>" string, is too long to fit in sun_path, just
// fail so the caller can give the user a proper help message (this is
// particularly likely on Mac OS X).
if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) {
free(pmix_pid);
return PMIX_ERR_INVALID_LENGTH;
}
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid);
free(pmix_pid);
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);
@ -576,35 +586,17 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n
static void _deregister_nspace(int sd, short args, void *cbdata)
{
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_nspace_t *nptr;
int i;
pmix_peer_t *peer;
pmix_nspace_t *tmp;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server _deregister_nspace %s",
cd->proc.nspace);
/* see if we already have this nspace */
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(nptr->nspace, cd->proc.nspace)) {
/* find and remove this client from our array of local
* peers - remember that it can occur multiple times
* if the peer called fork/exec and its children called
* PMIx_Init! We have to rely on none of those children
* living beyond our child as we otherwise cannot
* track them */
for (i=0; i < pmix_server_globals.clients.size; i++) {
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
continue;
}
if (nptr == peer->info->nptr) {
/* remove this entry */
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
PMIX_RELEASE(peer);
}
}
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
PMIX_RELEASE(nptr);
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, cd->proc.nspace)) {
pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super);
PMIX_RELEASE(tmp);
break;
}
}
@ -628,7 +620,7 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[])
PMIX_THREADSHIFT(cd, _deregister_nspace);
}
void pmix_server_execute_collective(int sd, short args, void *cbdata)
static void _execute_collective(int sd, short args, void *cbdata)
{
pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata;
pmix_server_trkr_t *trk = tcd->trk;
@ -767,7 +759,7 @@ static void _register_client(int sd, short args, void *cbdata)
* we don't want to block someone
* here, so kick any completed trackers into a
* new event for processing */
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
PMIX_EXECUTE_COLLECTIVE(tcd, trk, _execute_collective);
}
}
/* also check any pending local modex requests to see if
@ -813,8 +805,6 @@ static void _deregister_client(int sd, short args, void *cbdata)
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_rank_info_t *info;
pmix_nspace_t *nptr, *tmp;
int i;
pmix_peer_t *peer;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server _deregister_client for nspace %s rank %d",
@ -832,27 +822,7 @@ static void _deregister_client(int sd, short args, void *cbdata)
/* nothing to do */
goto cleanup;
}
/* find and remove this client from our array of local
* peers - remember that it can occur multiple times
* if the peer called fork/exec and its children called
* PMIx_Init! We have to rely on none of those children
* living beyond our child as we otherwise cannot
* track them */
for (i=0; i < pmix_server_globals.clients.size; i++) {
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
continue;
}
if (nptr != peer->info->nptr) {
continue;
}
if (cd->proc.rank == peer->info->rank) {
/* remove this entry */
pmix_pointer_array_set_item(&pmix_server_globals.clients, i, NULL);
PMIX_RELEASE(peer);
}
}
/* find and remove this client from its nspace */
/* find and remove this client */
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == cd->proc.rank) {
pmix_list_remove_item(&nptr->server->ranks, &info->super);
@ -2183,9 +2153,8 @@ static void deregevents_cbfunc (pmix_status_t status, void *cbdata)
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
pmix_output_verbose(2, pmix_globals.debug_output,
"server:deregevents_cbfunc called status = %d", status);
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
PMIX_ERROR_LOG(rc);
}
// send reply
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
PMIX_RELEASE(cd);
@ -2289,7 +2258,6 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
proc.rank = peer->info->rank;
if (PMIX_SUCCESS != (rc = pmix_host_server.client_finalized(&proc, peer->info->server_object,
op_cbfunc, cd))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(cd);
}
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
@ -75,41 +75,41 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
{
int flags;
pmix_status_t rc;
unsigned int addrlen;
socklen_t addrlen;
char *ptr;
/* create a listen socket for incoming connection attempts */
pmix_server_globals.listen_socket = socket(PF_UNIX, SOCK_STREAM, 0);
if (pmix_server_globals.listen_socket < 0) {
printf("%s:%d socket() failed", __FILE__, __LINE__);
printf("%s:%d socket() failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}
addrlen = sizeof(struct sockaddr_un);
if (bind(pmix_server_globals.listen_socket, (struct sockaddr*)address, addrlen) < 0) {
printf("%s:%d bind() failed", __FILE__, __LINE__);
printf("%s:%d bind() failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}
/* set the mode as required */
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
pmix_output(0, "CANNOT CHMOD %s", address->sun_path);
pmix_output(0, "CANNOT CHMOD %s\n", address->sun_path);
return PMIX_ERROR;
}
/* setup listen backlog to maximum allowed by kernel */
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {
printf("%s:%d listen() failed", __FILE__, __LINE__);
printf("%s:%d listen() failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}
/* set socket up to be non-blocking, otherwise accept could block */
if ((flags = fcntl(pmix_server_globals.listen_socket, F_GETFL, 0)) < 0) {
printf("%s:%d fcntl(F_GETFL) failed", __FILE__, __LINE__);
printf("%s:%d fcntl(F_GETFL) failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}
flags |= O_NONBLOCK;
if (fcntl(pmix_server_globals.listen_socket, F_SETFL, flags) < 0) {
printf("%s:%d fcntl(F_SETFL) failed", __FILE__, __LINE__);
printf("%s:%d fcntl(F_SETFL) failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}

Просмотреть файл

@ -234,8 +234,6 @@ void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
void regevents_cbfunc (pmix_status_t status, void *cbdata);
void pmix_server_execute_collective(int sd, short args, void *cbdata);
extern pmix_server_module_t pmix_host_server;
extern pmix_server_globals_t pmix_server_globals;

Просмотреть файл

@ -50,10 +50,6 @@ static uint32_t current_tag = 1; // 0 is reserved for system purposes
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
{
pmix_server_trkr_t *trk;
pmix_rank_info_t *rinfo, *rnext;
pmix_trkr_caddy_t *tcd;
/* stop all events */
if (peer->recv_ev_active) {
event_del(&peer->recv_event);
@ -69,42 +65,9 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
}
CLOSE_THE_SOCKET(peer->sd);
if (pmix_globals.server) {
/* if I am a server, then we need to ensure that
* we properly account for the loss of this client
* from any local collectives in which it was
* participating - note that the proc would not
* have been added to any collective tracker until
* after it successfully connected */
PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) {
/* see if this proc is participating in this tracker */
PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->ranks, pmix_rank_info_t) {
if (0 != strncmp(rinfo->nptr->nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN)) {
continue;
}
if (rinfo->rank != peer->info->rank) {
continue;
}
/* it is - adjust the count */
--trk->nlocal;
/* remove it from the list */
pmix_list_remove_item(&trk->ranks, &rinfo->super);
PMIX_RELEASE(rinfo);
/* check for completion */
if (pmix_list_get_size(&trk->local_cbs) == trk->nlocal) {
/* complete, so now we need to process it
* we don't want to block someone
* here, so kick any completed trackers into a
* new event for processing */
PMIX_EXECUTE_COLLECTIVE(tcd, trk, pmix_server_execute_collective);
}
}
}
/* remove this proc from the list of ranks for this nspace */
pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super));
PMIX_RELEASE(peer->info);
/* reduce the number of local procs */
--peer->info->nptr->server->nlocalprocs;
/* do some cleanup as the client has left us */
/* if I am a server, then we need to
* do some cleanup as the client has
* left us */
pmix_pointer_array_set_item(&pmix_server_globals.clients,
peer->index, NULL);
PMIX_RELEASE(peer);

Просмотреть файл

@ -195,18 +195,6 @@ void pmix_errhandler_invoke(pmix_status_t status,
break;
}
}
if (!exact_match) {
/* if no exact match was found, then we will fire the errhandler
* for any matching info key. This may be too lax and need to be adjusted
* later */
for (k = 0; k < errreg->ninfo; k++) {
if ((0 == strcmp(errreg->info[j].key, info[k].key)) &&
(pmix_value_cmp(&errreg->info[j].value, &info[k].value))) {
errreg->errhandler(status, procs, nprocs, iptr, ninfo+1);
fired = true;
}
}
}
}
/* if nothing fired and we found a general err handler, then fire it */