From 35578b4009dee248652148764fc267705230a8b8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 13 Feb 2017 20:59:07 -0800 Subject: [PATCH] Update to lastest PMIx master Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 6 +- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 27 ++- .../pmix/src/client/pmix_client_connect.c | 6 +- .../pmix/src/client/pmix_client_spawn.c | 6 +- .../pmix2x/pmix/src/common/pmix_jobdata.c | 8 +- .../pmix/pmix2x/pmix/src/dstore/pmix_esh.c | 185 ++++++++++++------ .../pmix/pmix2x/pmix/src/dstore/pmix_esh.h | 2 + .../pmix/src/event/pmix_event_notification.c | 10 +- .../pmix/src/event/pmix_event_registration.c | 26 +-- .../pmix2x/pmix/src/include/pmix_globals.c | 12 +- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 4 +- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 9 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 2 +- .../src/mca/ptl/usock/ptl_usock_component.c | 6 + .../pmix/src/runtime/pmix_progress_threads.c | 7 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 10 +- .../pmix2x/pmix/src/server/pmix_server_ops.c | 33 ++-- 17 files changed, 237 insertions(+), 122 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 18046b9b8d..0088699fbf 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=a1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git6ed27be +repo_rev=gitbf86f3a # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Feb 08, 2017" +date="Feb 13, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index ebe5788412..663aa78c8a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. @@ -236,6 +236,21 @@ static void evhandler_reg_callbk(pmix_status_t status, *active = status; } +static void _destruct_my_server_fn(int sd, short args, void *cbdata) +{ + pmix_cb_t *cb= (pmix_cb_t *)cbdata; + PMIX_DESTRUCT(&pmix_client_globals.myserver); + cb->active = false; +} + + +static void pmix_destruct_my_server(void) +{ + pmix_cb_t cb; + PMIX_THREADSHIFT(&cb, _destruct_my_server_fn); + PMIX_WAIT_FOR_COMPLETION(cb.active); +} + PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { @@ -458,7 +473,15 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) "pmix:client finalize sync received"); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); + pmix_destruct_my_server(); + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) { + PMIX_ERROR_LOG(rc); + return rc; + } +#endif + pmix_rte_finalize(); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 1519191e27..1859b47b07 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. @@ -325,6 +325,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, #if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) pmix_job_data_htable_store(nspace, bptr); #endif + free(nspace); PMIX_RELEASE(bptr); } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { @@ -335,6 +336,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, if (NULL != cb->op_cbfunc) { cb->op_cbfunc(ret, cb->cbdata); } + PMIX_RELEASE(cb); } static void op_cbfunc(pmix_status_t status, void *cbdata) diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index 48188c872a..66844efb42 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. @@ -225,6 +225,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, if (NULL != cb->spawn_cbfunc) { cb->spawn_cbfunc(ret, nspace, cb->cbdata); } + cb->cbdata = NULL; + PMIX_RELEASE(cb); } static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c index e655606419..a1c2fd5711 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Mellanox Technologies, Inc. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -381,6 +381,12 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) exit: #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (NULL != cb->bufs) { + size_t size = pmix_value_array_get_size(cb->bufs); + size_t i; + for (i = 0; i < size; i++) { + pmix_buffer_t *tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); + PMIX_DESTRUCT(tmp); + } PMIX_RELEASE(cb->bufs); } #endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c index 04ee21965a..938c1528fc 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c @@ -1,9 +1,11 @@ /* - * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,18 +72,7 @@ pmix_dstore_base_module_t pmix_dstore_esh_module = { #define ESH_ENV_NS_DATA_SEG_SIZE "NS_DATA_SEG_SIZE" #define ESH_ENV_LINEAR "SM_USE_LINEAR_SEARCH" -#define ESH_MIN_KEY_LEN (sizeof(ESH_REGION_INVALIDATED) + 1) - -#define EXT_SLOT_SIZE(key) (strlen(key) + 1 + 2*sizeof(size_t)) /* in ext slot new offset will be stored in case if new data were added for the same process during next commit */ - -#define ESH_KEY_SIZE(key, size) \ -__extension__ ({ \ - size_t len = sizeof(size_t) + size; \ - size_t kname_len = strlen(key) + 1; \ - len += (kname_len < ESH_MIN_KEY_LEN) ? \ - ESH_MIN_KEY_LEN : kname_len; \ - len; \ -}) +#define ESH_MIN_KEY_LEN (sizeof(ESH_REGION_INVALIDATED)) #define ESH_KV_SIZE(addr) \ __extension__ ({ \ @@ -118,6 +109,20 @@ __extension__ ({ \ data_size; \ }) +#define ESH_KEY_SIZE(key, size) \ +__extension__ ({ \ + size_t len = sizeof(size_t) + ESH_KNAME_LEN(key) + size;\ + len; \ +}) + +/* in ext slot new offset will be stored in case if + * new data were added for the same process during + * next commit + */ +#define EXT_SLOT_SIZE() \ + (ESH_KEY_SIZE(ESH_REGION_EXTENSION, sizeof(size_t))) + + #define ESH_PUT_KEY(addr, key, buffer, size) \ __extension__ ({ \ size_t sz = ESH_KEY_SIZE(key, size); \ @@ -255,9 +260,11 @@ static pmix_value_array_t *_ns_map_array = NULL; static pmix_value_array_t *_ns_track_array = NULL; ns_map_data_t * (*_esh_session_map_search)(const char *nspace) = NULL; +int (*_esh_lock_init)(size_t idx) = NULL; #define _ESH_SESSION_path(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].nspace_path) #define _ESH_SESSION_lockfile(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfile) +#define _ESH_SESSION_setjobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].setjobuid) #define _ESH_SESSION_jobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].jobuid) #define _ESH_SESSION_sm_seg_first(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_first) #define _ESH_SESSION_sm_seg_last(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_last) @@ -308,8 +315,51 @@ static inline void _esh_session_map_clean(ns_map_t *m) { m->data.track_idx = -1; } +#ifdef ESH_FCNTL_LOCK +static inline int _flock_init(size_t idx) { + pmix_status_t rc = PMIX_SUCCESS; + + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR | O_EXCL, 0600); + + /* if previous launch was crashed, the lockfile might not be deleted and unlocked, + * so we delete it and create a new one. */ + if (_ESH_SESSION_lock(idx) < 0) { + unlink(_ESH_SESSION_lockfile(idx)); + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR, 0600); + if (_ESH_SESSION_lock(idx) < 0) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + if (_ESH_SESSION_setjobuid(idx) > 0) { + if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWGRP | S_IRGRP)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + } + else { + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_RDONLY); + if (-1 == _ESH_SESSION_lock(idx)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + return rc; +} +#endif + #ifdef ESH_PTHREAD_LOCK -static inline int _rwlock_init(size_t idx, char *lockfile) { +static inline int _rwlock_init(size_t idx) { pmix_status_t rc = PMIX_SUCCESS; size_t size = _lock_segment_size; pthread_rwlockattr_t attr; @@ -325,10 +375,23 @@ static inline int _rwlock_init(size_t idx, char *lockfile) { } if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - if (PMIX_SUCCESS != (rc = pmix_sm_segment_create(_ESH_SESSION_pthread_seg(idx), lockfile, size))) { + if (PMIX_SUCCESS != (rc = pmix_sm_segment_create(_ESH_SESSION_pthread_seg(idx), _ESH_SESSION_lockfile(idx), size))) { return rc; } memset(_ESH_SESSION_pthread_seg(idx)->seg_base_addr, 0, size); + if (_ESH_SESSION_setjobuid(idx) > 0) { + if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)){ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + /* set the mode as required */ + if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP )) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t *)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; if (0 != pthread_rwlockattr_init(&attr)) { @@ -364,7 +427,7 @@ static inline int _rwlock_init(size_t idx, char *lockfile) { } else { _ESH_SESSION_pthread_seg(idx)->seg_size = size; - snprintf(_ESH_SESSION_pthread_seg(idx)->seg_name, PMIX_PATH_MAX, "%s", lockfile); + snprintf(_ESH_SESSION_pthread_seg(idx)->seg_name, PMIX_PATH_MAX, "%s", _ESH_SESSION_lockfile(idx)); if (PMIX_SUCCESS != (rc = pmix_sm_segment_attach(_ESH_SESSION_pthread_seg(idx), PMIX_SM_RW))) { return rc; } @@ -732,6 +795,7 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, session_t *s = &(PMIX_VALUE_ARRAY_GET_ITEM(_session_array, session_t, idx)); pmix_status_t rc = PMIX_SUCCESS; + s->setjobuid = setjobuid; s->jobuid = jobuid; s->nspace_path = strdup(_base_path); @@ -754,31 +818,8 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, return rc; } } - s->lockfd = open(s->lockfile, O_CREAT | O_RDWR | O_EXCL, 0600); - - /* if previous launch was crashed, the lockfile might not be deleted and unlocked, - * so we delete it and create a new one. */ - if (s->lockfd < 0) { - unlink(s->lockfile); - s->lockfd = open(s->lockfile, O_CREAT | O_RDWR, 0600); - if (s->lockfd < 0) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - if (setjobuid > 0){ - if (0 > chown(s->nspace_path, (uid_t) jobuid, (gid_t) -1)){ - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - if (0 > chown(s->lockfile, (uid_t) jobuid, (gid_t) -1)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - if (0 > chmod(s->lockfile, S_IRUSR | S_IWGRP | S_IRGRP)) { + if (s->setjobuid > 0){ + if (0 > chown(s->nspace_path, (uid_t) s->jobuid, (gid_t) -1)){ rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; @@ -792,12 +833,6 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, } } else { - s->lockfd = open(s->lockfile, O_RDONLY); - if (-1 == s->lockfd) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } seg = _attach_new_segment(INITIAL_SEGMENT, m, 0); if( NULL == seg ){ rc = PMIX_ERR_OUT_OF_RESOURCE; @@ -806,12 +841,16 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, } } -#ifdef ESH_PTHREAD_LOCK - if ( PMIX_SUCCESS != (rc = _rwlock_init(m->tbl_idx, s->lockfile))) { + if (NULL == _esh_lock_init) { + rc = PMIX_ERR_INIT; PMIX_ERROR_LOG(rc); return rc; } -#endif + if ( PMIX_SUCCESS != (rc = _esh_lock_init(m->tbl_idx))) { + PMIX_ERROR_LOG(rc); + return rc; + } + s->sm_seg_first = seg; s->sm_seg_last = s->sm_seg_first; return PMIX_SUCCESS; @@ -859,6 +898,13 @@ int _esh_init(pmix_info_t info[], size_t ninfo) _jobuid = getuid(); _setjobuid = 0; +#ifdef ESH_PTHREAD_LOCK + _esh_lock_init = _rwlock_init; +#endif +#ifdef ESH_FCNTL_LOCK + _esh_lock_init = _flock_init; +#endif + if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { PMIX_ERROR_LOG(rc); goto err_exit; @@ -1618,9 +1664,10 @@ static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *n } memset(new_seg->seg_info.seg_base_addr, 0, size); - if (_setjobuid > 0){ + + if (_ESH_SESSION_setjobuid(ns_map->tbl_idx) > 0){ rc = PMIX_ERR_PERM; - if (0 > chown(file_name, (uid_t) _jobuid, (gid_t) -1)){ + if (0 > chown(file_name, (uid_t) _ESH_SESSION_jobuid(ns_map->tbl_idx), (gid_t) -1)){ PMIX_ERROR_LOG(rc); goto err_exit; } @@ -2110,7 +2157,7 @@ static int put_empty_ext_slot(seg_desc_t *dataseg) uint8_t *addr; global_offset = get_free_offset(dataseg); rel_offset = global_offset % _data_segment_size; - if (rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION) > _data_segment_size) { + if (rel_offset + EXT_SLOT_SIZE() > _data_segment_size) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } @@ -2118,7 +2165,7 @@ static int put_empty_ext_slot(seg_desc_t *dataseg) ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); /* update offset at the beginning of current segment */ - data_ended = rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION); + data_ended = rel_offset + EXT_SLOT_SIZE(); addr = (uint8_t*)(addr - rel_offset); memcpy(addr, &data_ended, sizeof(size_t)); return PMIX_SUCCESS; @@ -2126,9 +2173,8 @@ static int put_empty_ext_slot(seg_desc_t *dataseg) static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) { - size_t offset; + size_t offset, id = 0; seg_desc_t *tmp; - int id = 0; size_t global_offset, data_ended; uint8_t *addr; @@ -2144,21 +2190,30 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, global_offset = get_free_offset(dataseg); offset = global_offset % _data_segment_size; - /* We should provide additional space at the end of segment to place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ - if (sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + /* We should provide additional space at the end of segment to + * place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ + if ((sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) { /* this is an error case: segment is so small that cannot place evem a single key-value pair. * warn a user about it and fail. */ offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", - sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE(key)); + sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE()); return offset; } - if (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + + /* check the corner case that was observed at large scales: + * https://github.com/pmix/master/pull/282#issuecomment-277454198 + * + * if last time we stopped exactly on the border of the segment + * new segment wasn't allocated to us but (global_offset % _data_segment_size) == 0 + * so if offset is 0 here - we need to allocate the segment as well + */ + if ( (0 == offset) || ( (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) ) { id++; /* create a new data segment. */ tmp = extend_segment(tmp, &ns_info->ns_map); if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERROR); + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ return offset; } @@ -2166,11 +2221,11 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, /* update_ns_info_in_initial_segment */ ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); if (NULL == elem) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ + return offset; } elem->num_data_seg++; - offset = sizeof(size_t); } global_offset = offset + id * _data_segment_size; @@ -2443,7 +2498,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix * */ rc = put_empty_ext_slot(ns_info->data_seg); if (PMIX_SUCCESS != rc) { - if (NULL != rinfo) { + if ((0 == data_exist) && NULL != rinfo) { free(rinfo); } PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h index dd588d64db..b285cf6b7c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h +++ b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h @@ -3,6 +3,7 @@ * All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -63,6 +64,7 @@ typedef struct ns_map_s ns_map_t; struct session_s { int in_use; uid_t jobuid; + char setjobuid; char *nspace_path; char *lockfile; #ifdef ESH_PTHREAD_LOCK diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 9e1062be61..0ea3acfbf5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -1,6 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -245,7 +247,6 @@ static void progress_local_event_hdlr(pmix_status_t status, while (pmix_list_get_end(&pmix_globals.events.single_events) != (nxt = pmix_list_get_next(&chain->sing->super))) { sing = (pmix_single_event_t*)nxt; if (sing->code == chain->status) { - PMIX_RETAIN(chain); chain->sing = sing; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = sing->cbobject; @@ -272,7 +273,6 @@ static void progress_local_event_hdlr(pmix_status_t status, if (multi->codes[n] == chain->status) { /* found it - invoke the handler, pointing its * callback function to our progression function */ - PMIX_RETAIN(chain); chain->multi = multi; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = multi->cbobject; @@ -300,7 +300,6 @@ static void progress_local_event_hdlr(pmix_status_t status, if (NULL != chain->def) { if (pmix_list_get_end(&pmix_globals.events.default_events) != (nxt = pmix_list_get_next(&chain->def->super))) { def = (pmix_default_event_t*)nxt; - PMIX_RETAIN(chain); chain->def = def; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = def->cbobject; @@ -369,7 +368,6 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) if (sing->code == chain->status) { /* found it - invoke the handler, pointing its * callback function to our progression function */ - PMIX_RETAIN(chain); chain->sing = sing; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = sing->cbobject; @@ -392,7 +390,6 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) if (multi->codes[i] == chain->status) { /* found it - invoke the handler, pointing its * callback function to our progression function */ - PMIX_RETAIN(chain); chain->multi = multi; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = multi->cbobject; @@ -416,7 +413,6 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) /* finally, pass it to any default handlers */ PMIX_LIST_FOREACH(def, &pmix_globals.events.default_events, pmix_default_event_t) { - PMIX_RETAIN(chain); chain->def = def; /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = def->cbobject; diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 3cb3923bed..5b932942d5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -1,6 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -260,18 +262,20 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, if (PMIX_PROC_SERVER == pmix_globals.proc_type && cd->enviro && NULL != pmix_host_server.register_events) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: _add_hdlr registering with server"); - if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(cd->codes, cd->ncodes, - cd2->info, cd2->ninfo, - reg_cbfunc, cd2))) { - PMIX_RELEASE(cd2); - pmix_list_remove_item(list, item); - PMIX_RELEASE(item); - return rc; - } - return PMIX_ERR_WOULD_BLOCK; + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: _add_hdlr registering with server"); + if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(cd->codes, cd->ncodes, + cd2->info, cd2->ninfo, + reg_cbfunc, cd2))) { + PMIX_RELEASE(cd2); + pmix_list_remove_item(list, item); + PMIX_RELEASE(item); + return rc; } + return PMIX_ERR_WOULD_BLOCK; + } else { + PMIX_RELEASE(cd2); + } return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index 339b374edd..e3f8adb7be 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. @@ -179,9 +179,17 @@ static void sncon(pmix_server_nspace_t *p) } static void sndes(pmix_server_nspace_t *p) { + uint64_t key; + pmix_peer_t * peer; PMIX_DESTRUCT(&p->job_info); PMIX_LIST_DESTRUCT(&p->ranks); + PMIX_HASH_TABLE_FOREACH(key, uint64, peer, &p->mylocal) { + PMIX_RELEASE(peer); + } PMIX_DESTRUCT(&p->mylocal); + PMIX_HASH_TABLE_FOREACH(key, uint64, peer, &p->myremote) { + PMIX_RELEASE(peer); + } PMIX_DESTRUCT(&p->myremote); PMIX_DESTRUCT(&p->remote); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index cc9557c777..5ca552543d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -83,7 +83,7 @@ static pmix_status_t pmix_ptl_close(void) } /* the components will cleanup when closed */ - PMIX_DESTRUCT(&pmix_ptl_globals.actives); + PMIX_LIST_DESTRUCT(&pmix_ptl_globals.actives); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.posted_recvs); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.listeners); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 169a607ccf..58bc759a6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -61,6 +61,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) pmix_trkr_caddy_t *tcd; pmix_regevents_info_t *reginfoptr, *regnext; pmix_peer_events_info_t *pr, *pnext; + pmix_rank_info_t *info, *pinfo; /* stop all events */ if (peer->recv_ev_active) { @@ -108,8 +109,12 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) } } } - /* remove this proc from the list of ranks for this nspace */ - pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super)); + /* remove this proc from the list of ranks for this nspace if it is still there */ + PMIX_LIST_FOREACH_SAFE(info, pinfo, &(peer->info->nptr->server->ranks), pmix_rank_info_t) { + if (info == peer->info) { + pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super)); + } + } /* reduce the number of local procs */ --peer->info->nptr->server->nlocalprocs; /* now decrease the refcount - might actually free the object */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 6e2283b885..74853cad1d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -91,7 +91,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, .pmix_mca_register_component_params = component_register, .pmix_mca_query_component = component_query }, - .priority = 10, + .priority = 30, .uri = NULL, .setup_listener = setup_listener }, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 7a5c0a3815..7de9ad1cc9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -13,6 +13,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -502,6 +504,7 @@ static void connection_handler(int sd, short args, void *cbdata) * Create the tracker for this peer */ psave = PMIX_NEW(pmix_peer_t); if (NULL == psave) { + free(msg); rc = PMIX_ERR_NOMEM; goto error; } @@ -522,6 +525,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* get the appropriate compatibility modules */ if (PMIX_SUCCESS != pmix_psec.assign_module((struct pmix_peer_t*)psave, sec_mode)) { + free(msg); info->proc_cnt--; PMIX_RELEASE(info); PMIX_RELEASE(psave); @@ -545,6 +549,7 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of client credentials failed: %s", PMIx_Error_string(rc)); + free(msg); info->proc_cnt--; PMIX_RELEASE(info); PMIX_RELEASE(psave); @@ -552,6 +557,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* send an error reply to the client */ goto error; } + free(msg); /* send them success */ rc = PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c index 5c419757f3..cbb95acf86 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c @@ -1,6 +1,8 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -271,6 +273,9 @@ int pmix_progress_thread_stop(const char *name) if (trk->ev_active) { stop_progress_engine(trk); } + pmix_list_remove_item(&tracking, &trk->super); + PMIX_RELEASE(trk); + return PMIX_SUCCESS; } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 6d3eee90a4..97bc0fcc16 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -2290,17 +2290,15 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_CONNECTNB_CMD == cmd) { PMIX_PEER_CADDY(cd, peer, tag); - if (PMIX_SUCCESS != (rc = pmix_server_connect(cd, buf, false, cnct_cbfunc))) { - PMIX_RELEASE(cd); - } + rc = pmix_server_connect(cd, buf, false, cnct_cbfunc); + PMIX_RELEASE(cd); return rc; } if (PMIX_DISCONNECTNB_CMD == cmd) { PMIX_PEER_CADDY(cd, peer, tag); - if (PMIX_SUCCESS != (rc = pmix_server_connect(cd, buf, true, cnct_cbfunc))) { - PMIX_RELEASE(cd); - } + rc = pmix_server_connect(cd, buf, true, cnct_cbfunc); + PMIX_RELEASE(cd); return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index cb30a13a3c..c52c03e7ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -5,7 +5,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -189,12 +189,11 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, info->rank, kp))) { PMIX_ERROR_LOG(rc); } - PMIX_RELEASE(kp); - kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; + /* restore the buffer for subsequent processing */ + PMIX_LOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); + kp->value->data.bo.bytes = NULL; + kp->value->data.bo.size = 0; } #endif /* PMIX_ENABLE_DSTORE */ @@ -896,7 +895,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, { int32_t cnt; pmix_status_t rc; - pmix_proc_t *procs; + pmix_proc_t *procs = NULL; size_t nprocs; pmix_server_trkr_t *trk; pmix_info_t *info = NULL; @@ -916,7 +915,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, cnt = 1; if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { PMIX_ERROR_LOG(rc); - return rc; + goto cleanup; } /* there must be at least one proc - we do not allow the client * to send us NULL proc as the server has no idea what to do @@ -925,7 +924,8 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, * spans all procs in that namespace */ if (nprocs < 1) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; + rc = PMIX_ERR_BAD_PARAM; + goto cleanup; } /* unpack the procs */ @@ -933,7 +933,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, cnt = nprocs; if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { PMIX_ERROR_LOG(rc); - return rc; + goto cleanup; } /* unpack the number of provided info structs */ @@ -970,7 +970,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, /* add this contributor to the tracker so they get * notified when we are done */ - PMIX_RETAIN(cd); + PMIX_RETAIN(cd); // prevent the caddy from being released when we return pmix_list_append(&trk->local_cbs, &cd->super); /* if all local contributions have been received, * let the local host's server know that we are at the @@ -987,10 +987,13 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, rc = PMIX_SUCCESS; } - cleanup: - PMIX_PROC_FREE(procs, nprocs); - PMIX_INFO_FREE(info, ninfo); - PMIX_RELEASE(cd); + cleanup: + if (NULL != procs) { + PMIX_PROC_FREE(procs, nprocs); + } + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); + } return rc; }