1
1

Update to latest PMIx master - equivalent to 2.0rc2. Update the thread support in the opal/pmix framework to protect the framework-level structures.

This now passes the loop test, and so we believe it resolves the random hangs in finalize.

Changes in PMIx master that are included here:

* Fixed a bug in the PMIx_Get logic
* Fixed self-notification procedure
* Made pmix_output functions thread safe
* Fixed a number of thread safety issues
* Updated configury to use 'uname -n' when hostname is unavailable

Work on cleaning up the event handler thread safety problem
Rarely used functions, but protect them anyway
Fix the last part of the intercomm problem
Ensure we don't cover any PMIx calls with the framework-level lock.
Protect against NULL argv comm_spawn

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-06-12 16:54:35 -07:00
родитель 70107b3e52
Коммит 952726c121
62 изменённых файлов: 2458 добавлений и 1671 удалений

Просмотреть файл

@ -155,10 +155,9 @@ int ompi_interlib_declare(int threadlevel, char *version)
}
opal_list_append(&info, &kv->super);
/* call pmix to initialize these values */
if (OPAL_SUCCESS != (ret = opal_pmix.init(&info))) {
OPAL_LIST_DESTRUCT(&info);
return ret;
}
ret = opal_pmix.init(&info);
OPAL_LIST_DESTRUCT(&info);
return OMPI_SUCCESS;
/* account for our refcount on pmix_init */
opal_pmix.finalize();
return ret;
}

Просмотреть файл

@ -277,9 +277,6 @@ int ompi_mpi_finalize(void)
}
}
/* account for our refcount on pmix_init */
opal_pmix.finalize();
/* check for timing request - get stop time and report elapsed
time if so */
//OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);

Просмотреть файл

@ -14,7 +14,7 @@
#include "opal_config.h"
#include "opal/types.h"
#include "opal/threads/threads.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_framework.h"
@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info,
OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase);
#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread)
typedef pthread_cond_t opal_pmix_condition_t;
#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a)
#define opal_pmix_condition_signal(a) pthread_cond_signal(a)
#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER
/* Caller-driven lock: a mutex/condition pair plus an "active" flag,
 * used to serialize access to the opal/pmix framework-level state. */
typedef struct {
opal_mutex_t mutex;
opal_pmix_condition_t cond;
volatile bool active;
} opal_pmix_lock_t;
/* Framework-level shared state for the opal/pmix framework. */
typedef struct {
opal_event_base_t *evbase;
int timeout;
int initialized;
opal_pmix_lock_t lock;
} opal_pmix_base_t;
extern opal_pmix_base_t opal_pmix_base;
/* Initialize a lock. Note the lock starts out HELD (active = true);
 * it must be released (or woken) before anyone can acquire it. */
#define OPAL_PMIX_CONSTRUCT_LOCK(l) \
do { \
OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \
pthread_cond_init(&(l)->cond, NULL); \
(l)->active = true; \
} while(0)
/* Tear down a lock created by OPAL_PMIX_CONSTRUCT_LOCK. */
#define OPAL_PMIX_DESTRUCT_LOCK(l) \
do { \
OBJ_DESTRUCT(&(l)->mutex); \
pthread_cond_destroy(&(l)->cond); \
} while(0)
/* Acquire the lock: wait on the condition variable until the current
 * holder clears "active", then mark it held again. The mutex is left
 * LOCKED on exit; the matching OPAL_PMIX_RELEASE_THREAD unlocks it.
 * The debug variant optionally logs the wait via opal_debug_threads. */
#if OPAL_ENABLE_DEBUG
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
if (opal_debug_threads) { \
opal_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while ((lck)->active) { \
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
if (opal_debug_threads) { \
opal_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
(lck)->active = true; \
} while(0)
#else
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
while ((lck)->active) { \
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
(lck)->active = true; \
} while(0)
#endif
/* Wait (one-shot) until OPAL_PMIX_WAKEUP_THREAD clears "active", then
 * unlock and return. Unlike ACQUIRE, this does NOT re-take the lock.
 * NOTE(review): the debug variant passes &lck to OPAL_ACQUIRE_OBJECT
 * while the non-debug variant passes lck — these differ; verify which
 * is intended (lck is already a pointer at every call site shown). */
#if OPAL_ENABLE_DEBUG
#define OPAL_PMIX_WAIT_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
if (opal_debug_threads) { \
opal_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while ((lck)->active) { \
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
if (opal_debug_threads) { \
opal_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
OPAL_ACQUIRE_OBJECT(&lck); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#else
#define OPAL_PMIX_WAIT_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
while ((lck)->active) { \
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
OPAL_ACQUIRE_OBJECT(lck); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif
/* Release a lock previously obtained by OPAL_PMIX_ACQUIRE_THREAD:
 * clear "active", wake all waiters, and unlock the mutex.
 * NOTE(review): the non-debug variant puts a side-effecting
 * opal_mutex_trylock() inside assert(); if assertions are compiled
 * out the check disappears entirely, and if the trylock ever
 * succeeds the mutex ends up locked a second time — confirm this is
 * intended as a debug-only sanity check. */
#if OPAL_ENABLE_DEBUG
#define OPAL_PMIX_RELEASE_THREAD(lck) \
do { \
if (opal_debug_threads) { \
opal_output(0, "Releasing thread %s:%d", \
__FILE__, __LINE__); \
} \
(lck)->active = false; \
opal_pmix_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#else
#define OPAL_PMIX_RELEASE_THREAD(lck) \
do { \
assert(0 != opal_mutex_trylock(&(lck)->mutex)); \
(lck)->active = false; \
opal_pmix_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif
/* Signal completion from another thread: take the mutex itself
 * (unlike RELEASE, which assumes it is already held), clear "active",
 * and wake anyone blocked in OPAL_PMIX_WAIT_THREAD. */
#define OPAL_PMIX_WAKEUP_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
(lck)->active = false; \
OPAL_POST_OBJECT(lck); \
opal_pmix_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
END_C_DECLS
#endif

Просмотреть файл

@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status,
return OPAL_SUCCESS;
}
struct lookup_caddy_t {
volatile bool active;
int status;
opal_pmix_pdata_t *pdat;
};
/******** DATA EXCHANGE ********/
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
{
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
cd->status = status;
if (OPAL_SUCCESS == status && NULL != data) {
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
if (NULL != p) {
cd->pdat->proc = p->proc;
if (p->value.type == cd->pdat->value.type) {
if (NULL != cd->pdat->value.key) {
free(cd->pdat->value.key);
}
(void)opal_value_xfer(&cd->pdat->value, &p->value);
}
}
}
cd->active = false;
}
static void opcbfunc(int status, void *cbdata)
{
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
cd->status = status;
cd->active = false;
}
int opal_pmix_base_exchange(opal_value_t *indat,
opal_pmix_pdata_t *outdat,
int timeout)
@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
opal_list_t ilist, mlist;
opal_value_t *info;
opal_pmix_pdata_t *pdat;
struct lookup_caddy_t caddy;
char **keys;
/* protect the incoming value */
opal_dss.copy((void**)&info, indat, OPAL_VALUE);
@ -148,29 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat,
opal_list_append(&ilist, &info->super);
/* publish it with "session" scope */
if (NULL == opal_pmix.publish_nb) {
rc = opal_pmix.publish(&ilist);
OPAL_LIST_DESTRUCT(&ilist);
if (OPAL_SUCCESS != rc) {
return rc;
}
} else {
caddy.status = -1;
caddy.active = true;
caddy.pdat = NULL;
rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy);
if (OPAL_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&ilist);
return rc;
}
while (caddy.active) {
usleep(10);
}
OPAL_LIST_DESTRUCT(&ilist);
if (OPAL_SUCCESS != caddy.status) {
OPAL_ERROR_LOG(caddy.status);
return caddy.status;
}
rc = opal_pmix.publish(&ilist);
OPAL_LIST_DESTRUCT(&ilist);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* lookup the other side's info - if a non-blocking form
@ -204,43 +150,20 @@ int opal_pmix_base_exchange(opal_value_t *indat,
/* if a non-blocking version of lookup isn't
* available, then use the blocking version */
if (NULL == opal_pmix.lookup_nb) {
OBJ_CONSTRUCT(&ilist, opal_list_t);
opal_list_append(&ilist, &pdat->super);
rc = opal_pmix.lookup(&ilist, &mlist);
OPAL_LIST_DESTRUCT(&mlist);
OBJ_CONSTRUCT(&ilist, opal_list_t);
opal_list_append(&ilist, &pdat->super);
rc = opal_pmix.lookup(&ilist, &mlist);
OPAL_LIST_DESTRUCT(&mlist);
if (OPAL_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&ilist);
if (OPAL_SUCCESS != rc) {
return rc;
}
} else {
caddy.status = -1;
caddy.active = true;
caddy.pdat = pdat;
keys = NULL;
opal_argv_append_nosize(&keys, pdat->value.key);
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
if (OPAL_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&mlist);
opal_argv_free(keys);
return rc;
}
while (caddy.active) {
usleep(10);
}
opal_argv_free(keys);
OPAL_LIST_DESTRUCT(&mlist);
if (OPAL_SUCCESS != caddy.status) {
OPAL_ERROR_LOG(caddy.status);
return caddy.status;
}
return rc;
}
/* pass back the result */
outdat->proc = pdat->proc;
free(outdat->value.key);
rc = opal_value_xfer(&outdat->value, &pdat->value);
OBJ_RELEASE(pdat);
OPAL_LIST_DESTRUCT(&ilist);
return rc;
}

Просмотреть файл

@ -13,6 +13,7 @@
#include "opal/constants.h"
#include "opal/mca/mca.h"
#include "opal/threads/thread_usage.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 };
bool opal_pmix_collect_all_data = true;
int opal_pmix_verbose_output = -1;
bool opal_pmix_base_async_modex = false;
opal_pmix_base_t opal_pmix_base = {0};
opal_pmix_base_t opal_pmix_base = {
.evbase = NULL,
.timeout = 0,
.initialized = 0,
.lock = {
.mutex = OPAL_MUTEX_STATIC_INIT,
.cond = OPAL_PMIX_CONDITION_STATIC_INIT,
.active = false
}
};
static int opal_pmix_base_frame_register(mca_base_register_flag_t flags)
{

Просмотреть файл

@ -146,7 +146,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info,
OPAL_NAME_PRINT(*(p)), (s))); \
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
_info = OBJ_NEW(opal_value_t); \
_info->key = strdup(OPAL_PMIX_OPTIONAL); \
_info->key = strdup(OPAL_PMIX_IMMEDIATE); \
_info->type = OPAL_BOOL; \
_info->data.flag = true; \
opal_list_append(&(_ilist), &(_info)->super); \

Просмотреть файл

@ -9,22 +9,31 @@ Email Name Affiliation(s)
alinask Elena Shipunova Mellanox
annu13 Annapurna Dasari Intel
artpol84 Artem Polyakov Mellanox
ashleypittman Ashley Pittman Intel
dsolt Dave Solt IBM
garlick Jim Garlick LLNL
ggouaillardet Gilles Gouaillardet RIST
hjelmn Nathan Hjelm LANL
igor-ivanov Igor Ivanov Mellanox
jladd-mlnx Joshua Ladd Mellanox
jsquyres Jeff Squyres Cisco, IU
jjhursey Joshua Hursey IBM
jsquyres Jeff Squyres Cisco
karasevb Boris Karasev Mellanox
kawashima-fj Takahiro Kawashima Fujitsu
nkogteva Nadezhda Kogteva Mellanox
rhc54 Ralph Castain LANL, Cisco, Intel
nysal Nysal Jan KA IBM
PHHargrove Paul Hargrove LBNL
rhc54 Ralph Castain Intel
------------------------------- --------------------------- -------------------
Affiliation abbreviations:
--------------------------
Cisco = Cisco Systems, Inc.
Fujitsu = Fujitsu
IBM = International Business Machines, Inc.
Intel = Intel, Inc.
IU = Indiana University
LANL = Los Alamos National Laboratory
LBNL = Lawrence Berkeley National Laboratory
LLNL = Lawrence Livermore National Laboratory
Mellanox = Mellanox
RIST = Research Organization for Information Science and Technology

Просмотреть файл

@ -9,7 +9,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Copyright (c) 2004-2005 The Regents of the University of California.
All rights reserved.
Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
$COPYRIGHT$
Additional copyrights may follow
@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing
the PMIx library. Much more information is available on the
PMIx web site (e.g., see the FAQ section):
http://pmix.github.io/pmix/master
http://pmix.github.io/pmix/pmix
Developer Builds
@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked
out from Git), you should read the HACKING file before attempting to
build PMIx. You must then run:
shell$ ./autogen.sh
shell$ ./autogen.pl
You will need very recent versions of GNU Autoconf, Automake, and
Libtool. If autogen.sh fails, read the HACKING file. If anything
@ -85,4 +85,3 @@ shell$ make install
Parallel make is generally only helpful in the build phase; the
installation process is mostly serial and does not benefit much from
parallel make.

Просмотреть файл

@ -24,6 +24,65 @@ current release as well as the "stable" bug fix release branch.
Master (not on release branches yet)
------------------------------------
2.0.0
------
**** NOTE: This release implements the complete PMIX v2.0 Standard
**** and therefore includes a number of new APIs and features. These
**** can be tracked by their RFC's in the RFC repository at:
**** https://github.com/pmix/RFCs. A formal standards document will
**** be included in a later v2.x release. Some of the changes are
**** identified below.
- Added the Modular Component Architecture (MCA) plugin manager and
converted a number of operations to plugins, thereby allowing easy
customization and extension (including proprietary offerings)
- Added support for TCP sockets instead of Unix domain sockets for
client-server communications
- Added support for on-the-fly Allocation requests, including requests
for additional resources, extension of time for currently allocated
resources, and return of identified allocated resources to the scheduler
(RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md)
- Tightened rules on the processing of PMIx_Get requests, including
reservation of the "pmix" prefix for attribute keys and specifying
behaviors associated with the PMIX_RANK_WILDCARD value
(RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md)
- Extended support for tool interactions with a PMIx server aimed at
meeting the needs of debuggers and other tools. Includes support
for rendezvousing with a system-level PMIx server for interacting
with the system management stack (SMS) outside of an allocated
session, and adds two new APIs:
- PMIx_Query: request general information such as the process
table for a specified job, and available SMS capabilities
- PMIx_Log: log messages (e.g., application progress) to a
system-hosted persistent store
(RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md)
- Added support for fabric/network interactions associated with
"instant on" application startup
(RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md)
- Added an attribute to support getting the time remaining in an
allocation via the PMIx_Query interface
(RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md)
- Added interfaces to support job control and monitoring requests,
including heartbeat and file monitors to detect stalled applications.
Job control interface supports standard signal-related operations
(pause, kill, resume, etc.) as well as checkpoint/restart requests.
The interface can also be used by an application to indicate it is
willing to be pre-empted, with the host RM providing an event
notification when the preemption is desired.
(RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md)
- Extended the event notification system to support notifications
across threads in the same process, and the ability to direct
ordering of notifications when registering event handlers.
(RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md)
- Expose the buffer manipulation functions via a new set of APIs
to support heterogeneous data transfers within the host RM
environment
(RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md)
- Fix a number of race condition issues that arose at scale
- Enable PMIx servers to generate notifications to the host RM
and to themselves
1.2.2 -- 21 March 2017
----------------------
- Compiler fix for Sun/Oracle CC (PR #322)

Просмотреть файл

@ -30,7 +30,7 @@ greek=
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=git071ebc3
repo_rev=git6fb501d
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Jun 06, 2017"
date="Jun 19, 2017"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library
@ -75,4 +75,4 @@ date="Jun 06, 2017"
# Version numbers are described in the Libtool current:revision:age
# format.
libpmix_so_version=0:0:0
libpmix_so_version=3:0:1

Просмотреть файл

@ -167,6 +167,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[
############################################################################
pmix_show_title "Compiler and preprocessor tests"
PMIX_SETUP_CC
#
# Check for some types
#

Просмотреть файл

@ -95,7 +95,7 @@ EOF
#
PMIX_CONFIGURE_USER="`whoami`"
PMIX_CONFIGURE_HOST="`hostname | head -n 1`"
PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`"
PMIX_CONFIGURE_DATE="`date`"
#
@ -115,7 +115,7 @@ AC_DEFUN([PMIX_BASIC_SETUP],[
#
PMIX_CONFIGURE_USER="`whoami`"
PMIX_CONFIGURE_HOST="`hostname | head -n 1`"
PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`"
PMIX_CONFIGURE_DATE="`date`"
#

Просмотреть файл

@ -892,81 +892,83 @@ typedef struct pmix_value {
} while (0)
/* release the memory in the value struct data field */
#define PMIX_VALUE_DESTRUCT(m) \
do { \
size_t _n; \
if (PMIX_STRING == (m)->type) { \
if (NULL != (m)->data.string) { \
free((m)->data.string); \
} \
} else if ((PMIX_BYTE_OBJECT == (m)->type) || \
(PMIX_COMPRESSED_STRING == (m)->type)) { \
if (NULL != (m)->data.bo.bytes) { \
free((m)->data.bo.bytes); \
} \
} else if (PMIX_DATA_ARRAY == (m)->type) { \
if (PMIX_STRING == (m)->data.darray->type) { \
char **_str = (char**)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
if (NULL != _str[_n]) { \
free(_str[_n]); \
} \
} \
} else if (PMIX_PROC_INFO == (m)->data.darray->type) { \
pmix_proc_info_t *_info = \
(pmix_proc_info_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \
} \
} else if (PMIX_INFO == (m)->data.darray->type) { \
pmix_info_t *_info = \
(pmix_info_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
/* cannot use info destruct as that loops back */ \
if (PMIX_STRING == _info[_n].value.type) { \
if (NULL != _info[_n].value.data.string) { \
free(_info[_n].value.data.string); \
} \
} else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \
if (NULL != _info[_n].value.data.bo.bytes) { \
free(_info[_n].value.data.bo.bytes); \
} \
} else if (PMIX_PROC_INFO == _info[_n].value.type) { \
PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \
} \
} \
} else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \
pmix_byte_object_t *_obj = \
(pmix_byte_object_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
if (NULL != _obj[_n].bytes) { \
free(_obj[_n].bytes); \
} \
} \
} \
if (NULL != (m)->data.darray->array) { \
free((m)->data.darray->array); \
} \
free((m)->data.darray); \
/**** DEPRECATED ****/ \
} else if (PMIX_INFO_ARRAY == (m)->type) { \
pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \
for (_n=0; _n < (m)->data.array->size; _n++) { \
if (PMIX_STRING == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.string) { \
free(_p[_n].value.data.string); \
} \
} else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.bo.bytes) { \
free(_p[_n].value.data.bo.bytes); \
} \
} else if (PMIX_PROC_INFO == _p[_n].value.type) { \
PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \
} \
} \
free(_p); \
/********************/ \
} \
/* Release any memory owned by the data field of a pmix_value_t,
 * dispatching on its type tag: strings, byte objects, data arrays
 * (including per-element cleanup for STRING/PROC_INFO/INFO/BYTE_OBJECT
 * element types), and the deprecated PMIX_INFO_ARRAY form.
 * NOTE(review): the NULL guard added around the darray branch covers
 * only the STRING/PROC_INFO/INFO element cases — the PMIX_BYTE_OBJECT
 * else-if and the trailing free of data.darray->array / data.darray
 * still dereference data.darray unconditionally, so a NULL darray
 * would crash there; verify against upstream PMIx. */
#define PMIX_VALUE_DESTRUCT(m) \
do { \
size_t _n; \
if (PMIX_STRING == (m)->type) { \
if (NULL != (m)->data.string) { \
free((m)->data.string); \
} \
} else if ((PMIX_BYTE_OBJECT == (m)->type) || \
(PMIX_COMPRESSED_STRING == (m)->type)) { \
if (NULL != (m)->data.bo.bytes) { \
free((m)->data.bo.bytes); \
} \
} else if (PMIX_DATA_ARRAY == (m)->type) { \
if (NULL != (m)->data.darray) { \
if (PMIX_STRING == (m)->data.darray->type) { \
char **_str = (char**)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
if (NULL != _str[_n]) { \
free(_str[_n]); \
} \
} \
} else if (PMIX_PROC_INFO == (m)->data.darray->type) { \
pmix_proc_info_t *_info = \
(pmix_proc_info_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \
} \
} else if (PMIX_INFO == (m)->data.darray->type) { \
pmix_info_t *_info = \
(pmix_info_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
/* cannot use info destruct as that loops back */ \
if (PMIX_STRING == _info[_n].value.type) { \
if (NULL != _info[_n].value.data.string) { \
free(_info[_n].value.data.string); \
} \
} else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \
if (NULL != _info[_n].value.data.bo.bytes) { \
free(_info[_n].value.data.bo.bytes); \
} \
} else if (PMIX_PROC_INFO == _info[_n].value.type) { \
PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \
} \
} \
} \
} else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \
pmix_byte_object_t *_obj = \
(pmix_byte_object_t*)(m)->data.darray->array; \
for (_n=0; _n < (m)->data.darray->size; _n++) { \
if (NULL != _obj[_n].bytes) { \
free(_obj[_n].bytes); \
} \
} \
} \
if (NULL != (m)->data.darray->array) { \
free((m)->data.darray->array); \
} \
free((m)->data.darray); \
/**** DEPRECATED ****/ \
} else if (PMIX_INFO_ARRAY == (m)->type) { \
pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \
for (_n=0; _n < (m)->data.array->size; _n++) { \
if (PMIX_STRING == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.string) { \
free(_p[_n].value.data.string); \
} \
} else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.bo.bytes) { \
free(_p[_n].value.data.bo.bytes); \
} \
} else if (PMIX_PROC_INFO == _p[_n].value.type) { \
PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \
} \
} \
free(_p); \
/********************/ \
} \
} while (0)
#define PMIX_VALUE_FREE(m, n) \

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -127,6 +127,7 @@
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#include "src/threads/thread_usage.h"
BEGIN_C_DECLS
@ -496,7 +497,7 @@ static inline pmix_object_t *pmix_obj_new(pmix_class_t * cls)
static inline int pmix_obj_update(pmix_object_t *object, int inc) __pmix_attribute_always_inline__;
static inline int pmix_obj_update(pmix_object_t *object, int inc)
{
return object->obj_reference_count += inc;
return PMIX_THREAD_ADD32(&object->obj_reference_count, inc);
}
END_C_DECLS

Просмотреть файл

@ -167,20 +167,18 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer,
}
pmix_client_globals_t pmix_client_globals = {{{0}}};
pmix_mutex_t pmix_client_bootstrap_mutex = PMIX_MUTEX_STATIC_INIT;
pmix_client_globals_t pmix_client_globals = {0};
/* callback for wait completion */
static void wait_cbfunc(struct pmix_peer_t *pr,
pmix_ptl_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
pmix_lock_t *lock = (pmix_lock_t*)cbdata;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client wait_cbfunc received");
PMIX_POST_OBJECT(active);
*active = false;
PMIX_WAKEUP_THREAD(lock);
}
/* callback to receive job info */
@ -199,7 +197,7 @@ static void job_data(struct pmix_peer_t *pr,
PMIX_ERROR_LOG(rc);
cb->status = PMIX_ERROR;
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
return;
}
assert(NULL != nspace);
@ -211,7 +209,7 @@ static void job_data(struct pmix_peer_t *pr,
#endif
cb->status = PMIX_SUCCESS;
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
PMIX_EXPORT const char* PMIx_Get_version(void)
@ -219,7 +217,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void)
return pmix_version_string;
}
volatile bool waiting_for_debugger = true;
static void notification_fn(size_t evhdlr_registration_id,
pmix_status_t status,
const pmix_proc_t *source,
@ -228,18 +225,13 @@ static void notification_fn(size_t evhdlr_registration_id,
pmix_event_notification_cbfunc_fn_t cbfunc,
void *cbdata)
{
pmix_lock_t *reglock = (pmix_lock_t*)cbdata;
if (NULL != cbfunc) {
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
}
waiting_for_debugger = false;
}
static void evhandler_reg_callbk(pmix_status_t status,
size_t evhandler_ref,
void *cbdata)
{
volatile int *active = (volatile int*)cbdata;
PMIX_POST_OBJECT(active);
*active = status;
PMIX_WAKEUP_THREAD(reglock);
}
typedef struct {
@ -324,19 +316,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
pmix_cb_t cb;
pmix_buffer_t *req;
pmix_cmd_t cmd = PMIX_REQ_CMD;
volatile int active;
pmix_status_t code = PMIX_ERR_DEBUGGER_RELEASE;
pmix_proc_t wildcard;
pmix_info_t ginfo;
pmix_value_t *val = NULL;
pmix_lock_t reglock;
if (NULL == proc) {
return PMIX_ERR_BAD_PARAM;
}
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) {
if (0 < pmix_globals.init_cntr || PMIX_PROC_IS_SERVER) {
/* since we have been called before, the nspace and
* rank should be known. So return them here if
* requested */
@ -344,19 +336,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
}
++pmix_globals.init_cntr;
/* we also need to check the info keys to see if something need
* be done with them - e.g., to notify another library that we
* also have called init */
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (NULL != info) {
_check_for_notify(info, ninfo);
}
++pmix_globals.init_cntr;
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
return PMIX_SUCCESS;
}
/* if we don't see the required info, then we cannot init */
if (NULL == getenv("PMIX_NAMESPACE")) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INVALID_NAMESPACE;
}
@ -365,13 +357,17 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo,
pmix_client_notify_recv))) {
PMIX_ERROR_LOG(rc);
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
/* setup the globals */
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t);
pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t);
if (NULL == pmix_client_globals.myserver) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_NOMEM;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: init called");
@ -379,7 +375,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
/* we require our nspace */
if (NULL == (evar = getenv("PMIX_NAMESPACE"))) {
/* let the caller know that the server isn't available yet */
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INVALID_NAMESPACE;
}
if (NULL != proc) {
@ -393,7 +389,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
/* we also require our rank */
if (NULL == (evar = getenv("PMIX_RANK"))) {
/* let the caller know that the server isn't available yet */
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_DATA_VALUE_NOT_FOUND;
}
pmix_globals.myid.rank = strtol(evar, NULL, 10);
@ -407,25 +403,27 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
* to us at launch */
evar = getenv("PMIX_SECURITY_MODE");
if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* the server will be using the same */
pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec;
pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec;
/* setup the shared memory support */
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_DATA_VALUE_NOT_FOUND;
}
#endif /* PMIX_ENABLE_DSTORE */
/* connect to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
/* mark that we are using the same module as used for the server */
pmix_globals.mypeer->compat.ptl = pmix_client_globals.myserver->compat.ptl;
/* send a request for our job info - we do this as a non-blocking
* transaction because some systems cannot handle very large
@ -434,28 +432,28 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(req);
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
/* send to the server */
PMIX_CONSTRUCT(&cb, pmix_cb_t);
cb.active = true;
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, req, job_data, (void*)&cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, req, job_data, (void*)&cb))){
PMIX_DESTRUCT(&cb);
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
/* wait for the data to return */
PMIX_WAIT_FOR_COMPLETION(cb.active);
PMIX_WAIT_THREAD(&cb.lock);
rc = cb.status;
PMIX_DESTRUCT(&cb);
if (PMIX_SUCCESS == rc) {
pmix_globals.init_cntr++;
} else {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* look for a debugger attach key */
(void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
@ -464,18 +462,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) {
PMIX_VALUE_FREE(val, 1); // cleanup memory
/* if the value was found, then we need to wait for debugger attach here */
/* register for the debugger release notificaation */
active = -1;
/* register for the debugger release notification */
PMIX_CONSTRUCT_LOCK(&reglock);
PMIx_Register_event_handler(&code, 1, NULL, 0,
notification_fn, evhandler_reg_callbk, (void*)&active);
while (-1 == active) {
usleep(100);
}
if (0 != active) {
return active;
}
notification_fn, NULL, (void*)&reglock);
/* wait for it to arrive */
PMIX_WAIT_FOR_COMPLETION(waiting_for_debugger);
PMIX_WAIT_THREAD(&reglock);
PMIX_DESTRUCT_LOCK(&reglock);
}
PMIX_INFO_DESTRUCT(&ginfo);
@ -484,46 +477,81 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
_check_for_notify(info, ninfo);
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
return PMIX_SUCCESS;
}
PMIX_EXPORT int PMIx_Initialized(void)
{
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (0 < pmix_globals.init_cntr) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return true;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return false;
}
typedef struct {
pmix_lock_t lock;
pmix_event_t ev;
bool active;
} pmix_client_timeout_t;
/* timer callback */
static void fin_timeout(int sd, short args, void *cbdata)
{
pmix_client_timeout_t *tev;
tev = (pmix_client_timeout_t*)cbdata;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client finwait timeout fired");
if (tev->active) {
tev->active = false;
PMIX_WAKEUP_THREAD(&tev->lock);
}
}
/* callback for finalize completion */
static void finwait_cbfunc(struct pmix_peer_t *pr,
pmix_ptl_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
{
pmix_client_timeout_t *tev;
tev = (pmix_client_timeout_t*)cbdata;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client finwait_cbfunc received");
if (tev->active) {
tev->active = false;
PMIX_WAKEUP_THREAD(&tev->lock);
}
}
PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
{
pmix_buffer_t *msg;
pmix_cmd_t cmd = PMIX_FINALIZE_CMD;
pmix_status_t rc;
size_t n;
volatile bool active;
pmix_client_timeout_t tev;
struct timeval tv = {2, 0};
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
pmix_globals.init_cntr = 0;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client finalize called");
"%s:%d pmix:client finalize called",
pmix_globals.myid.nspace, pmix_globals.myid.rank);
/* mark that I called finalize */
pmix_globals.mypeer->finalized = true;
if ( 0 <= pmix_client_globals.myserver.sd ) {
if ( 0 <= pmix_client_globals.myserver->sd ) {
/* check to see if we are supposed to execute a
* blocking fence prior to actually finalizing */
if (NULL != info && 0 < ninfo) {
@ -544,7 +572,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
}
}
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
/* setup a cmd message to notify the PMIx
* server that we are normally terminating */
@ -558,22 +585,33 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client sending finalize sync to server");
"%s:%d pmix:client sending finalize sync to server",
pmix_globals.myid.nspace, pmix_globals.myid.rank);
/* setup a timer to protect ourselves should the server be unable
* to answer for some reason */
PMIX_CONSTRUCT_LOCK(&tev.lock);
pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0,
fin_timeout, &tev);
tev.active = true;
PMIX_POST_OBJECT(&tev);
pmix_event_add(&tev.ev, &tv);
/* send to the server */
active = true;;
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg,
wait_cbfunc, (void*)&active))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg,
finwait_cbfunc, (void*)&tev))){
return rc;
}
/* wait for the ack to return */
PMIX_WAIT_FOR_COMPLETION(active);
PMIX_WAIT_THREAD(&tev.lock);
PMIX_DESTRUCT_LOCK(&tev.lock);
if (tev.active) {
pmix_event_del(&tev.ev);
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client finalize sync received");
}
else {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
"%s:%d pmix:client finalize sync received",
pmix_globals.myid.nspace, pmix_globals.myid.rank);
}
if (!pmix_globals.external_evbase) {
@ -584,8 +622,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
(void)pmix_progress_thread_pause(NULL);
}
PMIX_DESTRUCT(&pmix_client_globals.myserver);
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) {
PMIX_ERROR_LOG(rc);
@ -595,11 +631,16 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);
if (0 <= pmix_client_globals.myserver.sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd);
if (0 <= pmix_client_globals.myserver->sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd);
}
if (NULL != pmix_client_globals.myserver) {
PMIX_RELEASE(pmix_client_globals.myserver);
}
pmix_rte_finalize();
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
@ -610,23 +651,23 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[],
pmix_buffer_t *bfr;
pmix_cmd_t cmd = PMIX_ABORT_CMD;
pmix_status_t rc;
volatile bool active;
pmix_lock_t reglock;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:client abort called");
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a buffer to hold the message */
bfr = PMIX_NEW(pmix_buffer_t);
@ -664,14 +705,15 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[],
}
/* send to the server */
active = true;
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, bfr,
wait_cbfunc, (void*)&active))){
PMIX_CONSTRUCT_LOCK(&reglock);
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, bfr,
wait_cbfunc, (void*)&reglock))){
return rc;
}
/* wait for the release */
PMIX_WAIT_FOR_COMPLETION(active);
PMIX_WAIT_THREAD(&reglock);
PMIX_DESTRUCT_LOCK(&reglock);
return PMIX_SUCCESS;
}
@ -766,7 +808,7 @@ static void _putfn(int sd, short args, void *cbdata)
cb->pstatus = rc;
/* post the data so the receiving thread can acquire it */
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val)
@ -778,16 +820,15 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va
"pmix: executing put for key %s type %d",
key, val->type);
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
cb->scope = scope;
cb->key = (char*)key;
cb->value = val;
@ -796,7 +837,7 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va
PMIX_THREADSHIFT(cb, _putfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->pstatus;
PMIX_RELEASE(cb);
@ -854,8 +895,8 @@ static void _commitfn(int sd, short args, void *cbdata)
/* always send, even if we have nothing to contribute, so the server knows
* that we contributed whatever we had */
if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msgout,
wait_cbfunc, (void*)&cb->active))){
if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msgout,
wait_cbfunc, (void*)&cb->lock))){
cb->pstatus = PMIX_SUCCESS;
return;
}
@ -864,7 +905,7 @@ static void _commitfn(int sd, short args, void *cbdata)
cb->pstatus = rc;
/* post the data so the receiving thread can acquire it */
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
PMIX_EXPORT pmix_status_t PMIx_Commit(void)
@ -872,32 +913,30 @@ static void _commitfn(int sd, short args, void *cbdata)
pmix_cb_t *cb;
pmix_status_t rc;
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we are a server, or we aren't connected, don't attempt to send */
if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS; // not an error
}
if (!pmix_globals.connected) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
/* pass this into the event library for thread protection */
PMIX_THREADSHIFT(cb, _commitfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->pstatus;
PMIX_RELEASE(cb);
@ -974,7 +1013,7 @@ static void _peersfn(int sd, short args, void *cbdata)
cb->pstatus = rc;
/* post the data so the receiving thread can acquire it */
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
@ -984,16 +1023,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
pmix_cb_t *cb;
pmix_status_t rc;
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
cb->key = (char*)nodename;
if (NULL != nspace) {
(void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN);
@ -1003,7 +1041,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
PMIX_THREADSHIFT(cb, _peersfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->pstatus;
/* transfer the result */
*procs = cb->procs;
@ -1047,7 +1085,7 @@ static void _nodesfn(int sd, short args, void *cbdata)
cb->pstatus = rc;
/* post the data so the receiving thread can acquire it */
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist)
@ -1055,16 +1093,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
pmix_cb_t *cb;
pmix_status_t rc;
pmix_mutex_lock(&pmix_client_bootstrap_mutex);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
pmix_mutex_unlock(&pmix_client_bootstrap_mutex);
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (NULL != nspace) {
(void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN);
}
@ -1073,7 +1110,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
PMIX_THREADSHIFT(cb, _nodesfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->pstatus;
*nodelist = cb->key;
PMIX_RELEASE(cb);

Просмотреть файл

@ -70,23 +70,27 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs,
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: connect called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the return message is recvd */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = PMIx_Connect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) {
@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs,
}
/* wait for the connect to complete */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
PMIX_RELEASE(cb);
@ -114,17 +118,22 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: connect called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* check for bozo input */
if (NULL == procs || 0 >= nprocs) {
@ -170,7 +179,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -179,25 +188,28 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro
}
PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs,
const pmix_info_t info[], size_t ninfo)
const pmix_info_t info[], size_t ninfo)
{
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the return message is recvd */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Disconnect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) {
PMIX_RELEASE(cb);
@ -205,7 +217,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t npro
}
/* wait for the connect to complete */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
PMIX_RELEASE(cb);
@ -224,17 +236,22 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: disconnect called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* check for bozo input */
if (NULL == procs || 0 >= nprocs) {
@ -280,7 +297,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -347,5 +364,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata)
cb->status = status;
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -66,28 +66,32 @@ static void wait_cbfunc(struct pmix_peer_t *pr,
static void op_cbfunc(pmix_status_t status, void *cbdata);
PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
const pmix_info_t info[], size_t ninfo)
const pmix_info_t info[], size_t ninfo)
{
pmix_cb_t *cb;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: executing fence");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the return message is recvd */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo,
@ -97,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
}
/* wait for the fence to complete */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
PMIX_RELEASE(cb);
@ -108,8 +112,8 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
}
PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_buffer_t *msg;
pmix_cmd_t cmd = PMIX_FENCENB_CMD;
@ -118,17 +122,22 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs
pmix_proc_t rg, *rgs;
size_t nrg;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: fence_nb called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* check for bozo input */
if (NULL == procs && 0 != nprocs) {
@ -160,7 +169,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -252,6 +261,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata)
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
cb->status = status;
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}

Просмотреть файл

@ -87,22 +87,25 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[],
pmix_cb_t *cb;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the return message is recvd */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) {
PMIX_RELEASE(cb);
return rc;
}
/* wait for the data to return */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
*val = cb->value;
PMIX_RELEASE(cb);
@ -121,9 +124,13 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
int rank;
char *nm;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* if the proc is NULL, then the caller is assuming
* that the key is universally unique within the caller's
@ -169,7 +176,6 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
/* thread-shift so we can check global objects */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
(void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN);
cb->rank = rank;
cb->key = (char*)key;
@ -195,12 +201,12 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata)
}
}
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank,
const pmix_info_t info[], size_t ninfo,
pmix_cmd_t cmd)
const pmix_info_t info[], size_t ninfo,
pmix_cmd_t cmd)
{
pmix_buffer_t *msg;
pmix_status_t rc;
@ -620,8 +626,8 @@ static void _getnbfn(int fd, short flags, void *cbdata)
rc = pmix_dstore_fetch(cb->nspace, cb->rank, cb->key, &val);
#endif
if( PMIX_SUCCESS != rc && !my_nspace ){
/* we are asking about the job-level info from other
* namespace. It seems tha we don't have it - go and
/* we are asking about the job-level info from another
* namespace. It seems that we don't have it - go and
* ask server
*/
goto request;
@ -687,12 +693,12 @@ static void _getnbfn(int fd, short flags, void *cbdata)
goto respond;
}
request:
request:
/* if we got here, then we don't have the data for this proc. If we
* are a server, or we are a client and not connected, then there is
* nothing more we can do */
if (PMIX_PROC_SERVER == pmix_globals.proc_type ||
(PMIX_PROC_SERVER != pmix_globals.proc_type && !pmix_globals.connected)) {
if (PMIX_PROC_IS_SERVER ||
(!PMIX_PROC_IS_SERVER && !pmix_globals.connected)) {
rc = PMIX_ERR_NOT_FOUND;
goto respond;
}
@ -700,13 +706,14 @@ request:
/* we also have to check the user's directives to see if they do not want
* us to attempt to retrieve it from the server */
for (n=0; n < cb->ninfo; n++) {
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
if ((0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) || (0 == strcmp(cb->info[n].key, PMIX_IMMEDIATE))) &&
(PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
/* they don't want us to try and retrieve it */
pmix_output_verbose(2, pmix_globals.debug_output,
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",
cb->key, cb->rank, cb->nspace);
rc = PMIX_ERR_NOT_FOUND;
val = NULL;
goto respond;
}
}
@ -740,7 +747,7 @@ request:
/* track the callback object */
pmix_list_append(&pmix_client_globals.pending_requests, &cb->super);
/* send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){
pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super);
rc = PMIX_ERROR;
goto respond;
@ -775,5 +782,4 @@ request:
}
PMIX_RELEASE(cb);
return;
}

Просмотреть файл

@ -20,14 +20,12 @@
BEGIN_C_DECLS
typedef struct {
pmix_peer_t myserver; // messaging support to/from my server
pmix_peer_t *myserver; // messaging support to/from my server
pmix_list_t pending_requests; // list of pmix_cb_t pending data requests
} pmix_client_globals_t;
PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals;
PMIX_EXPORT extern pmix_mutex_t pmix_client_bootstrap_mutex;
END_C_DECLS
#endif /* PMIX_CLIENT_OPS_H */

Просмотреть файл

@ -72,21 +72,25 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[],
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: publish called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object to let us know when it is done */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) {
PMIX_ERROR_LOG(rc);
@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[],
}
/* wait for the server to ack our request */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = (pmix_status_t)cb->status;
PMIX_RELEASE(cb);
@ -110,17 +114,22 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: publish called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* check for bozo cases */
if (NULL == info) {
@ -163,10 +172,9 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo
cb = PMIX_NEW(pmix_cb_t);
cb->op_cbfunc = cbfunc;
cb->cbdata = cbdata;
cb->active = true;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -182,9 +190,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata,
char **keys = NULL;
size_t i;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: lookup called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* bozo protection */
if (NULL == pdata) {
return PMIX_ERR_BAD_PARAM;
@ -203,7 +225,6 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata,
cb = PMIX_NEW(pmix_cb_t);
cb->cbdata = (void*)pdata;
cb->nvals = ndata;
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo,
lookup_cbfunc, cb))) {
@ -213,7 +234,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata,
}
/* wait for the server to ack our request */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
/* the data has been stored in the info array by lookup_cbfunc, so
* nothing more for us to do */
@ -232,13 +253,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys,
pmix_cb_t *cb;
size_t nkeys, n;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: lookup called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* check for bozo cases */
if (NULL == keys) {
return PMIX_ERR_BAD_PARAM;
@ -296,7 +327,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys,
cb->cbdata = cbdata;
/* send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -311,14 +342,27 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys,
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: unpublish called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the return message is recvd */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) {
@ -327,7 +371,7 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys,
}
/* wait for the server to ack our request */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
PMIX_RELEASE(cb);
@ -344,13 +388,23 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys,
pmix_cb_t *cb;
size_t i, j;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: unpublish called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* create the unpublish cmd */
msg = PMIX_NEW(pmix_buffer_t);
/* pack the cmd */
@ -399,10 +453,9 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys,
cb = PMIX_NEW(pmix_cb_t);
cb->op_cbfunc = cbfunc;
cb->cbdata = cbdata;
cb->active = true;
/* send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -442,7 +495,7 @@ static void op_cbfunc(pmix_status_t status, void *cbdata)
cb->status = status;
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}
static void wait_lookup_cbfunc(struct pmix_peer_t *pr,
@ -539,5 +592,5 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda
}
}
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}

Просмотреть файл

@ -69,17 +69,23 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: spawn called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* ensure the nspace (if provided) is initialized */
if (NULL != nspace) {
@ -88,7 +94,6 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, cb))) {
PMIX_RELEASE(cb);
@ -96,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
}
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
PMIX_WAIT_THREAD(&cb->lock);
rc = cb->status;
if (NULL != nspace) {
(void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN);
@ -115,17 +120,22 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin
pmix_status_t rc;
pmix_cb_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: spawn called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
msg = PMIX_NEW(pmix_buffer_t);
/* pack the cmd */
@ -171,7 +181,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -242,5 +252,5 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata)
(void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN);
}
PMIX_POST_OBJECT(cb);
cb->active = false;
PMIX_WAKEUP_THREAD(&cb->lock);
}

Просмотреть файл

@ -21,6 +21,7 @@
#include <pmix_server.h>
#include <pmix_rename.h>
#include "src/threads/threads.h"
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/output.h"
@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_
pmix_status_t rc;
pmix_query_caddy_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: job control called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* if we are the server, then we just issue the request and
* return the response */
if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
@ -125,12 +136,6 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_
}
/* if we are a client, then relay this request to the server */
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
return PMIX_ERR_UNREACH;
}
msg = PMIX_NEW(pmix_buffer_t);
/* pack the cmd */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
@ -178,7 +183,7 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}
@ -195,13 +200,23 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm
pmix_status_t rc;
pmix_query_caddy_t *cb;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: monitor called");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* if we are the server, then we just issue the request and
* return the response */
if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
@ -217,12 +232,6 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm
}
/* if we are a client, then relay this request to the server */
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
return PMIX_ERR_UNREACH;
}
msg = PMIX_NEW(pmix_buffer_t);
/* pack the cmd */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
@ -267,7 +276,7 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -21,6 +21,7 @@
#include <pmix_server.h>
#include <pmix_rename.h>
#include "src/threads/threads.h"
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/output.h"
@ -61,13 +62,23 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata,
pmix_buffer_t *msg;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:log non-blocking");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (0 == ndata || NULL == data) {
return PMIX_ERR_BAD_PARAM;
}
@ -126,7 +137,7 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata,
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:query sending to server");
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){
PMIX_RELEASE(cd);
}
}

Просмотреть файл

@ -21,6 +21,7 @@
#include <pmix_server.h>
#include <pmix_rename.h>
#include "src/threads/threads.h"
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/output.h"
@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque
pmix_buffer_t *msg;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:query non-blocking");
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (0 == nqueries || NULL == queries) {
return PMIX_ERR_BAD_PARAM;
}
@ -127,12 +138,6 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque
rc = PMIX_SUCCESS;
} else {
/* if we are a client, then relay this request to the server */
/* if we aren't connected, don't attempt to send */
if (!pmix_globals.connected) {
return PMIX_ERR_UNREACH;
}
cd = PMIX_NEW(pmix_query_caddy_t);
cd->cbfunc = cbfunc;
cd->cbdata = cbdata;
@ -157,7 +162,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:query sending to server");
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){
PMIX_RELEASE(cd);
}
}
@ -240,7 +245,7 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire
cb->cbdata = cbdata;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
PMIX_RELEASE(msg);
PMIX_RELEASE(cb);
}

Просмотреть файл

@ -165,6 +165,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg);
ch->timer_active = true; \
pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \
pmix_event_timeout_cb, ch); \
PMIX_POST_OBJECT(ch); \
pmix_event_add(&ch->ev, &pmix_globals.event_window); \
} else { \
/* add this peer to the array of sources */ \
@ -183,6 +184,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg);
ch->ninfo = ninfo; \
/* reset the timer */ \
pmix_event_del(&ch->ev); \
PMIX_POST_OBJECT(ch); \
pmix_event_add(&ch->ev, &pmix_globals.event_window); \
} \
} while(0)

Просмотреть файл

@ -18,6 +18,7 @@
#include <pmix_server.h>
#include <pmix_rename.h>
#include "src/threads/threads.h"
#include "src/util/error.h"
#include "src/util/output.h"
@ -44,7 +45,22 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status,
{
int rc;
if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* if we aren't connected, don't attempt to send */
if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_UNREACH;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (PMIX_PROC_IS_SERVER) {
rc = pmix_server_notify_client_of_event(status, source, range,
info, ninfo,
cbfunc, cbdata);
@ -103,10 +119,6 @@ static pmix_status_t notify_server_of_event(pmix_status_t status,
pmix_globals.myid.nspace, pmix_globals.myid.rank,
PMIx_Error_string(status));
if (!pmix_globals.connected) {
return PMIX_ERR_UNREACH;
}
if (PMIX_RANGE_PROC_LOCAL != range) {
/* create the msg object */
msg = PMIX_NEW(pmix_buffer_t);
@ -175,12 +187,11 @@ static pmix_status_t notify_server_of_event(pmix_status_t status,
cd->source.rank = source->rank;
}
cd->range = range;
/* check for directives */
if (NULL != info) {
if (0 < chain->ninfo) {
cd->ninfo = chain->ninfo;
PMIX_INFO_CREATE(cd->info, cd->ninfo);
for (n=0; n < chain->ninfo; n++) {
/* need to copy the info */
for (n=0; n < cd->ninfo; n++) {
PMIX_INFO_XFER(&cd->info[n], &chain->info[n]);
if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
cd->nondefault = true;
@ -205,6 +216,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status,
}
}
}
/* add to our cache */
rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd);
/* if an older event was bumped, release it */
@ -225,7 +237,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status,
pmix_output_verbose(2, pmix_globals.debug_output,
"client: notifying server %s:%d - sending",
pmix_globals.myid.nspace, pmix_globals.myid.rank);
rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb);
rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, notify_event_cbfunc, cb);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(cb);
@ -288,6 +300,7 @@ static void progress_local_event_hdlr(pmix_status_t status,
++cnt;
}
}
/* save this handler's returned status */
if (NULL != chain->evhdlr->name) {
(void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN);

Просмотреть файл

@ -16,6 +16,7 @@
#include <pmix_server.h>
#include <pmix_rename.h>
#include "src/threads/threads.h"
#include "src/util/error.h"
#include "src/util/output.h"
@ -67,6 +68,7 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t,
pmix_object_t,
rscon, rsdes);
static void check_cached_events(pmix_rshift_caddy_t *cd);
static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
@ -107,6 +109,11 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr,
if (NULL != cd && NULL != cd->evregcbfn) {
cd->evregcbfn(ret, index, cd->cbdata);
}
if (NULL != cd) {
/* check this event against anything in our cache */
check_cached_events(cd);
}
/* release any info we brought along as they are
* internally generated and not provided by the caller */
if (NULL!= rb->info) {
@ -148,6 +155,7 @@ static void reg_cbfunc(pmix_status_t status, void *cbdata)
/* pass back our local index */
cd->evregcbfn(rc, index, cd->cbdata);
}
/* release any info we brought along as they are
* internally generated and not provided by the caller */
if (NULL!= rb->info) {
@ -197,7 +205,7 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd)
return rc;
}
}
rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, regevents_cbfunc, rcd);
rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, regevents_cbfunc, rcd);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
@ -280,7 +288,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer)
/* if we are a client, and we haven't already registered a handler of this
* type with our server, or if we have directives, then we need to notify
* the server */
if (PMIX_PROC_SERVER != pmix_globals.proc_type &&
if (!PMIX_PROC_IS_SERVER &&
(need_register || 0 < pmix_list_get_size(xfer))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: _add_hdlr sending to server");
@ -301,7 +309,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer)
/* if we are a server and are registering for events, then we only contact
* our host if we want environmental events */
if (PMIX_PROC_SERVER == pmix_globals.proc_type && cd->enviro &&
if (PMIX_PROC_IS_SERVER && cd->enviro &&
NULL != pmix_host_server.register_events) {
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: _add_hdlr registering with server");
@ -325,24 +333,91 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer)
return PMIX_SUCCESS;
}
static void check_cached_events(pmix_rshift_caddy_t *cd)
{
size_t i, n;
pmix_notify_caddy_t *ncd;
bool found, matched;
pmix_event_chain_t *chain;
for (i=0; i < (size_t)pmix_globals.notifications.size; i++) {
if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) {
continue;
}
found = false;
if (NULL == cd->codes) {
/* they registered a default event handler - always matches */
found = true;
} else {
for (n=0; n < cd->ncodes; n++) {
if (cd->codes[n] == ncd->status) {
found = true;
break;
}
}
}
if (found) {
/* if we were given specific targets, check if we are one */
if (NULL != ncd->targets) {
matched = false;
for (n=0; n < ncd->ntargets; n++) {
if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) {
continue;
}
if (PMIX_RANK_WILDCARD == ncd->targets[n].rank ||
pmix_globals.myid.rank == ncd->targets[n].rank) {
matched = true;
break;
}
}
if (!matched) {
/* do not notify this one */
continue;
}
}
/* all matches - notify */
chain = PMIX_NEW(pmix_event_chain_t);
chain->status = ncd->status;
(void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
chain->source.rank = pmix_globals.myid.rank;
/* we already left space for evhandler name plus
* a callback object when we cached the notification */
chain->ninfo = ncd->ninfo;
PMIX_INFO_CREATE(chain->info, chain->ninfo);
if (0 < cd->ninfo) {
/* need to copy the info */
for (n=0; n < ncd->ninfo; n++) {
PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]);
if (0 == strncmp(chain->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
chain->nondefault = true;
}
}
}
/* we don't want this chain to propagate, so indicate it
* should only be run as a single-shot */
chain->endchain = true;
/* now notify any matching registered callbacks we have */
pmix_invoke_local_event_hdlr(chain);
}
}
}
static void reg_event_hdlr(int sd, short args, void *cbdata)
{
pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata;
size_t index = 0, n, i;
size_t index = 0, n;
pmix_status_t rc;
pmix_event_hdlr_t *evhdlr, *ev;
uint8_t location = PMIX_EVENT_ORDER_NONE;
char *name = NULL, *locator = NULL;
bool firstoverall=false, lastoverall=false;
bool found, matched;
bool found;
pmix_list_t xfer;
pmix_info_caddy_t *ixfer;
void *cbobject = NULL;
pmix_data_range_t range = PMIX_RANGE_UNDEF;
pmix_proc_t *parray = NULL;
size_t nprocs;
pmix_notify_caddy_t *ncd;
pmix_event_chain_t *chain;
/* need to acquire the object from its originating thread */
PMIX_ACQUIRE_OBJECT(cd);
@ -680,63 +755,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
}
/* check if any matching notifications have been cached */
for (i=0; i < (size_t)pmix_globals.notifications.size; i++) {
if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) {
break;
}
found = false;
if (NULL == cd->codes) {
/* they registered a default event handler - always matches */
found = true;
} else {
for (n=0; n < cd->ncodes; n++) {
if (cd->codes[n] == ncd->status) {
found = true;
break;
}
}
}
if (found) {
/* if we were given specific targets, check if we are one */
if (NULL != ncd->targets) {
matched = false;
for (n=0; n < ncd->ntargets; n++) {
if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) {
continue;
}
if (PMIX_RANK_WILDCARD == ncd->targets[n].rank ||
pmix_globals.myid.rank == ncd->targets[n].rank) {
matched = true;
break;
}
}
if (!matched) {
/* do not notify this one */
continue;
}
}
/* all matches - notify */
chain = PMIX_NEW(pmix_event_chain_t);
chain->status = ncd->status;
(void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
chain->source.rank = pmix_globals.myid.rank;
/* we already left space for evhandler name plus
* a callback object when we cached the notification */
chain->ninfo = ncd->ninfo;
PMIX_INFO_CREATE(chain->info, chain->ninfo);
if (0 < cd->ninfo) {
/* need to copy the info */
for (n=0; n < ncd->ninfo; n++) {
PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]);
}
}
/* we don't want this chain to propagate, so indicate it
* should only be run as a single-shot */
chain->endchain = true;
/* now notify any matching registered callbacks we have */
pmix_invoke_local_event_hdlr(chain);
}
}
check_cached_events(cd);
/* all done */
PMIX_RELEASE(cd);
@ -750,6 +769,17 @@ PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncode
{
pmix_rshift_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (NULL != cbfunc) {
cbfunc(PMIX_ERR_INIT, 0, cbdata);
}
return;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* need to thread shift this request so we can access
* our global data to register this *local* event handler */
cd = PMIX_NEW(pmix_rshift_caddy_t);
@ -783,7 +813,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata)
/* if I am not the server, then I need to notify the server
* to remove my registration */
if (PMIX_PROC_SERVER != pmix_globals.proc_type) {
if (!PMIX_PROC_IS_SERVER) {
msg = PMIX_NEW(pmix_buffer_t);
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
PMIX_RELEASE(msg);
@ -928,7 +958,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata)
report:
if (NULL != msg) {
/* send to the server */
rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, NULL, NULL);
rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, NULL, NULL);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
}
@ -948,6 +978,16 @@ PMIX_EXPORT void PMIx_Deregister_event_handler(size_t event_hdlr_ref,
{
pmix_shift_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (NULL != cbfunc) {
cbfunc(PMIX_ERR_INIT, cbdata);
}
return;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* need to thread shift this request */
cd = PMIX_NEW(pmix_shift_caddy_t);
cd->cbfunc.opcbfn = cbfunc;

Просмотреть файл

@ -40,10 +40,17 @@
#include "src/buffer_ops/types.h"
#include "src/class/pmix_hash_table.h"
#include "src/class/pmix_list.h"
#include "src/threads/threads.h"
pmix_lock_t pmix_global_lock = {
.mutex = PMIX_MUTEX_STATIC_INIT,
.cond = PMIX_CONDITION_STATIC_INIT,
.active = false
};
static void cbcon(pmix_cb_t *p)
{
p->active = false;
PMIX_CONSTRUCT_LOCK(&p->lock);
p->checked = false;
PMIX_CONSTRUCT(&p->data, pmix_buffer_t);
p->cbfunc = NULL;
@ -63,6 +70,7 @@ static void cbcon(pmix_cb_t *p)
}
static void cbdes(pmix_cb_t *p)
{
PMIX_DESTRUCT_LOCK(&p->lock);
PMIX_DESTRUCT(&p->data);
}
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t,
@ -220,7 +228,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t,
static void scon(pmix_shift_caddy_t *p)
{
p->active = false;
PMIX_CONSTRUCT_LOCK(&p->lock);
p->codes = NULL;
p->ncodes = 0;
p->nspace = NULL;
@ -242,6 +250,7 @@ static void scon(pmix_shift_caddy_t *p)
}
static void scdes(pmix_shift_caddy_t *p)
{
PMIX_DESTRUCT_LOCK(&p->lock);
if (NULL != p->kv) {
PMIX_RELEASE(p->kv);
}
@ -256,6 +265,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t,
static void qcon(pmix_query_caddy_t *p)
{
PMIX_CONSTRUCT_LOCK(&p->lock);
p->queries = NULL;
p->nqueries = 0;
p->targets = NULL;
@ -266,9 +276,13 @@ static void qcon(pmix_query_caddy_t *p)
p->cbdata = NULL;
p->relcbfunc = NULL;
}
PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
pmix_object_t,
qcon, NULL);
static void qdes(pmix_query_caddy_t *p)
{
PMIX_DESTRUCT_LOCK(&p->lock);
}
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
pmix_object_t,
qcon, qdes);
static void jdcon(pmix_job_data_caddy_t *p)
{

Просмотреть файл

@ -214,7 +214,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_lock_t lock;
pmix_status_t status;
pmix_query_t *queries;
size_t nqueries;
@ -234,7 +234,7 @@ typedef struct {
pmix_cmd_t type;
pmix_proc_t *pcs; // copy of the original array of participants
size_t npcs; // number of procs in the array
volatile bool active; // flag for waiting for completion
pmix_lock_t lock; // flag for waiting for completion
bool def_complete; // all local procs have been registered and the trk definition is complete
pmix_list_t ranks; // list of pmix_rank_info_t of the local participants
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants
@ -271,7 +271,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_lock_t lock;
pmix_status_t status;
pmix_status_t *codes;
size_t ncodes;
@ -305,7 +305,7 @@ PMIX_CLASS_DECLARATION(pmix_shift_caddy_t);
typedef struct {
pmix_list_item_t super;
pmix_event_t ev;
volatile bool active;
pmix_lock_t lock;
bool checked;
int status;
pmix_status_t pstatus;
@ -340,7 +340,6 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
#define PMIX_THREADSHIFT(r, c) \
do { \
(r)->active = true; \
pmix_event_assign(&((r)->ev), pmix_globals.evbase, \
-1, EV_WRITE, (c), (r)); \
PMIX_POST_OBJECT((r)); \
@ -356,6 +355,24 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
PMIX_ACQUIRE_OBJECT((a)); \
} while (0)
typedef struct {
pmix_object_t super;
pmix_event_t ev;
pmix_lock_t lock;
pmix_status_t status;
pmix_proc_t source;
pmix_data_range_t range;
pmix_proc_t *targets;
size_t ntargets;
bool nondefault;
pmix_info_t *info;
size_t ninfo;
pmix_buffer_t *buf;
pmix_op_cbfunc_t cbfunc;
void *cbdata;
} pmix_notify_caddy_t;
PMIX_CLASS_DECLARATION(pmix_notify_caddy_t);
/**** GLOBAL STORAGE ****/
/* define a global construct that includes values that must be shared
@ -384,6 +401,7 @@ typedef struct {
PMIX_EXPORT extern pmix_globals_t pmix_globals;
PMIX_EXPORT extern pmix_lock_t pmix_global_lock;
END_C_DECLS

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -20,6 +20,8 @@
#ifndef PMIX_PREFETCH_H
#define PMIX_PREFETCH_H
#include <src/include/pmix_config.h>
#if PMIX_C_HAVE_BUILTIN_EXPECT
#define PMIX_LIKELY(expression) __builtin_expect(!!(expression), 1)
#define PMIX_UNLIKELY(expression) __builtin_expect(!!(expression), 0)

Просмотреть файл

@ -82,9 +82,11 @@ static pmix_status_t pmix_ptl_close(void)
/* ensure the listen thread has been shut down */
pmix_ptl.stop_listening();
if (0 <= pmix_client_globals.myserver.sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd);
pmix_client_globals.myserver.sd = -1;
if (NULL != pmix_client_globals.myserver) {
if (0 <= pmix_client_globals.myserver->sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd);
pmix_client_globals.myserver->sd = -1;
}
}
/* the components will cleanup when closed */
@ -105,7 +107,6 @@ static pmix_status_t pmix_ptl_open(pmix_mca_base_open_flag_t flags)
PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t);
pmix_ptl_globals.listen_thread_active = false;
PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t);
pmix_client_globals.myserver.sd = -1;
/* Open up all available components */
return pmix_mca_base_framework_components_open(&pmix_ptl_base_framework, flags);
@ -142,6 +143,7 @@ PMIX_CLASS_INSTANCE(pmix_ptl_send_t,
static void rcon(pmix_ptl_recv_t *p)
{
p->peer = NULL;
memset(&p->hdr, 0, sizeof(pmix_ptl_hdr_t));
p->hdr.tag = UINT32_MAX;
p->hdr.nbytes = 0;
@ -150,9 +152,15 @@ static void rcon(pmix_ptl_recv_t *p)
p->rdptr = NULL;
p->rdbytes = 0;
}
static void rdes(pmix_ptl_recv_t *p)
{
if (NULL != p->peer) {
PMIX_RELEASE(p->peer);
}
}
PMIX_CLASS_INSTANCE(pmix_ptl_recv_t,
pmix_list_item_t,
rcon, NULL);
rcon, rdes);
static void prcon(pmix_ptl_posted_recv_t *p)
{

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -70,7 +70,7 @@ int pmix_ptl_base_select(void)
PMIX_LIST_FOREACH(active, &pmix_ptl_globals.actives, pmix_ptl_base_active_t) {
if (newactive->pri > active->pri) {
pmix_list_insert_pos(&pmix_ptl_globals.actives,
(pmix_list_item_t*)active, &newactive->super);
&active->super, &newactive->super);
inserted = true;
break;
}

Просмотреть файл

@ -173,7 +173,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
PMIX_DESTRUCT(&buf);
/* if I called finalize, then don't generate an event */
if (!pmix_globals.mypeer->finalized) {
PMIX_REPORT_EVENT(err, &pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete);
PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete);
}
}
}
@ -183,6 +183,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg)
struct iovec iov[2];
int iov_count;
ssize_t remain = msg->sdbytes, rc;
iov[0].iov_base = msg->sdptr;
iov[0].iov_len = msg->sdbytes;
if (!msg->hdr_sent && NULL != msg->data) {
@ -297,7 +298,7 @@ static pmix_status_t read_bytes(int sd, char **buf, size_t *remain)
ptr += rc;
}
/* we read the full data block */
exit:
exit:
*buf = ptr;
return ret;
}
@ -316,7 +317,8 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata)
PMIX_ACQUIRE_OBJECT(peer);
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg",
"%s:%d ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
peer->info->nptr->nspace, peer->info->rank,
(NULL == msg) ? UINT_MAX : ntohl(msg->hdr.tag),
(NULL == msg) ? "NULL" : "NON-NULL");
@ -335,14 +337,24 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata)
/* exit this event and let the event lib progress */
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:send_handler RES BUSY OR WOULD BLOCK");
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
return;
} else {
pmix_output_verbose(5, pmix_globals.debug_output,
"%s:%d SEND ERROR %s",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
PMIx_Error_string(rc));
// report the error
pmix_event_del(&peer->send_event);
peer->send_ev_active = false;
PMIX_RELEASE(msg);
peer->send_msg = NULL;
lost_connection(peer, rc);
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
return;
}
@ -361,6 +373,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata)
pmix_event_del(&peer->send_event);
peer->send_ev_active = false;
}
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
}
/*
@ -381,7 +396,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
PMIX_ACQUIRE_OBJECT(peer);
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:recv:handler called with peer %s:%d",
"%s:%d ptl:base:recv:handler called with peer %s:%d",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
(NULL == peer) ? "NULL" : peer->info->nptr->nspace,
(NULL == peer) ? PMIX_RANK_UNDEF : peer->info->rank);
@ -397,6 +413,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
pmix_output(0, "sptl:base:recv_handler: unable to allocate recv message\n");
goto err_close;
}
PMIX_RETAIN(peer);
peer->recv_msg->peer = peer; // provide a handle back to the peer object
/* start by reading the header */
peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
@ -430,6 +447,11 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
peer->recv_msg->data = NULL; // make sure
peer->recv_msg->rdptr = NULL;
peer->recv_msg->rdbytes = 0;
/* post it for delivery */
PMIX_ACTIVATE_POST_MSG(peer->recv_msg);
peer->recv_msg = NULL;
PMIX_POST_OBJECT(peer);
return;
} else {
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:recv:handler allocate data region of size %lu",
@ -451,7 +473,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
* and let the caller know
*/
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:msg_recv: peer closed connection");
"ptl:base:msg_recv: peer %s:%d closed connection",
peer->info->nptr->nspace, peer->info->rank);
goto err_close;
}
}
@ -464,29 +487,39 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) {
/* we recvd all of the message */
pmix_output_verbose(2, pmix_globals.debug_output,
"RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d",
"%s:%d RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
(int)peer->recv_msg->hdr.nbytes,
peer->recv_msg->hdr.tag, peer->sd);
/* post it for delivery */
PMIX_ACTIVATE_POST_MSG(peer->recv_msg);
peer->recv_msg = NULL;
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
return;
} else if (PMIX_ERR_RESOURCE_BUSY == rc ||
PMIX_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
return;
} else {
/* the remote peer closed the connection - report that condition
* and let the caller know
*/
pmix_output_verbose(2, pmix_globals.debug_output,
"ptl:base:msg_recv: peer closed connection");
"%s:%d ptl:base:msg_recv: peer %s:%d closed connection",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
peer->info->nptr->nspace, peer->info->rank);
goto err_close;
}
}
/* success */
return;
err_close:
err_close:
/* stop all events */
if (peer->recv_ev_active) {
pmix_event_del(&peer->recv_event);
@ -501,6 +534,9 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata)
peer->recv_msg = NULL;
}
lost_connection(peer, PMIX_ERR_UNREACH);
/* ensure we post the modified peer object before another thread
* picks it back up */
PMIX_POST_OBJECT(peer);
}
void pmix_ptl_base_send(int sd, short args, void *cbdata)
@ -515,6 +551,9 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata)
NULL == queue->peer->info || NULL == queue->peer->info->nptr) {
/* this peer has lost connection */
PMIX_RELEASE(queue);
/* ensure we post the object before another thread
* picks it back up */
PMIX_POST_OBJECT(queue);
return;
}
@ -542,10 +581,12 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata)
}
/* ensure the send event is active */
if (!(queue->peer)->send_ev_active) {
pmix_event_add(&(queue->peer)->send_event, 0);
(queue->peer)->send_ev_active = true;
PMIX_POST_OBJECT(queue->peer);
pmix_event_add(&(queue->peer)->send_event, 0);
}
PMIX_RELEASE(queue);
PMIX_POST_OBJECT(snd);
}
void pmix_ptl_base_send_recv(int fd, short args, void *cbdata)
@ -561,6 +602,9 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata)
if (ms->peer->sd < 0) {
/* this peer's socket has been closed */
PMIX_RELEASE(ms);
/* ensure we post the object before another thread
* picks it back up */
PMIX_POST_OBJECT(NULL);
return;
}
@ -577,6 +621,7 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata)
req->tag = tag;
req->cbfunc = ms->cbfunc;
req->cbdata = ms->cbdata;
pmix_output_verbose(5, pmix_globals.debug_output,
"posting recv on tag %d", req->tag);
/* add it to the list of recvs - we cannot have unexpected messages
@ -606,11 +651,13 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata)
}
/* ensure the send event is active */
if (!ms->peer->send_ev_active) {
pmix_event_add(&ms->peer->send_event, 0);
ms->peer->send_ev_active = true;
PMIX_POST_OBJECT(snd);
pmix_event_add(&ms->peer->send_event, 0);
}
/* cleanup */
PMIX_RELEASE(ms);
PMIX_POST_OBJECT(snd);
}
void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata)
@ -623,7 +670,8 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata)
PMIX_ACQUIRE_OBJECT(msg);
pmix_output_verbose(5, pmix_globals.debug_output,
"message received %d bytes for tag %u on socket %d",
"%s:%d message received %d bytes for tag %u on socket %d",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
(int)msg->hdr.nbytes, msg->hdr.tag, msg->sd);
/* see if we have a waiting recv for this message */
@ -643,7 +691,14 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata)
buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used;
}
msg->data = NULL; // protect the data region
pmix_output_verbose(5, pmix_globals.debug_output,
"%s:%d EXECUTE CALLBACK for tag %u",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
msg->hdr.tag);
rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata);
pmix_output_verbose(5, pmix_globals.debug_output,
"%s:%d CALLBACK COMPLETE",
pmix_globals.myid.nspace, pmix_globals.myid.rank);
PMIX_DESTRUCT(&buf); // free's the msg data
}
/* done with the recv if it is a dynamic tag */
@ -668,4 +723,7 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata)
/* it is possible that someone may post a recv for this message
* at some point, so we have to hold onto it */
pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super);
/* ensure we post the modified object before another thread
* picks it back up */
PMIX_POST_OBJECT(msg);
}

Просмотреть файл

@ -247,11 +247,11 @@ PMIX_CLASS_DECLARATION(pmix_listener_t);
/* add it to the queue */ \
pmix_list_append(&(p)->send_queue, &snd->super); \
} \
PMIX_POST_OBJECT(snd); \
/* ensure the send event is active */ \
if (!(p)->send_ev_active && 0 <= (p)->sd) { \
pmix_event_add(&(p)->send_event, 0); \
(p)->send_ev_active = true; \
PMIX_POST_OBJECT(snd); \
pmix_event_add(&(p)->send_event, 0); \
} \
} while (0)

Просмотреть файл

@ -123,7 +123,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
/* if I am a client, then we need to look for the appropriate
* connection info in the environment */
if (PMIX_PROC_CLIENT == pmix_globals.proc_type) {
if (PMIX_PROC_IS_CLIENT) {
if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) {
/* not us */
return PMIX_ERR_NOT_SUPPORTED;
@ -149,12 +149,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
}
*p2 = '\0';
++p2;
pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, p, PMIX_MAX_NSLEN);
pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, p, PMIX_MAX_NSLEN);
/* set the server rank */
pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10);
pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10);
/* save the URI, but do not overwrite what we may have received from
* the info-key directives */
@ -163,7 +163,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
}
pmix_argv_free(uri);
} else if (PMIX_PROC_TOOL == pmix_globals.proc_type) {
} else if (PMIX_PROC_IS_TOOL) {
/* if we already have a URI, then look no further */
if (NULL == mca_ptl_tcp_component.super.uri) {
/* we have to discover the connection info,
@ -208,19 +208,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
*p2 = '\0';
++p2;
/* set the server nspace */
pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, srvr, PMIX_MAX_NSLEN);
pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10);
pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, srvr, PMIX_MAX_NSLEN);
pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10);
/* now parse the uri itself */
mca_ptl_tcp_component.super.uri = strdup(p);
free(srvr);
}
}
/* mark that we are the active module for this server */
pmix_client_globals.myserver.compat.ptl = &pmix_ptl_tcp_module;
/* setup the path to the daemon rendezvous point */
memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage));
if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) {
@ -285,7 +282,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
PMIX_ERROR_LOG(rc);
return rc;
}
pmix_client_globals.myserver.sd = sd;
pmix_client_globals.myserver->sd = sd;
/* send our identity and any authentication credentials to the server */
if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
@ -310,21 +307,22 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
pmix_ptl_base_set_nonblocking(sd);
/* setup recv event */
pmix_event_assign(&pmix_client_globals.myserver.recv_event,
pmix_event_assign(&pmix_client_globals.myserver->recv_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
pmix_client_globals.myserver->sd,
EV_READ | EV_PERSIST,
pmix_ptl_base_recv_handler, &pmix_client_globals.myserver);
pmix_event_add(&pmix_client_globals.myserver.recv_event, 0);
pmix_client_globals.myserver.recv_ev_active = true;
pmix_ptl_base_recv_handler, pmix_client_globals.myserver);
pmix_client_globals.myserver->recv_ev_active = true;
PMIX_POST_OBJECT(pmix_client_globals.myserver);
pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);
/* setup send event */
pmix_event_assign(&pmix_client_globals.myserver.send_event,
pmix_event_assign(&pmix_client_globals.myserver->send_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
pmix_client_globals.myserver->sd,
EV_WRITE|EV_PERSIST,
pmix_ptl_base_send_handler, &pmix_client_globals.myserver);
pmix_client_globals.myserver.send_ev_active = false;
pmix_ptl_base_send_handler, pmix_client_globals.myserver);
pmix_client_globals.myserver->send_ev_active = false;
return PMIX_SUCCESS;
}
@ -403,7 +401,7 @@ static pmix_status_t send_connect_ack(int sd)
* local PMIx server, if known. Now use that module to
* get a credential, if the security system provides one. Not
* every psec module will do so, thus we must first check */
if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver,
if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver,
PMIX_PROTOCOL_V2, &cred, &len))) {
return rc;
}
@ -551,7 +549,7 @@ static pmix_status_t recv_connect_ack(int sd)
if (PMIX_PROC_IS_CLIENT) {
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) {
if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) {
return rc;
}
} else if (PMIX_SUCCESS != reply) {
@ -588,16 +586,16 @@ static pmix_status_t recv_connect_ack(int sd)
pmix_globals.myid.rank = 0;
/* get the server's nspace and rank so we can send to it */
pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver.info->nptr->nspace, PMIX_MAX_NSLEN+1);
pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver.info->rank), sizeof(int));
pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t);
pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver->info->nptr->nspace, PMIX_MAX_NSLEN+1);
pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->rank), sizeof(int));
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d",
pmix_globals.myid.nspace, pmix_globals.myid.rank,
pmix_client_globals.myserver.info->nptr->nspace,
pmix_client_globals.myserver.info->rank);
pmix_client_globals.myserver->info->nptr->nspace,
pmix_client_globals.myserver->info->rank);
/* get the returned status from the security handshake */
pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t));
@ -607,7 +605,7 @@ static pmix_status_t recv_connect_ack(int sd)
if (NULL == pmix_psec.client_handshake) {
return PMIX_ERR_HANDSHAKE_FAILED;
}
if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) {
if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) {
return reply;
}
/* if the handshake succeeded, then fall thru to the next step */

Просмотреть файл

@ -116,12 +116,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
}
/* set the server nspace */
pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, uri[0], PMIX_MAX_NSLEN);
pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t);
pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, uri[0], PMIX_MAX_NSLEN);
/* set the server rank */
pmix_client_globals.myserver.info->rank = strtoull(uri[1], NULL, 10);
pmix_client_globals.myserver->info->rank = strtoull(uri[1], NULL, 10);
/* setup the path to the daemon rendezvous point */
memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage));
@ -141,7 +141,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
PMIX_ERROR_LOG(rc);
return rc;
}
pmix_client_globals.myserver.sd = sd;
pmix_client_globals.myserver->sd = sd;
/* send our identity and any authentication credentials to the server */
if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
@ -164,21 +164,21 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
pmix_ptl_base_set_nonblocking(sd);
/* setup recv event */
pmix_event_assign(&pmix_client_globals.myserver.recv_event,
pmix_event_assign(&pmix_client_globals.myserver->recv_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
pmix_client_globals.myserver->sd,
EV_READ | EV_PERSIST,
pmix_ptl_base_recv_handler, &pmix_client_globals.myserver);
pmix_event_add(&pmix_client_globals.myserver.recv_event, 0);
pmix_client_globals.myserver.recv_ev_active = true;
pmix_event_add(&pmix_client_globals.myserver->recv_event, 0);
pmix_client_globals.myserver->recv_ev_active = true;
/* setup send event */
pmix_event_assign(&pmix_client_globals.myserver.send_event,
pmix_event_assign(&pmix_client_globals.myserver->send_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
pmix_client_globals.myserver->sd,
EV_WRITE|EV_PERSIST,
pmix_ptl_base_send_handler, &pmix_client_globals.myserver);
pmix_client_globals.myserver.send_ev_active = false;
pmix_client_globals.myserver->send_ev_active = false;
return PMIX_SUCCESS;
}
@ -244,7 +244,7 @@ static pmix_status_t send_connect_ack(int sd)
/* get a credential, if the security system provides one. Not
* every SPC will do so, thus we must first check */
if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver,
if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver,
PMIX_PROTOCOL_V1, &cred, &len))) {
return rc;
}
@ -331,7 +331,7 @@ static pmix_status_t recv_connect_ack(int sd)
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) {
if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) {
return rc;
}
} else if (PMIX_SUCCESS != reply) {

Просмотреть файл

@ -106,7 +106,6 @@ static void* progress_engine(pmix_object_t *obj)
pmix_progress_tracker_t *trk = (pmix_progress_tracker_t*)t->t_arg;
while (trk->ev_active) {
pmix_event_loop(trk->ev_base, PMIX_EVLOOP_ONCE);
}

Просмотреть файл

@ -141,9 +141,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
NULL
};
if (0 < pmix_globals.init_cntr) {
return PMIX_SUCCESS;
}
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server init called");
@ -152,15 +150,18 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
* opens and initializes the required frameworks */
if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_SERVER, info, ninfo, NULL))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
if (0 != (rc = initialize_server_base(module))) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
#endif /* PMIX_ENABLE_DSTORE */
@ -174,7 +175,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) {
pmix_show_help("help-pmix-server.txt", "listener-thread-start", true);
PMIx_server_finalize();
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
@ -204,7 +205,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
kv.key = NULL;
kv.value = NULL;
PMIX_DESTRUCT(&kv);
PMIx_server_finalize();
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
}
@ -220,7 +221,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
/* get our available ptl modules */
ptl_mode = pmix_ptl.get_available_modules();
/* just in case, assign our own default modules */
if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
++pmix_globals.init_cntr;
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
@ -230,11 +238,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void)
int i;
pmix_peer_t *peer;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
pmix_globals.init_cntr = 0;
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server finalize called");
@ -500,6 +516,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
cd = PMIX_NEW(pmix_setup_caddy_t);
(void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN);
cd->nlocalprocs = nlocalprocs;
@ -562,6 +585,16 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[],
"pmix:server deregister nspace %s",
nspace);
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (NULL != cbfunc) {
cbfunc(PMIX_ERR_INIT, cbdata);
}
return;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
cd = PMIX_NEW(pmix_setup_caddy_t);
(void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN);
cd->opcbfunc = cbfunc;
@ -780,6 +813,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc,
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server register client %s:%d",
proc->nspace, proc->rank);
@ -844,6 +884,16 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc,
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
if (NULL != cbfunc) {
cbfunc(PMIX_ERR_INIT, cbdata);
}
return;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server deregister client %s:%d",
proc->nspace, proc->rank);
@ -866,6 +916,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char *
pmix_listener_t *lt;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server setup_fork for nspace %s rank %d",
proc->nspace, proc->rank);
@ -944,7 +1001,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
cd->active = false; // ensure the request doesn't hang
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -956,8 +1013,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
/* execute the callback */
cd->cbfunc(PMIX_SUCCESS, data, sz, cd->cbdata);
cd->active = false;
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -976,7 +1032,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
cd->active = false; // ensure the request doesn't hang
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -989,7 +1045,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
cd->active = false; // ensure the request doesn't hang
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -1012,7 +1068,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
if (NULL != data) {
free(data);
}
cd->active = false;
PMIX_WAKEUP_THREAD(&cd->lock);
}
PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
@ -1021,6 +1077,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* protect against bozo */
if (NULL == cbfunc || NULL == proc) {
return PMIX_ERR_BAD_PARAM;
@ -1040,7 +1103,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
* potential threading issues */
PMIX_THREADSHIFT(cd, _dmodex_req);
PMIX_WAIT_FOR_COMPLETION(cd->active);
PMIX_WAIT_THREAD(&cd->lock);
PMIX_RELEASE(cd);
return PMIX_SUCCESS;
}
@ -1065,7 +1128,9 @@ static void _store_internal(int sd, short args, void *cbdata)
} else {
cd->status = pmix_hash_store(&ns->internal, cd->rank, cd->kv);
}
cd->active = false;
if (cd->lock.active) {
PMIX_WAKEUP_THREAD(&cd->lock);
}
}
PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
@ -1074,6 +1139,13 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
pmix_shift_caddy_t *cd;
pmix_status_t rc;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* setup to thread shift this request */
cd = PMIX_NEW(pmix_shift_caddy_t);
cd->nspace = proc->nspace;
@ -1089,12 +1161,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
return rc;
}
if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
PMIX_THREADSHIFT(cd, _store_internal);
PMIX_WAIT_FOR_COMPLETION(cd->active);
} else {
_store_internal(0, 0, cd);
}
PMIX_THREADSHIFT(cd, _store_internal);
PMIX_WAIT_THREAD(&cd->lock);
rc = cd->status;
PMIX_RELEASE(cd);
@ -1116,6 +1184,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp)
char **regexargs = NULL, *tmp, *tmp2;
char *cptr;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* define the default */
*regexp = NULL;
@ -1341,6 +1416,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **regexp)
char *tmp, *tmp2;
char *cptr;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* define the default */
*regexp = NULL;
@ -1523,6 +1605,13 @@ pmix_status_t PMIx_server_setup_application(const char nspace[],
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* need to threadshift this request */
cd = PMIX_NEW(pmix_setup_caddy_t);
if (NULL == cd) {
@ -1567,6 +1656,13 @@ pmix_status_t PMIx_server_setup_local_support(const char nspace[],
{
pmix_setup_caddy_t *cd;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (pmix_globals.init_cntr <= 0) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
/* need to threadshift this request */
cd = PMIX_NEW(pmix_setup_caddy_t);
if (NULL == cd) {
@ -1636,7 +1732,7 @@ static void _spcb(int sd, short args, void *cbdata)
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(cd->cd);
cd->active = false;
PMIX_WAKEUP_THREAD(&cd->lock);
return;
}
if (PMIX_SUCCESS == cd->status) {
@ -1666,7 +1762,7 @@ static void _spcb(int sd, short args, void *cbdata)
PMIX_SERVER_QUEUE_REPLY(cd->cd->peer, cd->cd->hdr.tag, reply);
/* cleanup */
PMIX_RELEASE(cd->cd);
cd->active = false;
PMIX_WAKEUP_THREAD(&cd->lock);
}
static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata)
@ -1680,7 +1776,7 @@ static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata)
cd->cd = (pmix_server_caddy_t*)cbdata;;
PMIX_THREADSHIFT(cd, _spcb);
PMIX_WAIT_FOR_COMPLETION(cd->active);
PMIX_WAIT_THREAD(&cd->lock);
PMIX_RELEASE(cd);
}

Просмотреть файл

@ -1675,7 +1675,7 @@ static void tcon(pmix_server_trkr_t *t)
{
t->pcs = NULL;
t->npcs = 0;
t->active = true;
PMIX_CONSTRUCT_LOCK(&t->lock);
t->def_complete = false;
PMIX_CONSTRUCT(&t->ranks, pmix_list_t);
PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t);
@ -1690,6 +1690,7 @@ static void tcon(pmix_server_trkr_t *t)
}
static void tdes(pmix_server_trkr_t *t)
{
PMIX_DESTRUCT_LOCK(&t->lock);
if (NULL != t->pcs) {
free(t->pcs);
}
@ -1725,7 +1726,7 @@ PMIX_CLASS_INSTANCE(pmix_snd_caddy_t,
static void scadcon(pmix_setup_caddy_t *p)
{
memset(&p->proc, 0, sizeof(pmix_proc_t));
p->active = true;
PMIX_CONSTRUCT_LOCK(&p->lock);
p->nspace = NULL;
p->server_object = NULL;
p->nlocalprocs = 0;
@ -1738,6 +1739,7 @@ static void scadcon(pmix_setup_caddy_t *p)
}
static void scaddes(pmix_setup_caddy_t *p)
{
PMIX_DESTRUCT_LOCK(&p->lock);
}
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t,
pmix_object_t,
@ -1745,7 +1747,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t,
static void ncon(pmix_notify_caddy_t *p)
{
p->active = true;
PMIX_CONSTRUCT_LOCK(&p->lock);
memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1);
p->source.rank = PMIX_RANK_UNDEF;
p->range = PMIX_RANGE_UNDEF;
@ -1758,6 +1760,7 @@ static void ncon(pmix_notify_caddy_t *p)
}
static void ndes(pmix_notify_caddy_t *p)
{
PMIX_DESTRUCT_LOCK(&p->lock);
if (NULL != p->info) {
PMIX_INFO_FREE(p->info, p->ninfo);
}

Просмотреть файл

@ -19,6 +19,7 @@
#include <pmix_common.h>
#include <src/class/pmix_ring_buffer.h>
#include <pmix_server.h>
#include "src/threads/threads.h"
#include "src/util/hash.h"
typedef struct {
@ -31,7 +32,7 @@ PMIX_CLASS_DECLARATION(pmix_trkr_caddy_t);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_lock_t lock;
char *nspace;
pmix_status_t status;
pmix_proc_t proc;
@ -48,24 +49,6 @@ typedef struct {
} pmix_setup_caddy_t;
PMIX_CLASS_DECLARATION(pmix_setup_caddy_t);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_status_t status;
pmix_proc_t source;
pmix_data_range_t range;
pmix_proc_t *targets;
size_t ntargets;
bool nondefault;
pmix_info_t *info;
size_t ninfo;
pmix_buffer_t *buf;
pmix_op_cbfunc_t cbfunc;
void *cbdata;
} pmix_notify_caddy_t;
PMIX_CLASS_DECLARATION(pmix_notify_caddy_t);
typedef struct {
pmix_list_item_t super;
pmix_setup_caddy_t *cd;

Просмотреть файл

@ -25,7 +25,6 @@
# Source code files
headers += \
threads/condition.h \
threads/mutex.h \
threads/mutex_unix.h \
threads/threads.h \
@ -34,7 +33,6 @@ headers += \
threads/thread_usage.h
libpmix_la_SOURCES += \
threads/condition.c \
threads/mutex.c \
threads/thread.c \
threads/wait_sync.c

Просмотреть файл

@ -1,39 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "pmix_config.h"
#include "src/threads/condition.h"
static void pmix_condition_construct(pmix_condition_t *c)
{
c->c_waiting = 0;
c->c_signaled = 0;
}
static void pmix_condition_destruct(pmix_condition_t *c)
{
}
PMIX_CLASS_INSTANCE(pmix_condition_t,
pmix_object_t,
pmix_condition_construct,
pmix_condition_destruct);

Просмотреть файл

@ -1,78 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef PMIX_CONDITION_SPINLOCK_H
#define PMIX_CONDITION_SPINLOCK_H
#include "pmix_config.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <time.h>
#include <pthread.h>
#include "src/threads/mutex.h"
BEGIN_C_DECLS
struct pmix_condition_t {
pmix_object_t super;
volatile int c_waiting;
volatile int c_signaled;
};
typedef struct pmix_condition_t pmix_condition_t;
PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_condition_t);
static inline int pmix_condition_wait(pmix_condition_t *c, pmix_mutex_t *m)
{
int rc = 0;
c->c_waiting++;
if (c->c_signaled) {
c->c_waiting--;
return 0;
}
c->c_signaled--;
c->c_waiting--;
return rc;
}
static inline int pmix_condition_signal(pmix_condition_t *c)
{
if (c->c_waiting) {
c->c_signaled++;
}
return 0;
}
static inline int pmix_condition_broadcast(pmix_condition_t *c)
{
c->c_signaled = c->c_waiting;
return 0;
}
END_C_DECLS
#endif

Просмотреть файл

@ -35,7 +35,6 @@
#endif
#include "mutex.h"
#include "condition.h"
BEGIN_C_DECLS
@ -59,61 +58,125 @@ PMIX_EXPORT extern bool pmix_debug_threads;
PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t);
#define pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread)
typedef pthread_cond_t pmix_condition_t;
#define pmix_condition_broadcast(a) pthread_cond_broadcast(a)
#define pmix_condition_signal(a) pthread_cond_signal(a)
#define PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER
typedef struct {
pmix_mutex_t mutex;
pmix_condition_t cond;
volatile bool active;
} pmix_lock_t;
#define PMIX_CONSTRUCT_LOCK(l) \
do { \
PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \
pthread_cond_init(&(l)->cond, NULL); \
(l)->active = true; \
} while(0)
#define PMIX_DESTRUCT_LOCK(l) \
do { \
PMIX_DESTRUCT(&(l)->mutex); \
pthread_cond_destroy(&(l)->cond); \
} while(0)
#if PMIX_ENABLE_DEBUG
#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \
do { \
PMIX_THREAD_LOCK((lck)); \
if (pmix_debug_threads) { \
pmix_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while (*(act)) { \
pmix_condition_wait((cnd), (lck)); \
} \
if (pmix_debug_threads) { \
pmix_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
*(act) = true; \
} while(0);
#define PMIX_ACQUIRE_THREAD(lck) \
do { \
pmix_mutex_lock(&(lck)->mutex); \
if (pmix_debug_threads) { \
pmix_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while ((lck)->active) { \
pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
if (pmix_debug_threads) { \
pmix_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
PMIX_ACQUIRE_OBJECT(lck); \
(lck)->active = true; \
} while(0)
#else
#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \
do { \
PMIX_THREAD_LOCK((lck)); \
while (*(act)) { \
pmix_condition_wait((cnd), (lck)); \
} \
*(act) = true; \
} while(0);
#define PMIX_ACQUIRE_THREAD(lck) \
do { \
pmix_mutex_lock(&(lck)->mutex); \
while ((lck)->active) { \
pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
PMIX_ACQUIRE_OBJECT(lck); \
(lck)->active = true; \
} while(0)
#endif
#if PMIX_ENABLE_DEBUG
#define PMIX_RELEASE_THREAD(lck, cnd, act) \
#define PMIX_WAIT_THREAD(lck) \
do { \
pmix_mutex_lock(&(lck)->mutex); \
if (pmix_debug_threads) { \
pmix_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while ((lck)->active) { \
pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
if (pmix_debug_threads) { \
pmix_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
PMIX_ACQUIRE_OBJECT(lck); \
pmix_mutex_unlock(&(lck)->mutex); \
} while(0)
#else
#define PMIX_WAIT_THREAD(lck) \
do { \
pmix_mutex_lock(&(lck)->mutex); \
while ((lck)->active) { \
pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
PMIX_ACQUIRE_OBJECT(lck); \
pmix_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif
#if PMIX_ENABLE_DEBUG
#define PMIX_RELEASE_THREAD(lck) \
do { \
if (pmix_debug_threads) { \
pmix_output(0, "Releasing thread %s:%d", \
__FILE__, __LINE__); \
} \
*(act) = false; \
pmix_condition_broadcast((cnd)); \
PMIX_THREAD_UNLOCK((lck)); \
} while(0);
(lck)->active = false; \
PMIX_POST_OBJECT(lck); \
pmix_condition_broadcast(&(lck)->cond); \
pmix_mutex_unlock(&(lck)->mutex); \
} while(0)
#else
#define PMIX_RELEASE_THREAD(lck, cnd, act) \
do { \
*(act) = false; \
pmix_condition_broadcast((cnd)); \
PMIX_THREAD_UNLOCK((lck)); \
} while(0);
#define PMIX_RELEASE_THREAD(lck) \
do { \
(lck)->active = false; \
PMIX_POST_OBJECT(lck); \
pmix_condition_broadcast(&(lck)->cond); \
pmix_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif
#define PMIX_WAKEUP_THREAD(cnd, act) \
do { \
*(act) = false; \
pmix_condition_broadcast((cnd)); \
} while(0);
#define PMIX_WAKEUP_THREAD(lck) \
do { \
pmix_mutex_lock(&(lck)->mutex); \
(lck)->active = false; \
PMIX_POST_OBJECT(lck); \
pmix_condition_broadcast(&(lck)->cond); \
pmix_mutex_unlock(&(lck)->mutex); \
} while(0)
/* provide a macro for forward-proofing the shifting

Просмотреть файл

@ -19,8 +19,9 @@
#if !defined(PMIX_THREADS_WAIT_SYNC_H)
#define PMIX_THREADS_WAIT_SYNC_H
#include "src/include/prefetch.h"
#include "src/atomics/sys/atomic.h"
#include "src/threads/condition.h"
#include "src/threads/threads.h"
#include "src/util/error.h"
#include <pthread.h>

Просмотреть файл

@ -158,18 +158,12 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
pmix_nspace_t *nptr, *nsptr;
char hostname[PMIX_MAX_NSLEN];
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (NULL == proc) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_BAD_PARAM;
}
/* if we were given an nspace in the environment, then we
* must have been spawned by a PMIx server - so even though
* we technically will operate as a tool, we are actually
* a "client" of the PMIx server and should connect that way */
if (NULL != getenv("PMIX_NAMESPACE")) {
return PMIx_Init(proc, info, ninfo);
}
if (0 < pmix_globals.init_cntr) {
/* since we have been called before, the nspace and
* rank should be known. So return them here if
@ -179,19 +173,30 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
proc->rank = pmix_globals.myid.rank;
}
++pmix_globals.init_cntr;
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
/* if we were given an nspace in the environment, then we
* must have been spawned by a PMIx server - so even though
* we technically will operate as a tool, we are actually
* a "client" of the PMIx server and should connect that way */
if (NULL != getenv("PMIX_NAMESPACE")) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIx_Init(proc, info, ninfo);
}
/* setup the runtime - this init's the globals,
* opens and initializes the required frameworks */
if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_TOOL, info, ninfo,
pmix_tool_notify_recv))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t);
pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: init called");
@ -199,13 +204,15 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
/* select our psec module - we take the default as we cannot
* do any better */
if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
/* the server will have to use the same */
pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec;
pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec;
/* connect to the server - returns job info if successful */
if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){
if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
@ -228,6 +235,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
}
}
if (NULL == nsptr) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_NOT_FOUND;
}
@ -239,6 +247,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.string = strdup(nsptr->nspace);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -251,6 +260,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.integer = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -263,6 +273,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -275,6 +286,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -287,6 +299,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -299,7 +312,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
PMIX_RELEASE_THREAD(&pmix_global_lock);
}
PMIX_RELEASE(kptr); // maintain accounting
@ -311,6 +324,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -323,6 +337,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -335,6 +350,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -348,6 +364,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -360,6 +377,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -372,6 +390,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -384,6 +403,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -395,6 +416,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -407,6 +429,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -425,6 +448,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -442,6 +466,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
@ -455,24 +480,49 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
/* callback for wait completion */
static void wait_cbfunc(struct pmix_peer_t *pr,
pmix_ptl_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
/* Tracker for the finalize handshake with the server: the lock blocks
 * the caller, the event is a failsafe timer, and "active" guards
 * against the timer and the server's ack both waking the caller. */
typedef struct {
    pmix_lock_t lock;   /* caller blocks here until woken */
    pmix_event_t ev;    /* failsafe timeout event */
    bool active;        /* true while still waiting for the ack */
} pmix_tool_timeout_t;
/* Timer callback: fires if the server fails to ack our finalize sync
 * within the timeout set in PMIx_tool_finalize.  Releases the thread
 * blocked there so shutdown can proceed anyway.
 *
 * Fixes residue from the old implementation: removed an unused
 * "volatile bool *active" cast of cbdata and an orphaned string
 * statement left over from the previous verbose message. */
static void fin_timeout(int sd, short args, void *cbdata)
{
    pmix_tool_timeout_t *tev = (pmix_tool_timeout_t*)cbdata;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool finwait timeout fired");
    /* only wake the waiter once - the server's ack may already have */
    if (tev->active) {
        tev->active = false;
        PMIX_WAKEUP_THREAD(&tev->lock);
    }
}
/* Callback for finalize completion: the server ack'd our finalize
 * sync.  Stop the failsafe timer and release the waiting thread.
 *
 * Fixes residue from the old implementation: removed a stray
 * "*active = false;" statement referencing an undeclared variable. */
static void finwait_cbfunc(struct pmix_peer_t *pr,
                           pmix_ptl_hdr_t *hdr,
                           pmix_buffer_t *buf, void *cbdata)
{
    pmix_tool_timeout_t *tev = (pmix_tool_timeout_t*)cbdata;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool finwait_cbfunc received");
    /* guard against racing with fin_timeout - only act once */
    if (tev->active) {
        tev->active = false;
        pmix_event_del(&tev->ev);  // stop the timer
        PMIX_WAKEUP_THREAD(&tev->lock);
    }
}
PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
@ -480,13 +530,17 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
pmix_buffer_t *msg;
pmix_cmd_t cmd = PMIX_FINALIZE_CMD;
pmix_status_t rc;
volatile bool active;
pmix_tool_timeout_t tev;
struct timeval tv = {2, 0};
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
pmix_globals.init_cntr = 0;
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:tool finalize called");
@ -505,15 +559,25 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:tool sending finalize sync to server");
/* send to the server */
active = true;;
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg,
wait_cbfunc, (void*)&active))){
/* setup a timer to protect ourselves should the server be unable
* to answer for some reason */
PMIX_CONSTRUCT_LOCK(&tev.lock);
pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0,
fin_timeout, &tev);
tev.active = true;
PMIX_POST_OBJECT(&tev);
pmix_event_add(&tev.ev, &tv);
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg,
finwait_cbfunc, (void*)&tev))){
return rc;
}
/* wait for the ack to return */
PMIX_WAIT_FOR_COMPLETION(active);
PMIX_WAIT_THREAD(&tev.lock);
PMIX_DESTRUCT_LOCK(&tev.lock);
if (tev.active) {
pmix_event_del(&tev.ev);
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:tool finalize sync received");
@ -525,7 +589,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
(void)pmix_progress_thread_pause(NULL);
}
PMIX_DESTRUCT(&pmix_client_globals.myserver);
PMIX_RELEASE(pmix_client_globals.myserver);
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);
/* shutdown services */

Просмотреть файл

@ -6,7 +6,7 @@
* reserved.
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -89,7 +89,7 @@ static void construct(pmix_object_t *stream);
static int do_open(int output_id, pmix_output_stream_t * lds);
static int open_file(int i);
static void free_descriptor(int output_id);
static int make_string(char **no_newline_string, output_desc_t *ldi,
static int make_string(char **out, char **no_newline_string, output_desc_t *ldi,
const char *format, va_list arglist);
static int output(int output_id, const char *format, va_list arglist);
@ -111,8 +111,6 @@ int pmix_output_redirected_syslog_pri = 0;
static bool initialized = false;
static int default_stderr_fd = -1;
static output_desc_t info[PMIX_OUTPUT_MAX_STREAMS];
static char *temp_str = 0;
static size_t temp_str_len = 0;
#if defined(HAVE_SYSLOG)
static bool syslog_opened = false;
#endif
@ -356,50 +354,6 @@ void pmix_output_vverbose(int level, int output_id, const char *format,
}
/*
 * Format a message into a newly allocated string, but only when the
 * stream's verbosity level is at least "level"; otherwise (or on a
 * formatting failure) return NULL.  The caller owns, and must free(),
 * the returned string.
 */
char *pmix_output_string(int level, int output_id, const char *format, ...)
{
    int rc;
    char *ret = NULL;

    /* bounds-check the stream id and honor its verbosity setting */
    if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS &&
        info[output_id].ldi_verbose_level >= level) {
        va_list arglist;
        va_start(arglist, format);
        rc = make_string(&ret, &info[output_id], format, arglist);
        va_end(arglist);
        if (PMIX_SUCCESS != rc) {
            ret = NULL;  /* formatting failed - signal via NULL return */
        }
    }
    return ret;
}
/*
 * va_list form of pmix_output_string(): format a message into a newly
 * allocated string when the stream's verbosity is at least "level";
 * otherwise (or on failure) return NULL.  Caller must free() the result.
 */
char *pmix_output_vstring(int level, int output_id, const char *format,
                          va_list arglist)
{
    int rc;
    char *ret = NULL;

    /* bounds-check the stream id and honor its verbosity setting */
    if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS &&
        info[output_id].ldi_verbose_level >= level) {
        rc = make_string(&ret, &info[output_id], format, arglist);
        if (PMIX_SUCCESS != rc) {
            ret = NULL;  /* formatting failed - signal via NULL return */
        }
    }
    return ret;
}
/*
* Set the verbosity level of a stream
*/
@ -501,11 +455,6 @@ void pmix_output_finalize(void)
free (output_prefix);
free (output_dir);
if(NULL != temp_str) {
free(temp_str);
temp_str = NULL;
temp_str_len = 0;
}
PMIX_DESTRUCT(&verbose);
}
}
@ -813,14 +762,15 @@ static void free_descriptor(int output_id)
}
static int make_string(char **no_newline_string, output_desc_t *ldi,
static int make_string(char **out, char **no_newline_string, output_desc_t *ldi,
const char *format, va_list arglist)
{
size_t len, total_len;
size_t len, total_len, temp_str_len;
bool want_newline = false;
char *temp_str;
/* Make the formatted string */
*out = NULL;
if (0 > vasprintf(no_newline_string, format, arglist)) {
return PMIX_ERR_NOMEM;
}
@ -844,16 +794,11 @@ static int make_string(char **no_newline_string, output_desc_t *ldi,
if (NULL != ldi->ldi_suffix) {
total_len += strlen(ldi->ldi_suffix);
}
if (temp_str_len < total_len + want_newline) {
if (NULL != temp_str) {
free(temp_str);
}
temp_str = (char *) malloc(total_len * 2);
if (NULL == temp_str) {
return PMIX_ERR_OUT_OF_RESOURCE;
}
temp_str_len = total_len * 2;
temp_str = (char *) malloc(total_len * 2);
if (NULL == temp_str) {
return PMIX_ERR_OUT_OF_RESOURCE;
}
temp_str_len = total_len * 2;
if (NULL != ldi->ldi_prefix && NULL != ldi->ldi_suffix) {
if (want_newline) {
snprintf(temp_str, temp_str_len, "%s%s%s\n",
@ -885,7 +830,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi,
snprintf(temp_str, temp_str_len, "%s", *no_newline_string);
}
}
*out = temp_str;
return PMIX_SUCCESS;
}
@ -897,7 +842,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi,
static int output(int output_id, const char *format, va_list arglist)
{
int rc = PMIX_SUCCESS;
char *str, *out = NULL;
char *str=NULL, *out = NULL;
output_desc_t *ldi;
/* Setup */
@ -913,8 +858,8 @@ static int output(int output_id, const char *format, va_list arglist)
ldi = &info[output_id];
/* Make the strings */
if (PMIX_SUCCESS != (rc = make_string(&str, ldi, format, arglist))) {
return rc;
if (PMIX_SUCCESS != (rc = make_string(&out, &str, ldi, format, arglist))) {
goto cleanup;
}
/* Syslog output -- does not use the newline-appended string */
@ -924,15 +869,11 @@ static int output(int output_id, const char *format, va_list arglist)
}
#endif
/* All others (stdout, stderr, file) use temp_str, potentially
with a newline appended */
out = temp_str;
/* stdout output */
if (ldi->ldi_stdout) {
if (0 > write(fileno(stdout), out, (int)strlen(out))) {
return PMIX_ERROR;
rc = PMIX_ERROR;
goto cleanup;
}
fflush(stdout);
}
@ -942,7 +883,8 @@ static int output(int output_id, const char *format, va_list arglist)
if (0 > write((-1 == default_stderr_fd) ?
fileno(stderr) : default_stderr_fd,
out, (int)strlen(out))) {
return PMIX_ERROR;
rc = PMIX_ERROR;
goto cleanup;
}
fflush(stderr);
}
@ -964,7 +906,8 @@ static int output(int output_id, const char *format, va_list arglist)
"[WARNING: %d lines lost because the PMIx process session directory did\n not exist when pmix_output() was invoked]\n",
ldi->ldi_file_num_lines_lost);
if (0 > write(ldi->ldi_fd, buffer, (int)strlen(buffer))) {
return PMIX_ERROR;
rc = PMIX_ERROR;
goto cleanup;
}
ldi->ldi_file_num_lines_lost = 0;
if (out != buffer) {
@ -974,13 +917,22 @@ static int output(int output_id, const char *format, va_list arglist)
}
if (ldi->ldi_fd != -1) {
if (0 > write(ldi->ldi_fd, out, (int)strlen(out))) {
return PMIX_ERROR;
rc = PMIX_ERROR;
goto cleanup;
}
}
}
free(str);
str = NULL;
}
cleanup:
if (NULL != str) {
free(str);
}
if (NULL != out) {
free(out);
}
return rc;
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -423,29 +423,6 @@ struct pmix_output_stream_t {
void pmix_output_vverbose(int verbose_level, int output_id,
const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0);
/**
* Send output to a string if the verbosity level is high enough.
*
* @param output_id Stream id returned from pmix_output_open().
* @param level Target verbosity level.
* @param format printf-style format string.
* @param varargs printf-style varargs list to fill the string
* specified by the format parameter.
*
* Exactly the same as pmix_output_verbose(), except the output it
* sent to a string instead of to the stream. If the verbose
* level is not high enough, NULL is returned. The caller is
* responsible for free()'ing the returned string.
*/
char *pmix_output_string(int verbose_level, int output_id,
const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4);
/**
* Same as pmix_output_string, but accepts a va_list form of varargs.
*/
char *pmix_output_vstring(int verbose_level, int output_id,
const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0);
/**
* Set the verbosity level for a stream.
*
@ -567,4 +544,3 @@ PMIX_CLASS_DECLARATION(pmix_output_stream_t);
END_C_DECLS
#endif /* PMIX_OUTPUT_H_ */

Просмотреть файл

@ -73,6 +73,54 @@ static void opcbfunc(pmix_status_t status, void *cbdata)
*active = false;
}
/* this is an event notification function that we explicitly request
* be called when the PMIX_MODEL_DECLARED notification is issued.
* We could catch it in the general event notification function and test
* the status to see if the status matched, but it often is simpler
* to declare a use-specific notification callback point. In this case,
* we are asking to know whenever a model is declared as a means
* of testing server self-notification */
/* Handler registered specifically for PMIX_MODEL_DECLARED events: prints
 * the status plus any string-valued directives, then hands control to
 * the next handler in the chain. */
static void model_callback(size_t evhdlr_registration_id,
                           pmix_status_t status,
                           const pmix_proc_t *source,
                           pmix_info_t info[], size_t ninfo,
                           pmix_info_t results[], size_t nresults,
                           pmix_event_notification_cbfunc_fn_t cbfunc,
                           void *cbdata)
{
    /* announce receipt of the event */
    fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n",
            myproc.nspace, myproc.rank, status, PMIx_Error_string(status));

    /* dump every string-valued info key that came along */
    for (size_t idx = 0; idx < ninfo; idx++) {
        if (PMIX_STRING != info[idx].value.type) {
            continue;
        }
        fprintf(stderr, "%s:%d\t%s:\t%s\n",
                myproc.nspace, myproc.rank,
                info[idx].key, info[idx].value.data.string);
    }

    /* we must NOT declare ourselves the last step - doing so would keep
     * the notification from reaching any other registered listeners */
    if (NULL != cbfunc) {
        cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata);
    }
}
/* Event handler registration completes asynchronously: report the
 * outcome and clear the flag main() is spinning on. */
static void model_registration_callback(pmix_status_t status,
                                        size_t evhandler_ref,
                                        void *cbdata)
{
    volatile int *waiting = (volatile int*)cbdata;

    fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n",
            status, (unsigned long)evhandler_ref);
    *waiting = false;
}
int main(int argc, char **argv)
{
int rc;
@ -84,7 +132,9 @@ int main(int argc, char **argv)
int cnt, j;
bool doabort = false;
volatile bool active;
pmix_info_t info;
pmix_info_t info, *iptr;
size_t ninfo;
pmix_status_t code;
if (1 < argc) {
if (0 == strcmp("-abort", argv[1])) {
@ -92,12 +142,16 @@ int main(int argc, char **argv)
}
}
/* init us */
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
/* init us and declare we are a test programming model */
PMIX_INFO_CREATE(iptr, 2);
PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING);
PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING);
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) {
pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s",
myproc.nspace, myproc.rank, PMIx_Error_string(rc));
exit(rc);
}
PMIX_INFO_FREE(iptr, 2);
pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);
/* test something */
@ -110,6 +164,19 @@ int main(int argc, char **argv)
}
PMIX_VALUE_RELEASE(val);
/* register a handler specifically for when models declare */
active = true;
ninfo = 1;
PMIX_INFO_CREATE(iptr, ninfo);
PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING);
code = PMIX_MODEL_DECLARED;
PMIx_Register_event_handler(&code, 1, iptr, ninfo,
model_callback, model_registration_callback, (void*)&active);
while (active) {
usleep(10);
}
PMIX_INFO_FREE(iptr, ninfo);
/* register our errhandler */
active = true;
PMIx_Register_event_handler(NULL, 0, NULL, 0,

Просмотреть файл

@ -214,9 +214,10 @@ static void model_callback(size_t evhdlr_registration_id,
size_t n;
/* just let us know it was received */
fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status));
fprintf(stderr, "SIMPTEST: Model event handler called with status %d(%s)\n",
status, PMIx_Error_string(status));
for (n=0; n < ninfo; n++) {
if (0 == strncmp(info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) {
if (PMIX_STRING == info[n].value.type) {
fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string);
}
}

Просмотреть файл

@ -120,15 +120,32 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = {
.register_jobid = pmix2x_register_jobid
};
/* Generic op-completion callback: convert the PMIx status to an OPAL
 * return code, forward it to the caller's callback (if any), and drop
 * our reference on the caddy. */
static void opcbfunc(pmix_status_t status, void *cbdata)
{
    pmix2x_opcaddy_t *caddy = (pmix2x_opcaddy_t *) cbdata;

    OPAL_ACQUIRE_OBJECT(caddy);
    if (caddy->opcbfunc != NULL) {
        int rc = pmix2x_convert_rc(status);
        caddy->opcbfunc(rc, caddy->cbdata);
    }
    OBJ_RELEASE(caddy);
}
/* Look up the PMIx nspace previously registered for a jobid.  Returns
 * NULL when no tracker matches.  The framework lock protects the
 * component's jobid list for the duration of the scan. */
static const char *pmix2x_get_nspace(opal_jobid_t jobid)
{
    const char *nspace = NULL;
    opal_pmix2x_jobid_trkr_t *trk;

    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
    OPAL_LIST_FOREACH(trk, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
        if (jobid == trk->jobid) {
            nspace = trk->nspace;
            break;
        }
    }
    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
    return nspace;
}
@ -136,9 +153,12 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
{
opal_pmix2x_jobid_trkr_t *jptr;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
/* if we don't already have it, add this to our jobid tracker */
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == jobid) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return;
}
}
@ -146,6 +166,7 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace)
(void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN);
jptr->jobid = jobid;
opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
}
static void event_hdlr_complete(pmix_status_t status, void *cbdata)
@ -200,42 +221,6 @@ static void return_local_event_hdlr(int status, opal_list_t *results,
}
}
/* Event-loop side of the notification threadshift: runs in our own
 * progress thread after pmix2x_event_hdlr queued the caddy.  Finds the
 * registration matching cd->id and invokes its handler, or falls
 * through to the PMIx completion callback when no handler matches.
 *
 * Ownership: when a handler is invoked, cd is handed to it (released
 * later via return_local_event_hdlr); on the no-match path we release
 * cd->info and cd here ourselves. */
static void _event_hdlr(int sd, short args, void *cbdata)
{
    pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
    opal_pmix2x_event_t *event;

    /* memory barrier to safely read the fields posted by the pusher */
    OPAL_ACQUIRE_OBJECT(cd);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status);
    /* cycle thru the registrations */
    OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
        if (cd->id == event->index) {
            /* found it - invoke the handler, pointing its
             * callback function to our callback function */
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s _EVENT_HDLR CALLING EVHDLR",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
            if (NULL != event->handler) {
                /* cd transfers to the handler's completion path here */
                event->handler(cd->status, &cd->pname,
                               cd->info, &cd->results,
                               return_local_event_hdlr, (void*)cd);
                return;
            }
        }
    }
    /* if we didn't find a match, we still have to call their final callback */
    if (NULL != cd->pmixcbfunc) {
        cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata);
    }
    OPAL_LIST_RELEASE(cd->info);
    OBJ_RELEASE(cd);
    return;
}
/* this function will be called by the PMIx client library
* whenever it receives notification of an event. The
* notification can come from an ORTE daemon (when launched
@ -253,15 +238,14 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
int rc;
opal_value_t *iptr;
size_t n;
/* this is in the PMIx local thread - need to threadshift to
* our own thread as we will be accessing framework-global
* lists and objects */
opal_pmix2x_event_t *event;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECEIVED NOTIFICATION OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->id = evhdlr_registration_id;
cd->pmixcbfunc = cbfunc;
@ -281,6 +265,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(cd);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return;
}
cd->pname.vpid = pmix2x_convert_rank(source->rank);
@ -315,11 +300,35 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
}
}
/* now push it into the local thread */
opal_event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _event_hdlr, cd);
OPAL_POST_OBJECT(cd);
opal_event_active(&cd->ev, EV_WRITE, 1);
/* cycle thru the registrations */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (evhdlr_registration_id == event->index) {
/* found it - invoke the handler, pointing its
* callback function to our callback function */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s _EVENT_HDLR CALLING EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (NULL != event->handler) {
OBJ_RETAIN(event);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
event->handler(cd->status, &cd->pname,
cd->info, &cd->results,
return_local_event_hdlr, cd);
OBJ_RELEASE(event);
return;
}
}
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* if we didn't find a match, we still have to call their final callback */
if (NULL != cbfunc) {
cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata);
}
OPAL_LIST_RELEASE(cd->info);
OBJ_RELEASE(cd);
return;
}
opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank)
@ -627,6 +636,20 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist)
}
}
/* Translate an OPAL jobid into the PMIx nspace string registered for it;
 * NULL when the jobid is unknown.
 * NOTE(review): unlike pmix2x_get_nspace, no framework lock is taken
 * here - presumably every caller already holds opal_pmix_base.lock;
 * confirm before calling this from new code paths. */
char* pmix2x_convert_jobid(opal_jobid_t jobid)
{
    opal_pmix2x_jobid_trkr_t *trk;
    char *nspace = NULL;

    OPAL_LIST_FOREACH(trk, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
        if (trk->jobid == jobid) {
            nspace = trk->nspace;
            break;
        }
    }
    return nspace;
}
/**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING
**** CONVERSION OF PROC STATES ****/
@ -1004,41 +1027,47 @@ static void errreg_cbfunc (pmix_status_t status,
OBJ_RELEASE(op);
}
static void _reg_hdlr(int sd, short args, void *cbdata)
static void register_handler(opal_list_t *event_codes,
opal_list_t *info,
opal_pmix_notification_fn_t evhandler,
opal_pmix_evhandler_reg_cbfunc_t cbfunc,
void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
size_t n;
opal_value_t *kv;
OPAL_ACQUIRE_OBJECT(cd);
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s REGISTER HANDLER CODES %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == cd->event_codes) ? "NULL" : "NON-NULL");
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
if (NULL != cbfunc) {
cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata);
}
return;
}
op = OBJ_NEW(pmix2x_opcaddy_t);
op->evregcbfunc = cd->cbfunc;
op->cbdata = cd->cbdata;
op->evregcbfunc = cbfunc;
op->cbdata = cbdata;
/* convert the event codes */
if (NULL != cd->event_codes) {
op->ncodes = opal_list_get_size(cd->event_codes);
if (NULL != event_codes) {
op->ncodes = opal_list_get_size(event_codes);
op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t));
n=0;
OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) {
OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) {
op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer);
++n;
}
}
/* convert the list of info to an array of pmix_info_t */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (NULL != info) {
op->ninfo = opal_list_get_size(info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
OPAL_LIST_FOREACH(kv, info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
@ -1048,60 +1077,49 @@ static void _reg_hdlr(int sd, short args, void *cbdata)
/* register the event */
op->event = OBJ_NEW(opal_pmix2x_event_t);
op->event->handler = cd->evhandler;
op->event->handler = evhandler;
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
PMIx_Register_event_handler(op->pcodes, op->ncodes,
op->info, op->ninfo,
pmix2x_event_hdlr, errreg_cbfunc, op);
OBJ_RELEASE(cd);
return;
}
static void register_handler(opal_list_t *event_codes,
opal_list_t *info,
opal_pmix_notification_fn_t evhandler,
opal_pmix_evhandler_reg_cbfunc_t cbfunc,
void *cbdata)
{
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata);
return;
}
/* Event-loop side of the deregistration threadshift: remove the local
 * registration tracker for cd->handler, tell the PMIx library to drop
 * the handler, then release the caller and the caddy.
 * NOTE(review): the PMIx-level deregistration is fire-and-forget
 * (NULL completion callback) - the caller is released immediately. */
static void _dereg_hdlr(int sd, short args, void *cbdata)
{
    pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
    opal_pmix2x_event_t *event;

    /* memory barrier to safely read the fields posted by the pusher */
    OPAL_ACQUIRE_OBJECT(cd);
    /* look for this event */
    OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
        if (cd->handler == event->index) {
            opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
            OBJ_RELEASE(event);
            break;
        }
    }
    /* tell the library to deregister this handler */
    PMIx_Deregister_event_handler(cd->handler, NULL, NULL);
    /* release the caller */
    if (NULL != cd->opcbfunc) {
        cd->opcbfunc(OPAL_SUCCESS, cd->cbdata);
    }
    OBJ_RELEASE(cd);
}
static void deregister_handler(size_t evhandler,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata);
pmix2x_opcaddy_t *op;
opal_pmix2x_event_t *event;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
if (NULL != cbfunc) {
cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata);
}
return;
}
/* look for this event */
OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
if (evhandler == event->index) {
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
OBJ_RELEASE(event);
break;
}
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
op = OBJ_NEW(pmix2x_opcaddy_t);
op->opcbfunc = cbfunc;
op->cbdata = cbdata;
/* tell the library to deregister this handler */
PMIx_Deregister_event_handler(evhandler, opcbfunc, op);
return;
}
@ -1114,57 +1132,56 @@ static void notify_complete(pmix_status_t status, void *cbdata)
OBJ_RELEASE(op);
}
static void _notify(int sd, short args, void *cbdata)
static int notify_event(int status,
const opal_process_name_t *source,
opal_pmix_data_range_t range,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata;
pmix2x_opcaddy_t *op;
opal_value_t *kv;
pmix_proc_t p, *pptr;
pmix_status_t pstatus;
size_t n;
int rc=OPAL_SUCCESS;
pmix_data_range_t prange;
opal_pmix2x_jobid_trkr_t *job, *jptr;
char *nsptr;
OPAL_ACQUIRE_OBJECT(cd);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
op = OBJ_NEW(pmix2x_opcaddy_t);
/* convert the status */
pstatus = pmix2x_convert_opalrc(cd->status);
pstatus = pmix2x_convert_opalrc(status);
/* convert the source */
if (NULL == cd->source) {
if (NULL == source) {
pptr = NULL;
} else {
/* look thru our list of jobids and find the
* corresponding nspace */
job = NULL;
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->source->jobid) {
job = jptr;
break;
}
if (NULL == (nsptr = pmix2x_convert_jobid(source->jobid))) {
OBJ_RELEASE(op);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_FOUND;
}
if (NULL == job) {
rc = OPAL_ERR_NOT_FOUND;
goto release;
}
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
(void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(source->vpid);
pptr = &p;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* convert the range */
prange = pmix2x_convert_opalrange(cd->range);
prange = pmix2x_convert_opalrange(range);
/* convert the list of info */
if (NULL != cd->info) {
op->ninfo = opal_list_get_size(cd->info);
if (NULL != info) {
op->ninfo = opal_list_get_size(info);
if (0 < op->ninfo) {
PMIX_INFO_CREATE(op->info, op->ninfo);
n=0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
OPAL_LIST_FOREACH(kv, info, opal_value_t) {
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, kv);
++n;
@ -1174,26 +1191,8 @@ static void _notify(int sd, short args, void *cbdata)
/* ask the library to notify our clients */
pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op);
rc = pmix2x_convert_rc(pstatus);
release:
/* release the caller */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(rc, cd->cbdata);
}
OBJ_RELEASE(cd);
}
static int notify_event(int status,
const opal_process_name_t *source,
opal_pmix_data_range_t range,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata);
return OPAL_SUCCESS;
return pmix2x_convert_rc(pstatus);
}
static void relcbfunc(void *cbdata)
@ -1254,6 +1253,14 @@ static void pmix2x_query(opal_list_t *queries,
pmix_status_t prc;
opal_pmix_query_t *q;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = OPAL_ERR_NOT_INITIALIZED;
goto CLEANUP;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* create the caddy */
cd = OBJ_NEW(pmix2x_opcaddy_t);
@ -1304,18 +1311,6 @@ static void pmix2x_query(opal_list_t *queries,
return;
}
static void opcbfunc(pmix_status_t status, void *cbdata)
{
pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata;
OPAL_ACQUIRE_OBJECT(op);
if (NULL != op->opcbfunc) {
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
}
OBJ_RELEASE(op);
}
static void pmix2x_log(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
@ -1325,6 +1320,14 @@ static void pmix2x_log(opal_list_t *info,
pmix2x_opcaddy_t *cd;
pmix_status_t prc;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = OPAL_ERR_NOT_INITIALIZED;
goto CLEANUP;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* create the caddy */
cd = OBJ_NEW(pmix2x_opcaddy_t);
@ -1388,27 +1391,36 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
static void evcon(opal_pmix2x_event_t *p)
{
OPAL_PMIX_CONSTRUCT_LOCK(&p->lock);
p->handler = NULL;
p->cbdata = NULL;
}
static void evdes(opal_pmix2x_event_t *p)
{
OPAL_PMIX_DESTRUCT_LOCK(&p->lock);
}
OBJ_CLASS_INSTANCE(opal_pmix2x_event_t,
opal_list_item_t,
evcon, NULL);
evcon, evdes);
static void opcon(pmix2x_opcaddy_t *p)
{
memset(&p->p, 0, sizeof(pmix_proc_t));
p->nspace = NULL;
p->procs = NULL;
p->nprocs = 0;
p->pdata = NULL;
p->npdata = 0;
p->error_procs = NULL;
p->nerror_procs = 0;
p->info = NULL;
p->ninfo = 0;
p->apps = NULL;
p->sz = 0;
p->active = false;
OPAL_PMIX_CONSTRUCT_LOCK(&p->lock);
p->codes = NULL;
p->pcodes = NULL;
p->ncodes = 0;
p->queries = NULL;
p->nqueries = 0;
p->event = NULL;
@ -1418,17 +1430,25 @@ static void opcon(pmix2x_opcaddy_t *p)
p->lkcbfunc = NULL;
p->spcbfunc = NULL;
p->evregcbfunc = NULL;
p->qcbfunc = NULL;
p->cbdata = NULL;
}
static void opdes(pmix2x_opcaddy_t *p)
{
OPAL_PMIX_DESTRUCT_LOCK(&p->lock);
if (NULL != p->nspace) {
free(p->nspace);
}
if (NULL != p->procs) {
PMIX_PROC_FREE(p->procs, p->nprocs);
}
if (NULL != p->pdata) {
PMIX_PDATA_FREE(p->pdata, p->npdata);
}
if (NULL != p->error_procs) {
PMIX_PROC_FREE(p->error_procs, p->nerror_procs);
}
if (0 < p->ninfo) {
if (NULL != p->info) {
PMIX_INFO_FREE(p->info, p->ninfo);
}
if (NULL != p->apps) {
@ -1473,7 +1493,9 @@ OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t,
static void tscon(pmix2x_threadshift_t *p)
{
p->active = false;
OPAL_PMIX_CONSTRUCT_LOCK(&p->lock);
p->msg = NULL;
p->strings = NULL;
p->source = NULL;
p->event_codes = NULL;
p->info = NULL;
@ -1486,6 +1508,10 @@ static void tscon(pmix2x_threadshift_t *p)
}
static void tsdes(pmix2x_threadshift_t *p)
{
OPAL_PMIX_DESTRUCT_LOCK(&p->lock);
if (NULL != p->strings) {
free(p->strings);
}
OPAL_LIST_DESTRUCT(&p->results);
}
OBJ_CLASS_INSTANCE(pmix2x_threadshift_t,

Просмотреть файл

@ -31,7 +31,7 @@
#include "opal/mca/event/event.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/pmix/base/base.h"
#include "pmix_server.h"
#include "pmix_common.h"
@ -62,6 +62,7 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t);
typedef struct {
opal_list_item_t super;
opal_pmix_lock_t lock;
size_t index;
opal_pmix_notification_fn_t handler;
void *cbdata;
@ -78,17 +79,21 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t);
typedef struct {
opal_object_t super;
opal_event_t ev;
pmix_status_t status;
char *nspace;
pmix_proc_t p;
pmix_proc_t *procs;
size_t nprocs;
pmix_pdata_t *pdata;
size_t npdata;
pmix_proc_t *error_procs;
size_t nerror_procs;
pmix_info_t *info;
size_t ninfo;
pmix_app_t *apps;
size_t sz;
volatile bool active;
opal_pmix_lock_t lock;
opal_list_t *codes;
pmix_status_t *pcodes;
size_t ncodes;
@ -127,7 +132,9 @@ OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t);
typedef struct {
opal_object_t super;
opal_event_t ev;
volatile bool active;
opal_pmix_lock_t lock;
const char *msg;
char *strings;
size_t id;
int status;
opal_process_name_t pname;
@ -136,6 +143,7 @@ typedef struct {
opal_pmix_data_range_t range;
bool nondefault;
size_t handler;
opal_value_t *val;
opal_list_t *event_codes;
opal_list_t *info;
opal_list_t results;
@ -143,6 +151,8 @@ typedef struct {
opal_pmix_evhandler_reg_cbfunc_t cbfunc;
opal_pmix_op_cbfunc_t opcbfunc;
pmix_event_notification_cbfunc_fn_t pmixcbfunc;
opal_pmix_value_cbfunc_t valcbfunc;
opal_pmix_lookup_cbfunc_t lkcbfunc;
void *cbdata;
} pmix2x_threadshift_t;
OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
@ -191,6 +201,14 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t);
opal_event_active(&((_cd)->ev), EV_WRITE, 1); \
} while(0)
#define OPAL_PMIX2X_THREADSHIFT(p, cb) \
do { \
opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \
-1, EV_WRITE, (cb), (p)); \
OPAL_POST_OBJECT(p); \
opal_event_active(&((p)->ev), EV_WRITE, 1); \
} while(0)
/**** CLIENT FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist);
OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void);
@ -296,6 +314,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv,
OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir);
OPAL_MODULE_DECLSPEC char* pmix2x_convert_jobid(opal_jobid_t jobid);
END_C_DECLS
#endif /* MCA_PMIX_EXTERNAL_H */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -200,6 +200,10 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv
opalcaddy->cbdata = cbdata;
/* pass it up */
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s FINALIZED",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy);
if (OPAL_SUCCESS != rc) {
OBJ_RELEASE(opalcaddy);
@ -228,6 +232,11 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object,
}
proc.vpid = pmix2x_convert_rank(p->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED ABORT",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -279,10 +288,12 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata
/* if we were collecting all data, then check for any pending
* dmodx requests that we cached and notify them that the
* data has arrived */
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix2x_component.dmdx))) {
dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL);
OBJ_RELEASE(dmdx);
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
} else {
OBJ_RELEASE(opalcaddy);
}
@ -299,6 +310,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
opal_value_t *iptr;
int rc;
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (NULL == host_module || NULL == host_module->fence_nb) {
return PMIX_ERR_NOT_SUPPORTED;
}
@ -359,6 +373,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p,
}
proc.vpid = pmix2x_convert_rank(p->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED DMODX",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->mdxcbfunc = cbfunc;
@ -372,10 +391,12 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p,
* arrived - this will trigger the pmix server to tell the
* client that the data is available */
if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) {
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t);
dmdx->cbfunc = cbfunc;
dmdx->cbdata = cbdata;
opal_list_append(&mca_pmix_pmix2x_component.dmdx, &dmdx->super);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return PMIX_SUCCESS;
}
@ -421,6 +442,11 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
}
proc.vpid = pmix2x_convert_rank(p->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED PUBLISH",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -497,6 +523,11 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys,
}
proc.vpid = pmix2x_convert_rank(p->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED LOOKUP",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->lkupcbfunc = cbfunc;
@ -543,6 +574,11 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys,
}
proc.vpid = pmix2x_convert_rank(p->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED UNPUBLISH",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -767,6 +803,10 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes,
opal_value_t *oinfo;
int rc;
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s REGISTER EVENTS",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -795,6 +835,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes,
static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return PMIX_ERR_NOT_SUPPORTED;
}
@ -829,6 +872,11 @@ static pmix_status_t server_notify_event(pmix_status_t code,
}
src.vpid = pmix2x_convert_rank(source->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED NOTIFY",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(src));
/* ignore the range for now */
/* convert the info */
@ -925,6 +973,11 @@ static pmix_status_t server_query(pmix_proc_t *proct,
}
requestor.vpid = pmix2x_convert_rank(proct->rank);
opal_output_verbose(3, opal_pmix_base_framework.framework_output,
"%s CLIENT %s CALLED QUERY",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(requestor));
/* convert the queries */
for (n=0; n < nqueries; n++) {
q = OBJ_NEW(opal_pmix_query_t);

Просмотреть файл

@ -52,29 +52,20 @@
extern pmix_server_module_t mymodule;
extern opal_pmix_server_module_t *host_module;
static char *dbgvalue=NULL;
static size_t errhdler_ref = 0;
#define PMIX_WAIT_FOR_COMPLETION(a) \
do { \
while ((a)) { \
usleep(10); \
} \
OPAL_ACQUIRE_OBJECT(a); \
} while (0)
static void errreg_cbfunc (pmix_status_t status,
size_t errhandler_ref,
void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata;
OPAL_ACQUIRE_OBJECT(active);
errhdler_ref = errhandler_ref;
OPAL_ACQUIRE_OBJECT(ev);
ev->index = errhandler_ref;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu",
status, (unsigned long)errhandler_ref);
OPAL_POST_OBJECT(active);
*active = false;
OPAL_POST_OBJECT(ev);
OPAL_PMIX_WAKEUP_THREAD(&ev->lock);
}
static void opcbfunc(pmix_status_t status, void *cbdata)
@ -86,21 +77,15 @@ static void opcbfunc(pmix_status_t status, void *cbdata)
if (NULL != op->opcbfunc) {
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata);
}
if (op->active) {
op->status = status;
OPAL_POST_OBJECT(op);
op->active = false;
} else {
OBJ_RELEASE(op);
}
OBJ_RELEASE(op);
}
static void op2cbfunc(pmix_status_t status, void *cbdata)
static void lkcbfunc(pmix_status_t status, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata;
OPAL_POST_OBJECT(active);
*active = false;
OPAL_POST_OBJECT(lk);
OPAL_PMIX_WAKEUP_THREAD(lk);
}
int pmix2x_server_init(opal_pmix_server_module_t *module,
@ -111,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
opal_value_t *kv;
pmix_info_t *pinfo;
size_t sz, n;
volatile bool active;
opal_pmix2x_event_t *event;
opal_pmix2x_jobid_trkr_t *job;
opal_pmix_lock_t lk;
if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
putenv(dbgvalue);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 == opal_pmix_base.initialized) {
if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
putenv(dbgvalue);
}
}
++opal_pmix_base.initialized;
/* convert the list to an array of pmix_info_t */
if (NULL != info) {
@ -140,6 +131,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
(void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid);
job->jobid = OPAL_PROC_MY_NAME.jobid;
opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) {
PMIX_INFO_FREE(pinfo, sz);
@ -151,41 +143,53 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
host_module = module;
/* register the default event handler */
active = true;
event = OBJ_NEW(opal_pmix2x_event_t);
opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
PMIX_INFO_CREATE(pinfo, 1);
PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING);
PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active);
PMIX_WAIT_FOR_COMPLETION(active);
PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)event);
OPAL_PMIX_WAIT_THREAD(&event->lock);
PMIX_INFO_FREE(pinfo, 1);
/* as we might want to use some client-side functions, be sure
* to register our own nspace */
OPAL_PMIX_CONSTRUCT_LOCK(&lk);
PMIX_INFO_CREATE(pinfo, 1);
PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL);
active = true;
PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active);
PMIX_WAIT_FOR_COMPLETION(active);
PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk);
OPAL_PMIX_WAIT_THREAD(&lk);
OPAL_PMIX_DESTRUCT_LOCK(&lk);
PMIX_INFO_FREE(pinfo, 1);
return OPAL_SUCCESS;
}
static void fincb(pmix_status_t status, void *cbdata)
static void dereg_cbfunc(pmix_status_t st, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
OPAL_POST_OBJECT(active);
*active = false;
opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata;
OPAL_PMIX_WAKEUP_THREAD(&ev->lock);
}
int pmix2x_server_finalize(void)
{
pmix_status_t rc;
volatile bool active;
opal_pmix2x_event_t *event, *ev2;
/* deregister the default event handler */
active = true;
PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active);
PMIX_WAIT_FOR_COMPLETION(active);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
--opal_pmix_base.initialized;
if (0 < opal_pmix_base.initialized) {
/* deregister all event handlers */
OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
OPAL_PMIX_WAIT_THREAD(&event->lock);
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
OBJ_RELEASE(event);
}
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = PMIx_server_finalize();
return pmix2x_convert_rc(rc);
@ -195,6 +199,13 @@ int pmix2x_server_gen_regex(const char *input, char **regex)
{
pmix_status_t rc;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = PMIx_generate_regex(input, regex);
return pmix2x_convert_rc(rc);
}
@ -204,13 +215,23 @@ int pmix2x_server_gen_ppn(const char *input, char **ppn)
{
pmix_status_t rc;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = PMIx_generate_ppn(input, ppn);
return pmix2x_convert_rc(rc);
}
static void _reg_nspace(int sd, short args, void *cbdata)
int pmix2x_server_register_nspace(opal_jobid_t jobid,
int nlocalprocs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_value_t *kv, *k2;
pmix_info_t *pinfo = NULL, *pmap;
size_t sz, szmap, m, n;
@ -218,28 +239,31 @@ static void _reg_nspace(int sd, short args, void *cbdata)
pmix_status_t rc;
opal_list_t *pmapinfo;
opal_pmix2x_jobid_trkr_t *job;
pmix2x_opcaddy_t op;
opal_pmix_lock_t lock;
int ret;
OPAL_ACQUIRE_OBJECT(cd);
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
/* convert the jobid */
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid);
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid);
/* store this job in our list of known nspaces */
job = OBJ_NEW(opal_pmix2x_jobid_trkr_t);
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
job->jobid = cd->jobid;
job->jobid = jobid;
opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* convert the list to an array of pmix_info_t */
if (NULL != cd->info) {
sz = opal_list_get_size(cd->info);
if (NULL != info) {
sz = opal_list_get_size(info);
PMIX_INFO_CREATE(pinfo, sz);
n = 0;
OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
OPAL_LIST_FOREACH(kv, info, opal_value_t) {
(void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN);
if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) {
pinfo[n].value.type = PMIX_DATA_ARRAY;
@ -269,115 +293,63 @@ static void _reg_nspace(int sd, short args, void *cbdata)
pinfo = NULL;
}
OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t);
op.active = true;
rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz,
opcbfunc, (void*)&op);
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz,
lkcbfunc, (void*)&lock);
if (PMIX_SUCCESS == rc) {
PMIX_WAIT_FOR_COMPLETION(op.active);
} else {
op.status = rc;
}
/* ensure we execute the cbfunc so the caller doesn't hang */
if (NULL != cd->opcbfunc) {
cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata);
OPAL_PMIX_WAIT_THREAD(&lock);
}
OPAL_PMIX_DESTRUCT_LOCK(&lock);
if (NULL != pinfo) {
PMIX_INFO_FREE(pinfo, sz);
}
OBJ_DESTRUCT(&op);
OBJ_RELEASE(cd);
}
int pmix2x_server_register_nspace(opal_jobid_t jobid,
int nlocalprocs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
pmix2x_threadshift_t *cd;
ret = pmix2x_convert_rc(rc);
/* we must threadshift this request as it touches
* shared lists of objects */
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->jobid = jobid;
cd->status = nlocalprocs;
cd->info = info;
cd->opcbfunc = cbfunc;
cd->cbdata = cbdata;
/* if the cbfunc is NULL, then the caller is in an event
* and we can directly call the processing function */
if (NULL == cbfunc) {
_reg_nspace(0, 0, cd);
} else {
opal_event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _reg_nspace, cd);
OPAL_POST_OBJECT(cd);
opal_event_active(&cd->ev, EV_WRITE, 1);
/* release the caller */
if (NULL != cbfunc) {
cbfunc(ret, cbdata);
}
return OPAL_SUCCESS;
}
static void tdcbfunc(pmix_status_t status, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
OPAL_ACQUIRE_OBJECT(cd);
if (NULL != cd->opcbfunc) {
cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata);
}
if (cd->active) {
OPAL_POST_OBJECT(cd);
cd->active = false;
} else {
OBJ_RELEASE(cd);
}
}
static void _dereg_nspace(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_jobid_trkr_t *jptr;
OPAL_ACQUIRE_OBJECT(cd);
/* if we don't already have it, we can ignore this */
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->jobid) {
/* found it - tell the server to deregister */
cd->active = true;
PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd);
PMIX_WAIT_FOR_COMPLETION(cd->active);
OBJ_RELEASE(cd);
/* now get rid of it from our list */
opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super);
OBJ_RELEASE(jptr);
return;
}
}
/* must release the caller */
tdcbfunc(PMIX_ERR_NOT_FOUND, cd);
return ret;
}
void pmix2x_server_deregister_nspace(opal_jobid_t jobid,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
pmix2x_threadshift_t *cd;
opal_pmix2x_jobid_trkr_t *jptr;
opal_pmix_lock_t lock;
/* we must threadshift this request as it touches
* shared lists of objects */
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->jobid = jobid;
cd->opcbfunc = cbfunc;
cd->cbdata = cbdata;
if (NULL == cbfunc) {
_dereg_nspace(0, 0, cd);
} else {
opal_event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _dereg_nspace, cd);
OPAL_POST_OBJECT(cd);
opal_event_active(&cd->ev, EV_WRITE, 1);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* release the caller */
if (NULL != cbfunc) {
cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata);
}
return;
}
/* if we don't already have it, we can ignore this */
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == jobid) {
/* found it - tell the server to deregister */
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock);
OPAL_PMIX_WAIT_THREAD(&lock);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
/* now get rid of it from our list */
opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super);
OBJ_RELEASE(jptr);
break;
}
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* release the caller */
if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, cbdata);
}
}
@ -389,67 +361,64 @@ int pmix2x_server_register_client(const opal_process_name_t *proc,
{
pmix_status_t rc;
pmix_proc_t p;
pmix2x_opcaddy_t op;
opal_pmix_lock_t lock;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* convert the jobid */
(void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid);
p.rank = pmix2x_convert_opalrank(proc->vpid);
OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t);
op.active = true;
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
rc = PMIx_server_register_client(&p, uid, gid, server_object,
opcbfunc, (void*)&op);
lkcbfunc, (void*)&lock);
if (PMIX_SUCCESS == rc) {
PMIX_WAIT_FOR_COMPLETION(op.active);
rc = op.status;
OPAL_PMIX_WAIT_THREAD(&lock);
}
OBJ_DESTRUCT(&op);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
return pmix2x_convert_rc(rc);
}
static void _dereg_client(int sd, short args, void *cbdata)
{
pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata;
opal_pmix2x_jobid_trkr_t *jptr;
pmix_proc_t p;
OPAL_ACQUIRE_OBJECT(cd);
/* if we don't already have it, we can ignore this */
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == cd->source->jobid) {
/* found it - tell the server to deregister */
(void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(cd->source->vpid);
cd->active = true;
PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd);
PMIX_WAIT_FOR_COMPLETION(cd->active);
break;
}
}
OBJ_RELEASE(cd);
}
/* tell the local PMIx server to cleanup this client as it is
* done executing */
void pmix2x_server_deregister_client(const opal_process_name_t *proc,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
pmix2x_threadshift_t *cd;
opal_pmix2x_jobid_trkr_t *jptr;
pmix_proc_t p;
opal_pmix_lock_t lock;
/* we must threadshift this request as we might not be in an event
* and we are going to access framework-global lists/objects */
cd = OBJ_NEW(pmix2x_threadshift_t);
cd->source = proc;
cd->opcbfunc = cbfunc;
cd->cbdata = cbdata;
if (NULL == cbfunc) {
_dereg_client(0, 0, cd);
} else {
opal_event_assign(&cd->ev, opal_pmix_base.evbase,
-1, EV_WRITE, _dereg_client, cd);
OPAL_POST_OBJECT(cd);
opal_event_active(&cd->ev, EV_WRITE, 1);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
if (NULL != cbfunc) {
cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata);
}
return;
}
/* if we don't already have it, we can ignore this */
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
if (jptr->jobid == proc->jobid) {
/* found it - tell the server to deregister */
(void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN);
p.rank = pmix2x_convert_opalrank(proc->vpid);
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock);
OPAL_PMIX_WAIT_THREAD(&lock);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
break;
}
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, cbdata);
}
}
@ -459,6 +428,13 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env)
pmix_status_t rc;
pmix_proc_t p;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* convert the jobid */
(void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid);
p.rank = pmix2x_convert_opalrank(proc->vpid);
@ -489,6 +465,13 @@ int pmix2x_server_dmodex(const opal_process_name_t *proc,
pmix2x_opcaddy_t *op;
pmix_status_t rc;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* setup the caddy */
op = OBJ_NEW(pmix2x_opcaddy_t);
op->mdxcbfunc = cbfunc;
@ -518,6 +501,13 @@ int pmix2x_server_notify_event(int status,
pmix_status_t rc;
pmix2x_opcaddy_t *op;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 >= opal_pmix_base.initialized) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
/* convert the list to an array of pmix_info_t */
if (NULL != info) {
sz = opal_list_get_size(info);

Просмотреть файл

@ -32,6 +32,7 @@
#include "orte/util/error_strings.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/util/threads.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/rml/rml.h"
@ -69,10 +70,10 @@ static size_t myerrhandle = SIZE_MAX;
static void register_cbfunc(int status, size_t errhndler, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
orte_lock_t *lk = (orte_lock_t*)cbdata;
myerrhandle = errhndler;
ORTE_POST_OBJECT(active);
*active = false;
ORTE_POST_OBJECT(lk);
ORTE_WAKEUP_THREAD(lk);
}
static void notify_cbfunc(int status,
@ -116,22 +117,23 @@ static void notify_cbfunc(int status,
static int init(void)
{
opal_list_t directives;
volatile bool active;
orte_lock_t lock;
opal_value_t *kv;
/* setup state machine to trap proc errors */
orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI);
/* tie the default PMIx event handler back to us */
active = true;
ORTE_CONSTRUCT_LOCK(&lock);
OBJ_CONSTRUCT(&directives, opal_list_t);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME);
kv->type = OPAL_STRING;
kv->data.string = strdup("ORTE-APP-DEFAULT");
opal_list_append(&directives, &kv->super);
opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&active);
ORTE_WAIT_FOR_COMPLETION(active);
opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&lock);
ORTE_WAIT_THREAD(&lock);
ORTE_DESTRUCT_LOCK(&lock);
OPAL_LIST_DESTRUCT(&directives);
return ORTE_SUCCESS;

Просмотреть файл

@ -185,7 +185,7 @@ static int tcp_component_open(void)
static int tcp_component_close(void)
{
/* cleanup listen event list */
OBJ_DESTRUCT(&mca_oob_tcp_component.listeners);
OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners);
OBJ_DESTRUCT(&mca_oob_tcp_component.peers);
@ -695,27 +695,11 @@ static int component_startup(void)
return rc;
}
static void cleanup(int sd, short args, void *cbdata)
{
opal_list_item_t * item;
bool *active = (bool*)cbdata;
ORTE_ACQUIRE_OBJECT(active);
while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) {
OBJ_RELEASE(item);
}
if (NULL != active) {
*active = false;
}
}
static void component_shutdown(void)
{
mca_oob_tcp_peer_t *peer;
uint64_t ui64;
int i = 0;
bool active;
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s TCP SHUTDOWN",
@ -750,24 +734,6 @@ static void component_shutdown(void)
"no hnp or not active");
}
/* because the listeners are in a separate
* async thread for apps, we can't just release them here.
* Instead, we push it into that event thread and release
* them there */
if (ORTE_PROC_IS_APP) {
opal_event_t ev;
active = true;
opal_event_set(orte_event_base, &ev, -1,
OPAL_EV_WRITE, cleanup, &active);
opal_event_set_priority(&ev, ORTE_ERROR_PRI);
ORTE_POST_OBJECT(active);
opal_event_active(&ev, OPAL_EV_WRITE, 1);
ORTE_WAIT_FOR_COMPLETION(active);
} else {
/* we can call the destruct directly */
cleanup(0, 0, NULL);
}
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s TCP SHUTDOWN done",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

Просмотреть файл

@ -86,19 +86,19 @@ static int orte_rml_base_register(mca_base_register_flag_t flags)
static void cleanup(int sd, short args, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
orte_lock_t *lk = (orte_lock_t*)cbdata;
ORTE_ACQUIRE_OBJECT(active);
OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);
if (NULL != active) {
ORTE_POST_OBJECT(active);
*active = false;
if (NULL != lk) {
ORTE_POST_OBJECT(lk);
ORTE_WAKEUP_THREAD(lk);
}
}
static int orte_rml_base_close(void)
{
volatile bool active;
orte_lock_t lock;
int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits);
orte_rml_base_module_t *mod;
orte_rml_component_t *comp;
@ -127,13 +127,14 @@ static int orte_rml_base_close(void)
* it there */
if (ORTE_PROC_IS_APP) {
opal_event_t ev;
active = true;
ORTE_CONSTRUCT_LOCK(&lock);
opal_event_set(orte_event_base, &ev, -1,
OPAL_EV_WRITE, cleanup, (void*)&active);
OPAL_EV_WRITE, cleanup, (void*)&lock);
opal_event_set_priority(&ev, ORTE_ERROR_PRI);
ORTE_POST_OBJECT(ev);
opal_event_active(&ev, OPAL_EV_WRITE, 1);
ORTE_WAIT_FOR_COMPLETION(active);
ORTE_WAIT_THREAD(&lock);
ORTE_DESTRUCT_LOCK(&lock);
} else {
/* we can call the destruct directly */
cleanup(0, 0, NULL);

Просмотреть файл

@ -279,11 +279,20 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
jdata->num_apps++;
if (NULL != papp->cmd) {
app->app = strdup(papp->cmd);
} else if (NULL == papp->argv ||
NULL == papp->argv[0]) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
OBJ_RELEASE(jdata);
return ORTE_ERR_BAD_PARAM;
} else {
app->app = strdup(papp->argv[0]);
}
app->argv = opal_argv_copy(papp->argv);
app->env = opal_argv_copy(papp->env);
if (NULL != papp->argv) {
app->argv = opal_argv_copy(papp->argv);
}
if (NULL != papp->env) {
app->env = opal_argv_copy(papp->env);
}
if (NULL != papp->cwd) {
app->cwd = strdup(papp->cwd);
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -38,6 +38,7 @@
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/util/threads.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
@ -807,4 +808,3 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp)
CLEANUP:
return rc;
}

Просмотреть файл

@ -13,6 +13,7 @@
#include "orte_config.h"
#include "opal/sys/atomic.h"
#include "opal/threads/threads.h"
/* provide macros for forward-proofing the shifting
* of objects between threads - at some point, we
@ -26,6 +27,12 @@
* we only have a memory barrier */
#define ORTE_ACQUIRE_OBJECT(o) opal_atomic_rmb()
#define orte_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread)
typedef pthread_cond_t orte_condition_t;
#define orte_condition_broadcast(a) pthread_cond_broadcast(a)
#define orte_condition_signal(a) pthread_cond_signal(a)
#define ORTE_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER
/* define a threadshift macro */
#define ORTE_THREADSHIFT(x, eb, f, p) \
do { \
@ -35,4 +42,118 @@
opal_event_active(&((x)->ev), OPAL_EV_WRITE, 1); \
} while(0)
/* One-shot synchronization object used to hand a "done" signal
 * between threads: a mutex/condvar pair plus an "active" flag that
 * remains true while the awaited event is still pending. */
typedef struct {
opal_mutex_t mutex;
orte_condition_t cond;
volatile bool active;
} orte_lock_t;
/* Initialize an orte_lock_t: construct the mutex, init the condition
 * variable, and arm the lock (active = true) so a subsequent
 * ORTE_WAIT_THREAD / ORTE_ACQUIRE_THREAD blocks until released. */
#define ORTE_CONSTRUCT_LOCK(l) \
do { \
OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \
pthread_cond_init(&(l)->cond, NULL); \
(l)->active = true; \
} while(0)
/* Tear down an orte_lock_t created with ORTE_CONSTRUCT_LOCK.
 * Caller must ensure no thread is still waiting on the condvar. */
#define ORTE_DESTRUCT_LOCK(l) \
do { \
OBJ_DESTRUCT(&(l)->mutex); \
pthread_cond_destroy(&(l)->cond); \
} while(0)
/* ORTE_ACQUIRE_THREAD: block until the lock is released, then
 * re-arm it (active = true) and return WITH the mutex still held -
 * the caller must eventually call ORTE_RELEASE_THREAD.  The debug
 * variant additionally traces via opal_output when the
 * opal_debug_threads flag is set. */
#if OPAL_ENABLE_DEBUG
#define ORTE_ACQUIRE_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
if (opal_debug_threads) { \
opal_output(0, "Waiting for thread %s:%d", \
__FILE__, __LINE__); \
} \
while ((lck)->active) { \
orte_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
if (opal_debug_threads) { \
opal_output(0, "Thread obtained %s:%d", \
__FILE__, __LINE__); \
} \
(lck)->active = true; \
OPAL_ACQUIRE_OBJECT(lck); \
} while(0)
#else
#define ORTE_ACQUIRE_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
while ((lck)->active) { \
orte_condition_wait(&(lck)->cond, &(lck)->mutex); \
} \
(lck)->active = true; \
OPAL_ACQUIRE_OBJECT(lck); \
} while(0)
#endif
/* ORTE_WAIT_THREAD: block until another thread releases the lock
 * (via ORTE_RELEASE_THREAD or ORTE_WAKEUP_THREAD).  Unlike
 * ORTE_ACQUIRE_THREAD, the lock is NOT re-armed and the mutex is
 * dropped before returning - this is a one-shot completion wait.
 * Fix: the debug variant passed &lck (the address of the pointer
 * variable) to OPAL_ACQUIRE_OBJECT instead of the lock pointer lck,
 * inconsistent with the non-debug variant and all sibling macros;
 * corrected to pass lck. */
#if OPAL_ENABLE_DEBUG
#define ORTE_WAIT_THREAD(lck)                                       \
    do {                                                            \
        opal_mutex_lock(&(lck)->mutex);                             \
        if (opal_debug_threads) {                                   \
            opal_output(0, "Waiting for thread %s:%d",              \
                        __FILE__, __LINE__);                        \
        }                                                           \
        while ((lck)->active) {                                     \
            orte_condition_wait(&(lck)->cond, &(lck)->mutex);       \
        }                                                           \
        if (opal_debug_threads) {                                   \
            opal_output(0, "Thread obtained %s:%d",                 \
                        __FILE__, __LINE__);                        \
        }                                                           \
        OPAL_ACQUIRE_OBJECT(lck);                                   \
        opal_mutex_unlock(&(lck)->mutex);                           \
    } while(0)
#else
#define ORTE_WAIT_THREAD(lck)                                       \
    do {                                                            \
        opal_mutex_lock(&(lck)->mutex);                             \
        while ((lck)->active) {                                     \
            orte_condition_wait(&(lck)->cond, &(lck)->mutex);       \
        }                                                           \
        OPAL_ACQUIRE_OBJECT(lck);                                   \
        opal_mutex_unlock(&(lck)->mutex);                           \
    } while(0)
#endif
/* ORTE_RELEASE_THREAD: release a lock previously obtained with
 * ORTE_ACQUIRE_THREAD - clears the active flag, wakes all waiters,
 * and drops the mutex (which the caller currently holds). */
#if OPAL_ENABLE_DEBUG
#define ORTE_RELEASE_THREAD(lck) \
do { \
if (opal_debug_threads) { \
opal_output(0, "Releasing thread %s:%d", \
__FILE__, __LINE__); \
} \
(lck)->active = false; \
OPAL_POST_OBJECT(lck); \
orte_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#else
#define ORTE_RELEASE_THREAD(lck) \
do { \
(lck)->active = false; \
OPAL_POST_OBJECT(lck); \
orte_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif
/* ORTE_WAKEUP_THREAD: signal completion from a thread that does NOT
 * hold the mutex - acquires it, clears the active flag, wakes all
 * waiters, then releases it. */
#define ORTE_WAKEUP_THREAD(lck) \
do { \
opal_mutex_lock(&(lck)->mutex); \
(lck)->active = false; \
OPAL_POST_OBJECT(lck); \
orte_condition_broadcast(&(lck)->cond); \
opal_mutex_unlock(&(lck)->mutex); \
} while(0)
#endif /* ORTE_THREADS_H */