Enable the PMIx notification callback system. This is currently supported only by the pmix120 component, which is not selected by default. All other components will ignore error registration requests, and thus do not support debugger attach when launched via mpirun. Note that direct-launched applications will support such attachment, but may not do so in a scalable fashion.
Fixes #1225
Этот коммит содержится в:
родитель
c18af0d61f
Коммит
60a7bc2e50
@ -91,7 +91,7 @@ OMPI_DECLSPEC void __opal_attribute_noreturn__
|
||||
#define OMPI_ERROR_LOG ORTE_ERROR_LOG
|
||||
|
||||
/* Init and finalize objects and operations */
|
||||
#define ompi_rte_init(a, b) orte_init(a, b, ORTE_PROC_MPI)
|
||||
OMPI_DECLSPEC int ompi_rte_init(int *pargc, char ***pargv);
|
||||
#define ompi_rte_finalize() orte_finalize()
|
||||
OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void);
|
||||
|
||||
|
@ -52,6 +52,79 @@
|
||||
|
||||
extern ompi_rte_orte_component_t mca_rte_orte_component;
|
||||
|
||||
/* Tracks one in-flight error-handler registration request; filled in by
 * register_cbfunc and polled by the requester. */
typedef struct {
|
||||
volatile bool active;   /* set true by the requester, cleared by register_cbfunc on completion */
|
||||
int status;             /* status code handed to register_cbfunc */
|
||||
int errhandler;         /* errhandler reference handed to register_cbfunc */
|
||||
} errhandler_t;
|
||||
|
||||
static void register_cbfunc(int status, int errhndler, void *cbdata)
|
||||
{
|
||||
errhandler_t *cd = (errhandler_t*)cbdata;
|
||||
cd->status = status;
|
||||
cd->errhandler = errhndler;
|
||||
cd->active = false;
|
||||
}
|
||||
|
||||
/* Stays true until notify_cbfunc runs; ompi_rte_wait_for_debugger waits on it. */
static volatile bool wait_for_release = true;
|
||||
/* Reference of the registered errhandler (-1 until ompi_rte_init stores one);
 * passed to deregister_errhandler in ompi_rte_wait_for_debugger. */
static int errhandler = -1;
|
||||
|
||||
static void notify_cbfunc(int status,
|
||||
opal_list_t *procs,
|
||||
opal_list_t *info,
|
||||
opal_pmix_release_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(cbdata);
|
||||
}
|
||||
wait_for_release = false;
|
||||
}
|
||||
|
||||
|
||||
int ompi_rte_init(int *pargc, char ***pargv)
|
||||
{
|
||||
int rc;
|
||||
opal_list_t info;
|
||||
opal_value_t val;
|
||||
errhandler_t cd;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_init(pargc, pargv, ORTE_PROC_MPI))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!orte_standalone_operation) {
|
||||
/* register to receive any debugger release */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
OBJ_CONSTRUCT(&val, opal_value_t);
|
||||
val.key = strdup(OPAL_PMIX_ERROR_NAME);
|
||||
val.type = OPAL_INT;
|
||||
val.data.integer = OPAL_ERR_DEBUGGER_RELEASE;
|
||||
opal_list_append(&info, &val.super);
|
||||
cd.status = ORTE_ERROR;
|
||||
cd.errhandler = -1;
|
||||
cd.active = true;
|
||||
|
||||
opal_pmix.register_errhandler(&info, notify_cbfunc, register_cbfunc, &cd);
|
||||
|
||||
/* let the MPI progress engine run while we wait for
|
||||
* registration to complete */
|
||||
OMPI_WAIT_FOR_COMPLETION(cd.active);
|
||||
/* safely deconstruct the list */
|
||||
opal_list_remove_first(&info);
|
||||
OBJ_DESTRUCT(&val);
|
||||
OBJ_DESTRUCT(&info);
|
||||
if (OPAL_SUCCESS != cd.status) {
|
||||
/* ouch - we are doomed */
|
||||
ORTE_ERROR_LOG(cd.status);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
errhandler = cd.errhandler;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void ompi_rte_abort(int error_code, char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
@ -100,10 +173,10 @@ void ompi_rte_abort(int error_code, char *fmt, ...)
|
||||
* attaching debuggers -- see big comment in
|
||||
* orte/tools/orterun/debuggers.c explaining the two scenarios.
|
||||
*/
|
||||
|
||||
void ompi_rte_wait_for_debugger(void)
|
||||
{
|
||||
int debugger;
|
||||
orte_rml_recv_cb_t xfer;
|
||||
|
||||
/* See lengthy comment in orte/tools/orterun/debuggers.c about
|
||||
orte_in_parallel_debugger */
|
||||
@ -117,12 +190,12 @@ void ompi_rte_wait_for_debugger(void)
|
||||
/* if not, just return */
|
||||
return;
|
||||
}
|
||||
|
||||
/* if we are being debugged, then we need to find
|
||||
* the correct plug-ins
|
||||
*/
|
||||
ompi_debugger_setup_dlls();
|
||||
|
||||
/* wait for the debugger to attach */
|
||||
if (orte_standalone_operation) {
|
||||
/* spin until debugger attaches and releases us */
|
||||
while (MPIR_debug_gate == 0) {
|
||||
@ -133,23 +206,9 @@ void ompi_rte_wait_for_debugger(void)
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
/* only the rank=0 proc waits for either a message from the
|
||||
* HNP or for the debugger to attach - everyone else will just
|
||||
* spin in the grpcomm barrier in ompi_mpi_init until rank=0
|
||||
* joins them.
|
||||
*/
|
||||
if (0 != ORTE_PROC_MY_NAME->vpid) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* VPID 0 waits for a message from the HNP */
|
||||
OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
|
||||
xfer.active = true;
|
||||
orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &xfer);
|
||||
/* let the MPI progress engine run while we wait */
|
||||
OMPI_WAIT_FOR_COMPLETION(xfer.active);
|
||||
/* now wait for the notification to occur */
|
||||
OMPI_WAIT_FOR_COMPLETION(wait_for_release);
|
||||
/* deregister the errhandler */
|
||||
opal_pmix.deregister_errhandler(errhandler, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -10,9 +10,9 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -449,3 +449,12 @@ int opal_dss_compare_jobid(opal_jobid_t *value1,
|
||||
return OPAL_EQUAL;
|
||||
}
|
||||
|
||||
/*
 * Compare two status codes numerically.
 *
 * Returns OPAL_VALUE1_GREATER or OPAL_VALUE2_GREATER according to which
 * pointed-to value is larger, or OPAL_EQUAL when they match.  The type
 * argument is not used.
 */
int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type)
{
    const int lhs = *value1;
    const int rhs = *value2;

    if (lhs == rhs) {
        return OPAL_EQUAL;
    }

    return (lhs > rhs) ? OPAL_VALUE1_GREATER : OPAL_VALUE2_GREATER;
}
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -61,6 +61,7 @@ int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type)
|
||||
|
||||
case OPAL_INT:
|
||||
case OPAL_UINT:
|
||||
case OPAL_STATUS:
|
||||
datasize = sizeof(int);
|
||||
break;
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
@ -332,6 +332,9 @@ int opal_dss_pack_jobid(opal_buffer_t *buffer, const void *src,
|
||||
int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, opal_data_type_t type);
|
||||
|
||||
int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal unpack functions
|
||||
*/
|
||||
@ -401,6 +404,8 @@ int opal_dss_unpack_jobid(opal_buffer_t *buffer, void *dest,
|
||||
int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, opal_data_type_t type);
|
||||
|
||||
int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal copy functions
|
||||
@ -497,6 +502,8 @@ int opal_dss_compare_jobid(opal_jobid_t *value1,
|
||||
opal_jobid_t *value2,
|
||||
opal_data_type_t type);
|
||||
|
||||
int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal print functions
|
||||
*/
|
||||
@ -536,6 +543,7 @@ int opal_dss_print_time(char **output, char *prefix, time_t *src, opal_data_type
|
||||
int opal_dss_print_name(char **output, char *prefix, opal_process_name_t *name, opal_data_type_t type);
|
||||
int opal_dss_print_jobid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type);
|
||||
int opal_dss_print_vpid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type);
|
||||
int opal_dss_print_status(char **output, char *prefix, int *src, opal_data_type_t type);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -611,6 +611,17 @@ int opal_dss_open(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
tmp = OPAL_STATUS;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_status,
|
||||
opal_dss_unpack_status,
|
||||
(opal_dss_copy_fn_t)opal_dss_std_copy,
|
||||
(opal_dss_compare_fn_t)opal_dss_compare_status,
|
||||
(opal_dss_print_fn_t)opal_dss_print_status,
|
||||
OPAL_DSS_UNSTRUCTURED,
|
||||
"OPAL_STATUS", &tmp))) {
|
||||
return rc;
|
||||
}
|
||||
/* All done */
|
||||
|
||||
opal_dss_initialized = true;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -1240,3 +1240,20 @@ int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* STATUS
|
||||
*/
|
||||
int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,
                         int32_t num_vals, opal_data_type_t type)
{
    /* a status is packed exactly like an OPAL_INT, so delegate to
     * the generic packer with that type */
    const int rc = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_INT);

    if (OPAL_SUCCESS == rc) {
        return OPAL_SUCCESS;
    }

    /* record the failure, then hand the code back to the caller */
    OPAL_ERROR_LOG(rc);
    return rc;
}
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -25,6 +25,7 @@
|
||||
#include "opal_stdint.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/dss/dss_internal.h"
|
||||
|
||||
int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type)
|
||||
@ -1060,3 +1061,29 @@ int opal_dss_print_vpid(char **output, char *prefix,
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Render a status code as a newly allocated, human-readable string.
 *
 * output - receives the allocated string (caller frees); set to NULL if
 *          the string could not be allocated
 * prefix - text placed in front of the message; NULL selects a single space
 * src    - status to print; NULL prints a "NULL pointer" placeholder
 * type   - not used
 *
 * Always returns OPAL_SUCCESS, as the original implementation did;
 * callers detect allocation failure through a NULL *output.
 */
int opal_dss_print_status(char **output, char *prefix,
                          int *src, opal_data_type_t type)
{
    /* A string literal serves as the default prefix: nothing to allocate,
     * nothing to free, and no unchecked asprintf that could leave the
     * prefix pointer indeterminate. */
    const char *prefx = (NULL == prefix) ? " " : prefix;
    int rc;

    if (NULL == src) {
        /* no value available - just print the data type */
        rc = asprintf(output, "%sData type: OPAL_STATUS\tValue: NULL pointer", prefx);
    } else {
        rc = asprintf(output, "%sData type: OPAL_STATUS\tValue: %s", prefx, opal_strerror(*src));
    }

    if (0 > rc) {
        /* asprintf leaves *output undefined on failure */
        *output = NULL;
    }

    return OPAL_SUCCESS;
}
|
||||
|
@ -13,9 +13,9 @@
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -120,6 +120,8 @@ typedef struct {
|
||||
#define OPAL_NAME (opal_data_type_t) 50
|
||||
#define OPAL_JOBID (opal_data_type_t) 51
|
||||
#define OPAL_VPID (opal_data_type_t) 52
|
||||
#define OPAL_STATUS (opal_data_type_t) 53
|
||||
|
||||
/* OPAL Dynamic */
|
||||
#define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 100
|
||||
|
||||
@ -245,6 +247,7 @@ typedef struct {
|
||||
float fval;
|
||||
double dval;
|
||||
struct timeval tv;
|
||||
int status;
|
||||
opal_process_name_t name;
|
||||
opal_bool_array_t flag_array;
|
||||
opal_uint8_array_t byte_array;
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -1519,3 +1519,20 @@ int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* STATUS
|
||||
*/
|
||||
int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest,
                           int32_t *num_vals, opal_data_type_t type)
{
    /* a status is unpacked exactly like an OPAL_INT, so delegate to
     * the generic unpacker with that type */
    const int rc = opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_INT);

    if (OPAL_SUCCESS == rc) {
        return OPAL_SUCCESS;
    }

    /* record the failure, then hand the code back to the caller */
    OPAL_ERROR_LOG(rc);
    return rc;
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -80,7 +80,8 @@ enum {
|
||||
OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50),
|
||||
OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51),
|
||||
OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52),
|
||||
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53)
|
||||
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53),
|
||||
OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54)
|
||||
};
|
||||
|
||||
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)
|
||||
|
@ -42,6 +42,7 @@ mca_pmix_pmix112_la_CPPFLAGS = \
|
||||
-I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS)
|
||||
mca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS)
|
||||
mca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS)
|
||||
mca_pmix_pmix112_la_DEPENDENCIES = $(mca_pmix_pmix112_la_LIBADD)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pmix_pmix112_la_SOURCES =$(sources)
|
||||
@ -49,3 +50,4 @@ libmca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS)
|
||||
libmca_pmix_pmix112_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS)
|
||||
libmca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS)
|
||||
libmca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS)
|
||||
libmca_pmix_pmix112_la_DEPENDENCIES = $(mca_pmix_pmix112_la_LIBADD)
|
||||
|
@ -431,6 +431,7 @@ int pmix1_server_notify_error(int status,
|
||||
op->cbdata = cbdata;
|
||||
|
||||
rc = pmix1_convert_opalrc(status);
|
||||
opal_output(0, "CALLING NOTIFY ERROR");
|
||||
rc = PMIx_Notify_error(rc, ps, psz, eps, esz,
|
||||
pinfo, sz, opcbfunc, op);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
@ -42,6 +42,7 @@ mca_pmix_pmix120_la_CPPFLAGS = \
|
||||
-I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS)
|
||||
mca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS)
|
||||
mca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS)
|
||||
mca_pmix_pmix120_la_DEPENDENCIES = $(mca_pmix_pmix120_la_LIBADD)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pmix_pmix120_la_SOURCES =$(sources)
|
||||
@ -49,3 +50,4 @@ libmca_pmix_pmix120_la_CFLAGS = $(opal_pmix_pmix120_CFLAGS)
|
||||
libmca_pmix_pmix120_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS)
|
||||
libmca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS)
|
||||
libmca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS)
|
||||
libmca_pmix_pmix120_la_DEPENDENCIES = $(libmca_pmix_pmix120_la_LIBADD)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
@ -63,8 +63,18 @@ BEGIN_C_DECLS
|
||||
#define PMIX_MAX_NSLEN 255
|
||||
#define PMIX_MAX_KEYLEN 511
|
||||
|
||||
/* define a *wildcard* value for requests involving rank */
|
||||
#define PMIX_RANK_WILDCARD -1
|
||||
/* define a value for requests for job-level data
|
||||
* where the info itself isn't associated with any
|
||||
* specific rank, or when a request involves
|
||||
* a rank that isn't known - e.g., when someone requests
|
||||
* info thru one of the legacy interfaces where the rank
|
||||
* is typically encoded into the key itself since there is
|
||||
* no rank parameter in the API itself */
|
||||
#define PMIX_RANK_UNDEF INT32_MAX
|
||||
/* define a value to indicate that the user wants the
 * data for the given key from every rank that posted
 * that key.  The expansion is parenthesized so it stays a
 * single operand wherever the macro is used. */
#define PMIX_RANK_WILDCARD (INT32_MAX-1)
|
||||
|
||||
/* define a set of "standard" PMIx attributes that can
|
||||
* be queried. Implementations (and users) are free to extend as
|
||||
@ -199,7 +209,7 @@ BEGIN_C_DECLS
|
||||
|
||||
/**** PMIX ERROR CONSTANTS ****/
|
||||
/* PMIx errors are always negative, with 0 reserved for success */
|
||||
#define PMIX_ERROR_MIN -50 // set equal to number of non-zero entries in enum
|
||||
#define PMIX_ERROR_MIN -52 // set equal to number of non-zero entries in enum
|
||||
|
||||
typedef enum {
|
||||
PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER = PMIX_ERROR_MIN,
|
||||
@ -257,6 +267,8 @@ typedef enum {
|
||||
PMIX_ERR_SILENT,
|
||||
PMIX_ERROR,
|
||||
|
||||
PMIX_ERR_GRP_FOUND,
|
||||
PMIX_ERR_DFLT_FOUND,
|
||||
PMIX_SUCCESS
|
||||
} pmix_status_t;
|
||||
|
||||
@ -288,6 +300,9 @@ typedef enum {
|
||||
PMIX_TIMEVAL,
|
||||
PMIX_TIME,
|
||||
|
||||
PMIX_STATUS, // needs to be tracked separately from integer for those times
|
||||
// when we are embedded and it needs to be converted to the
|
||||
// host error definitions
|
||||
PMIX_HWLOC_TOPO,
|
||||
PMIX_VALUE,
|
||||
PMIX_INFO_ARRAY,
|
||||
@ -411,6 +426,7 @@ typedef struct {
|
||||
float fval;
|
||||
double dval;
|
||||
struct timeval tv;
|
||||
pmix_status_t status;
|
||||
pmix_info_array_t array;
|
||||
pmix_byte_object_t bo;
|
||||
} data;
|
||||
@ -494,6 +510,7 @@ extern void pmix_value_load(pmix_value_t *v, void *data,
|
||||
/**** PMIX INFO STRUCT ****/
|
||||
typedef struct {
|
||||
char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator
|
||||
bool required; // defaults to optional (i.e., required=false)
|
||||
pmix_value_t value;
|
||||
} pmix_info_t;
|
||||
|
||||
@ -531,6 +548,10 @@ typedef struct {
|
||||
(void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \
|
||||
pmix_value_load(&((m)->value), (v), (t)); \
|
||||
} while(0);
|
||||
#define PMIX_INFO_REQUIRED(m) \
|
||||
(m)->required = true;
|
||||
#define PMIX_INFO_OPTIONAL(m) \
|
||||
(m)->required = false;
|
||||
|
||||
|
||||
/**** PMIX LOOKUP RETURN STRUCT ****/
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -136,6 +136,10 @@ int pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type)
|
||||
datasize = sizeof(time_t);
|
||||
break;
|
||||
|
||||
case PMIX_STATUS:
|
||||
datasize = sizeof(pmix_status_t);
|
||||
break;
|
||||
|
||||
default:
|
||||
return PMIX_ERR_UNKNOWN_DATA_TYPE;
|
||||
}
|
||||
@ -166,7 +170,7 @@ int pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type)
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
/* compare function for pmix_value_t*/
|
||||
/* compare function for pmix_value_t */
|
||||
bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
|
||||
{
|
||||
bool rc = false;
|
||||
@ -213,6 +217,9 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
|
||||
case PMIX_STRING:
|
||||
rc = strcmp(p->data.string, p1->data.string);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
rc = (p->data.status == p1->data.status);
|
||||
break;
|
||||
default:
|
||||
pmix_output(0, "COMPARE-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type);
|
||||
}
|
||||
@ -293,6 +300,9 @@ pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src)
|
||||
p->data.tv.tv_sec = src->data.tv.tv_sec;
|
||||
p->data.tv.tv_usec = src->data.tv.tv_usec;
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t));
|
||||
break;
|
||||
case PMIX_INFO_ARRAY:
|
||||
p->data.array.size = src->data.array.size;
|
||||
if (0 < src->data.array.size) {
|
||||
@ -343,6 +353,7 @@ int pmix_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src,
|
||||
{
|
||||
*dest = (pmix_info_t*)malloc(sizeof(pmix_info_t));
|
||||
(void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN);
|
||||
(*dest)->required = src->required;
|
||||
return pmix_value_xfer(&(*dest)->value, &src->value);
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -275,6 +275,8 @@ int pmix_bfrop_pack_timeval(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_pack_topo(pmix_buffer_t *buffer, const void *src,
|
||||
@ -337,6 +339,8 @@ int pmix_bfrop_unpack_timeval(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest,
|
||||
@ -427,6 +431,7 @@ int pmix_bfrop_print_double(char **output, char *prefix, double *src, pmix_data_
|
||||
|
||||
int pmix_bfrop_print_timeval(char **output, char *prefix, struct timeval *src, pmix_data_type_t type);
|
||||
int pmix_bfrop_print_time(char **output, char *prefix, time_t *src, pmix_data_type_t type);
|
||||
int pmix_bfrop_print_status(char **output, char *prefix, pmix_status_t *src, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_print_topo(char **output, char *prefix,
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -289,6 +289,12 @@ pmix_status_t pmix_bfrop_open(void)
|
||||
pmix_bfrop_std_copy,
|
||||
pmix_bfrop_print_time);
|
||||
|
||||
PMIX_REGISTER_TYPE("PMIX_STATUS", PMIX_STATUS,
|
||||
pmix_bfrop_pack_status,
|
||||
pmix_bfrop_unpack_status,
|
||||
pmix_bfrop_std_copy,
|
||||
pmix_bfrop_print_status);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
PMIX_REGISTER_TYPE("PMIX_HWLOC_TOPO", PMIX_HWLOC_TOPO,
|
||||
pmix_bfrop_pack_topo,
|
||||
@ -395,6 +401,8 @@ pmix_status_t pmix_bfrop_close(void)
|
||||
void pmix_value_load(pmix_value_t *v, void *data,
|
||||
pmix_data_type_t type)
|
||||
{
|
||||
pmix_byte_object_t *bo;
|
||||
|
||||
v->type = type;
|
||||
if (NULL == data) {
|
||||
/* just set the fields to zero */
|
||||
@ -457,9 +465,13 @@ void pmix_value_load(pmix_value_t *v, void *data,
|
||||
case PMIX_TIMEVAL:
|
||||
memcpy(&(v->data.tv), data, sizeof(struct timeval));
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(&(v->data.status), data, sizeof(pmix_status_t));
|
||||
break;
|
||||
case PMIX_BYTE_OBJECT:
|
||||
v->data.bo.bytes = data;
|
||||
memcpy(&(v->data.bo.size), data, sizeof(size_t));
|
||||
bo = (pmix_byte_object_t*)data;
|
||||
v->data.bo.bytes = bo->bytes;
|
||||
memcpy(&(v->data.bo.size), &bo->size, sizeof(size_t));
|
||||
break;
|
||||
case PMIX_TIME:
|
||||
case PMIX_HWLOC_TOPO:
|
||||
@ -569,6 +581,10 @@ pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data,
|
||||
memcpy(*data, &(kv->data.tv), sizeof(struct timeval));
|
||||
*sz = sizeof(struct timeval);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(*data, &(kv->data.status), sizeof(pmix_status_t));
|
||||
*sz = sizeof(pmix_status_t);
|
||||
break;
|
||||
case PMIX_BYTE_OBJECT:
|
||||
if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) {
|
||||
*data = kv->data.bo.bytes;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -406,6 +406,26 @@ int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src,
|
||||
}
|
||||
|
||||
|
||||
/* STATUS */
|
||||
/*
 * Pack num_vals status codes from src into buffer.  Each pmix_status_t
 * is converted to an int32_t and packed individually as PMIX_INT32.
 * Returns PMIX_SUCCESS, or the first error reported by the int32 packer.
 * The type argument is not used.
 */
int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src,
                           int32_t num_vals, pmix_data_type_t type)
{
    const pmix_status_t *statuses = (const pmix_status_t *)src;
    int32_t idx = 0;

    while (idx < num_vals) {
        int32_t wire = (int32_t)statuses[idx];
        int rc = pmix_bfrop_pack_int32(buffer, &wire, 1, PMIX_INT32);

        if (PMIX_SUCCESS != rc) {
            return rc;
        }
        ++idx;
    }

    return PMIX_SUCCESS;
}
|
||||
|
||||
|
||||
/* PACK FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
static int pack_val(pmix_buffer_t *buffer,
|
||||
pmix_value_t *p)
|
||||
@ -503,6 +523,11 @@ static int pack_val(pmix_buffer_t *buffer,
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.status, 1, PMIX_STATUS))) {
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case PMIX_INFO_ARRAY:
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.array, 1, PMIX_INFO_ARRAY))) {
|
||||
return ret;
|
||||
@ -563,6 +588,10 @@ int pmix_bfrop_pack_info(pmix_buffer_t *buffer, const void *src,
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &foo, 1, PMIX_STRING))) {
|
||||
return ret;
|
||||
}
|
||||
/* pack required flag */
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_bool(buffer, &info[i].required, 1, PMIX_BOOL))) {
|
||||
return ret;
|
||||
}
|
||||
/* pack the type */
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &info[i].value.type, 1, PMIX_INT))) {
|
||||
return ret;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,6 +27,7 @@
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#include "src/util/error.h"
|
||||
#include "src/buffer_ops/internal.h"
|
||||
|
||||
int pmix_bfrop_print(char **output, char *prefix, void *src, pmix_data_type_t type)
|
||||
@ -540,6 +541,32 @@ int pmix_bfrop_print_timeval(char **output, char *prefix,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int pmix_bfrop_print_status(char **output, char *prefix,
|
||||
pmix_status_t *src, pmix_data_type_t type)
|
||||
{
|
||||
char *prefx;
|
||||
|
||||
/* deal with NULL prefix */
|
||||
if (NULL == prefix) asprintf(&prefx, " ");
|
||||
else prefx = prefix;
|
||||
|
||||
/* if src is NULL, just print data type and return */
|
||||
if (NULL == src) {
|
||||
asprintf(output, "%sData type: PMIX_STATUS\tValue: NULL pointer", prefx);
|
||||
if (prefx != prefix) {
|
||||
free(prefx);
|
||||
}
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
asprintf(output, "%sData type: PMIX_STATUS\tValue: %s", prefx, PMIx_Error_string(*src));
|
||||
if (prefx != prefix) {
|
||||
free(prefx);
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/* PRINT FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
|
||||
/*
|
||||
@ -632,6 +659,10 @@ int pmix_bfrop_print_value(char **output, char *prefix,
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx,
|
||||
(long)src->data.tv.tv_sec, (long)src->data.tv.tv_usec);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATUS\tValue: %s", prefx,
|
||||
PMIx_Error_string(src->data.status));
|
||||
break;
|
||||
default:
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx);
|
||||
break;
|
||||
@ -648,8 +679,8 @@ int pmix_bfrop_print_info(char **output, char *prefix,
|
||||
char *tmp;
|
||||
|
||||
pmix_bfrop_print_value(&tmp, NULL, &src->value, PMIX_VALUE);
|
||||
asprintf(output, "%sKEY: %s %s", prefix, src->key,
|
||||
(NULL == tmp) ? "NULL" : tmp);
|
||||
asprintf(output, "%sKEY: %s REQD: %s %s", prefix, src->key,
|
||||
src->required ? "Y" : "N", (NULL == tmp) ? "PMIX_VALUE: NULL" : tmp);
|
||||
if (NULL != tmp) {
|
||||
free(tmp);
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -500,6 +500,20 @@ int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest,
|
||||
}
|
||||
|
||||
|
||||
/*
 * Unpack *num_vals pmix_status_t values from buffer into dest.
 *
 * Returns PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER when
 * pmix_bfrop_too_small() reports the buffer cannot hold that many
 * values; otherwise returns whatever pmix_bfrop_unpack_int32() returns.
 */
int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
                             int32_t *num_vals, pmix_data_type_t type)
{
    pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_status * %d\n", (int)*num_vals);

    /* refuse to read beyond the data the buffer actually holds */
    if (pmix_bfrop_too_small(buffer, (*num_vals) * sizeof(pmix_status_t))) {
        return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER;
    }

    /* the values are handed to the int32 unpacker */
    return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_INT32);
}
|
||||
|
||||
|
||||
/* UNPACK FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
|
||||
/*
|
||||
@ -672,6 +686,11 @@ int pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest,
|
||||
}
|
||||
(void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN);
|
||||
free(tmp);
|
||||
/* unpack the required flag */
|
||||
m=1;
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_bool(buffer, &ptr[i].required, &m, PMIX_BOOL))) {
|
||||
return ret;
|
||||
}
|
||||
/* unpack value - since the value structure is statically-defined
|
||||
* instead of a pointer in this struct, we directly unpack it to
|
||||
* avoid the malloc */
|
||||
|
@ -10,7 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
# Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -26,10 +26,14 @@ headers += \
|
||||
src/class/pmix_object.h \
|
||||
src/class/pmix_list.h \
|
||||
src/class/pmix_pointer_array.h \
|
||||
src/class/pmix_hash_table.h
|
||||
src/class/pmix_hash_table.h \
|
||||
src/class/pmix_hotel.h \
|
||||
src/class/pmix_ring_buffer.h
|
||||
|
||||
sources += \
|
||||
src/class/pmix_object.c \
|
||||
src/class/pmix_list.c \
|
||||
src/class/pmix_pointer_array.c \
|
||||
src/class/pmix_hash_table.c
|
||||
src/class/pmix_hash_table.c \
|
||||
src/class/pmix_hotel.c \
|
||||
src/class/pmix_ring_buffer.c
|
||||
|
136
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c
Обычный файл
136
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c
Обычный файл
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include PMIX_EVENT_HEADER
|
||||
#include "src/class/pmix_hotel.h"
|
||||
|
||||
|
||||
static void local_eviction_callback(int fd, short flags, void *arg)
|
||||
{
|
||||
pmix_hotel_room_eviction_callback_arg_t *eargs =
|
||||
(pmix_hotel_room_eviction_callback_arg_t*) arg;
|
||||
void *occupant = eargs->hotel->rooms[eargs->room_num].occupant;
|
||||
|
||||
/* Remove the occurpant from the room.
|
||||
|
||||
Do not change this logic without also changing the same logic
|
||||
in pmix_hotel_checkout() and
|
||||
pmix_hotel_checkout_and_return_occupant(). */
|
||||
pmix_hotel_t *hotel = eargs->hotel;
|
||||
pmix_hotel_room_t *room = &(hotel->rooms[eargs->room_num]);
|
||||
room->occupant = NULL;
|
||||
hotel->last_unoccupied_room++;
|
||||
assert(hotel->last_unoccupied_room < hotel->num_rooms);
|
||||
hotel->unoccupied_rooms[hotel->last_unoccupied_room] = eargs->room_num;
|
||||
|
||||
/* Invoke the user callback to tell them that they were evicted */
|
||||
hotel->evict_callback_fn(hotel,
|
||||
eargs->room_num,
|
||||
occupant);
|
||||
}
|
||||
|
||||
|
||||
int pmix_hotel_init(pmix_hotel_t *h, int num_rooms,
|
||||
pmix_event_base_t *evbase,
|
||||
uint32_t eviction_timeout,
|
||||
int eviction_event_priority,
|
||||
pmix_hotel_eviction_callback_fn_t evict_callback_fn)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Bozo check */
|
||||
if (num_rooms <= 0 ||
|
||||
NULL == evict_callback_fn) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
h->num_rooms = num_rooms;
|
||||
h->evbase = evbase;
|
||||
h->eviction_timeout.tv_usec = eviction_timeout % 1000000;
|
||||
h->eviction_timeout.tv_sec = eviction_timeout / 1000000;
|
||||
h->evict_callback_fn = evict_callback_fn;
|
||||
h->rooms = (pmix_hotel_room_t*)malloc(num_rooms * sizeof(pmix_hotel_room_t));
|
||||
if (NULL != evict_callback_fn) {
|
||||
h->eviction_args =
|
||||
(pmix_hotel_room_eviction_callback_arg_t*)malloc(num_rooms * sizeof(pmix_hotel_room_eviction_callback_arg_t));
|
||||
}
|
||||
h->unoccupied_rooms = (int*) malloc(num_rooms * sizeof(int));
|
||||
h->last_unoccupied_room = num_rooms - 1;
|
||||
|
||||
for (i = 0; i < num_rooms; ++i) {
|
||||
/* Mark this room as unoccupied */
|
||||
h->rooms[i].occupant = NULL;
|
||||
|
||||
/* Setup this room in the unoccupied index array */
|
||||
h->unoccupied_rooms[i] = i;
|
||||
|
||||
/* Setup the eviction callback args */
|
||||
h->eviction_args[i].hotel = h;
|
||||
h->eviction_args[i].room_num = i;
|
||||
|
||||
/* Create this room's event (but don't add it) */
|
||||
if (NULL != h->evbase) {
|
||||
event_assign(&(h->rooms[i].eviction_timer_event),
|
||||
h->evbase,
|
||||
-1, 0, local_eviction_callback,
|
||||
&(h->eviction_args[i]));
|
||||
}
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Class constructor: put the hotel into an empty state with no rooms
 * and no storage; pmix_hotel_init() does the allocations.
 */
static void constructor(pmix_hotel_t *h)
{
    /* no storage yet */
    h->rooms = NULL;
    h->eviction_args = NULL;
    h->unoccupied_rooms = NULL;

    /* no rooms, hence no unoccupied-room index */
    h->num_rooms = 0;
    h->last_unoccupied_room = -1;

    /* no eviction machinery configured */
    h->evbase = NULL;
    h->evict_callback_fn = NULL;
    h->eviction_timeout.tv_sec = 0;
    h->eviction_timeout.tv_usec = 0;
}
|
||||
|
||||
/*
 * Class destructor: delete the eviction timer event of every occupied
 * room, then release the hotel's arrays.
 */
static void destructor(pmix_hotel_t *h)
{
    /* Timer events only exist when an event base was supplied */
    if (NULL != h->evbase) {
        int room;

        for (room = 0; room < h->num_rooms; ++room) {
            if (NULL == h->rooms[room].occupant) {
                continue;
            }
            event_del(&(h->rooms[room].eviction_timer_event));
        }
    }

    /* free(NULL) is a no-op, so no guards are needed here */
    free(h->rooms);
    free(h->eviction_args);
    free(h->unoccupied_rooms);
}
|
||||
|
||||
PMIX_CLASS_INSTANCE(pmix_hotel_t,
|
||||
pmix_object_t,
|
||||
constructor,
|
||||
destructor);
|
354
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h
Обычный файл
354
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h
Обычный файл
@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/** @file
|
||||
*
|
||||
* This file provides a "hotel" class:
|
||||
*
|
||||
* - A hotel has a fixed number of rooms (i.e., storage slots)
|
||||
* - An arbitrary data pointer can check into an empty room at any time
|
||||
* - The occupant of a room can check out at any time
|
||||
* - Optionally, the occupant of a room can be forcibly evicted at a
|
||||
* given time (i.e., when a pmix timer event expires).
|
||||
* - The hotel has finite occupancy; if you try to checkin a new
|
||||
* occupant and the hotel is already full, it will gracefully fail
|
||||
* to checkin.
|
||||
*
|
||||
* One use case for this class is for ACK-based network retransmission
|
||||
* schemes (NACK-based retransmission schemes probably can use
|
||||
* pmix_ring_buffer).
|
||||
*
|
||||
* For ACK-based retransmission schemes, a hotel might be used
|
||||
* something like this:
|
||||
*
|
||||
* - when a message is sent, check it in to a hotel with a timer
|
||||
* - if an ACK is received, check it out of the hotel (which also cancels
|
||||
* the timer)
|
||||
* - if an ACK isn't received in time, the timer will expire and the
|
||||
* upper layer will get a callback with the message
|
||||
* - if an ACK is received late (i.e., after its timer has expired),
|
||||
* then checkout will gracefully fail
|
||||
*
|
||||
* Note that this class intentionally provides pretty minimal
|
||||
* functionality. It is intended to be used in performance-critical
|
||||
* code paths -- extra functionality would simply add latency.
|
||||
*
|
||||
* There is a pmix_hotel_init() function to create a hotel, but no
|
||||
* corresponding finalize; the destructor will handle all finalization
|
||||
* issues. Note that when a hotel is destroyed, it will delete all
|
||||
* pending events from the event base (i.e., all pending eviction
|
||||
* callbacks); no further eviction callbacks will be invoked.
|
||||
*/
|
||||
|
||||
#ifndef PMIX_HOTEL_H
|
||||
#define PMIX_HOTEL_H
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
#include "private/types.h"
|
||||
#include "private/prefetch.h"
|
||||
#include "pmix/pmix_common.h"
|
||||
#include "src/class/pmix_object.h"
|
||||
#include PMIX_EVENT_HEADER
|
||||
#include <pmix/rename.h>
|
||||
#include "src/util/output.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct pmix_hotel_t;
|
||||
|
||||
/* User-supplied function to be invoked when an occupant is evicted. */
|
||||
typedef void (*pmix_hotel_eviction_callback_fn_t)(struct pmix_hotel_t *hotel,
|
||||
int room_num,
|
||||
void *occupant);
|
||||
|
||||
/* Note that this is an internal data structure; it is not part of the
|
||||
public pmix_hotel interface. Public consumers of pmix_hotel
|
||||
shouldn't need to use this struct at all (we only have it here in
|
||||
this .h file because some functions are inlined for speed, and need
|
||||
to get to the internals of this struct).
|
||||
|
||||
The room struct should be as small as possible to be cache
|
||||
friendly. Specifically: it would be great if multiple rooms could
|
||||
fit in a single cache line because we'll always allocate a
|
||||
contiguous set of rooms in an array. */
|
||||
typedef struct {
|
||||
void *occupant;
|
||||
pmix_event_t eviction_timer_event;
|
||||
} pmix_hotel_room_t;
|
||||
|
||||
/* Note that this is an internal data structure; it is not part of the
|
||||
public pmix_hotel interface. Public consumers of pmix_hotel
|
||||
shouldn't need to use this struct at all (we only have it here in
|
||||
this .h file because some functions are inlined for speed, and need
|
||||
to get to the internals of this struct).
|
||||
|
||||
Use a unique struct for holding the arguments for eviction
|
||||
callbacks. We *could* make the to-be-evicted pmix_hotel_room_t
|
||||
instance as the argument, but we don't, for 2 reasons:
|
||||
|
||||
1. We want as many pmix_hotel_room_t's to fit in a cache line as
|
||||
possible (i.e., to be as cache-friendly as possible). The
|
||||
common/fast code path only needs to access the data in the
|
||||
pmix_hotel_room_t (and not the callback argument data).
|
||||
|
||||
2. Evictions will be uncommon, so we don't mind penalizing them a
|
||||
bit by making the data be in a separate cache line.
|
||||
*/
|
||||
typedef struct {
|
||||
struct pmix_hotel_t *hotel;
|
||||
int room_num;
|
||||
} pmix_hotel_room_eviction_callback_arg_t;
|
||||
|
||||
typedef struct pmix_hotel_t {
|
||||
/* make this an object */
|
||||
pmix_object_t super;
|
||||
|
||||
/* Max number of rooms in the hotel */
|
||||
int num_rooms;
|
||||
|
||||
/* event base to be used for eviction timeout */
|
||||
pmix_event_base_t *evbase;
|
||||
struct timeval eviction_timeout;
|
||||
pmix_hotel_eviction_callback_fn_t evict_callback_fn;
|
||||
|
||||
/* All rooms in this hotel */
|
||||
pmix_hotel_room_t *rooms;
|
||||
|
||||
/* Separate array for all the eviction callback arguments (see
|
||||
rationale above for why this is a separate array) */
|
||||
pmix_hotel_room_eviction_callback_arg_t *eviction_args;
|
||||
|
||||
/* All currently unoccupied rooms in this hotel (not necessarily
|
||||
in any particular order) */
|
||||
int *unoccupied_rooms;
|
||||
int last_unoccupied_room;
|
||||
} pmix_hotel_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_hotel_t);
|
||||
|
||||
/**
|
||||
* Initialize the hotel.
|
||||
*
|
||||
* @param hotel Pointer to a hotel (IN)
|
||||
* @param num_rooms The total number of rooms in the hotel (IN)
|
||||
* @param evbase Pointer to event base used for eviction timeout
|
||||
* @param eviction_timeout Max length of a stay at the hotel before
|
||||
* the eviction callback is invoked (in microseconds)
|
||||
* @param eviction_event_priority Event lib priority for the eviction timeout
|
||||
* @param evict_callback_fn Callback function invoked if an occupant
|
||||
* does not check out before the eviction_timeout.
|
||||
*
|
||||
* NOTE: If the event base is NULL, then no eviction timer
|
||||
* will be set - occupants will remain checked into the hotel until
|
||||
* explicitly checked out.
|
||||
*
|
||||
* Also note: the eviction_callback_fn should absolutely not call any
|
||||
* of the hotel checkout functions. Specifically: the occupant has
|
||||
* already been ("forcibly") checked out *before* the
|
||||
* eviction_callback_fn is invoked.
|
||||
*
|
||||
* @return PMIX_SUCCESS if all initializations were successful. Otherwise,
|
||||
* the error indicates what went wrong in the function.
|
||||
*/
|
||||
PMIX_DECLSPEC int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms,
|
||||
pmix_event_base_t *evbase,
|
||||
uint32_t eviction_timeout,
|
||||
int eviction_event_priority,
|
||||
pmix_hotel_eviction_callback_fn_t evict_callback_fn);
|
||||
|
||||
/**
|
||||
* Check in an occupant to the hotel.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param occupant Occupant to check in (opaque to the hotel) (IN)
|
||||
* @param room The room number that identifies this occupant in the
|
||||
* hotel (OUT).
|
||||
*
|
||||
* If there is room in the hotel, the occupant is checked in and the
|
||||
* timer for that occupant is started. The occupant's room is
|
||||
* returned in the "room" param.
|
||||
*
|
||||
* Note that once a room's checkout_expire timer expires, the occupant
|
||||
* is forcibly checked out, and then the eviction callback is invoked.
|
||||
*
|
||||
* @return PMIX_SUCCESS if the occupant is successfully checked in,
|
||||
* and the room parameter will contain a valid value.
|
||||
* @return PMIX_ERR_OUT_OF_RESOURCE if the hotel is full. Try
|
||||
* again later.
|
||||
*/
|
||||
static inline int pmix_hotel_checkin(pmix_hotel_t *hotel,
                                     void *occupant,
                                     int *room_num)
{
    int slot;

    /* A negative index means the unoccupied stack is empty: hotel full */
    if (PMIX_UNLIKELY(hotel->last_unoccupied_room < 0)) {
        return PMIX_ERR_OUT_OF_RESOURCE;
    }

    /* Pop a room number off the unoccupied stack and move in */
    slot = hotel->unoccupied_rooms[hotel->last_unoccupied_room];
    hotel->last_unoccupied_room--;
    *room_num = slot;
    hotel->rooms[slot].occupant = occupant;

    /* Make the room's eviction timer pending, if timers are in use */
    if (NULL != hotel->evbase) {
        event_add(&(hotel->rooms[slot].eviction_timer_event),
                  &(hotel->eviction_timeout));
    }

    return PMIX_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Same as pmix_hotel_checkin(), but slightly optimized for when the
|
||||
* caller *knows* that there is a room available.
|
||||
*/
|
||||
static inline void pmix_hotel_checkin_with_res(pmix_hotel_t *hotel,
                                               void *occupant,
                                               int *room_num)
{
    int slot;

    /* No availability test here: pop a room number off the
       unoccupied stack straight away */
    slot = hotel->unoccupied_rooms[hotel->last_unoccupied_room];
    hotel->last_unoccupied_room--;
    *room_num = slot;

    /* A room taken from the unoccupied stack must be empty */
    assert(hotel->rooms[slot].occupant == NULL);
    hotel->rooms[slot].occupant = occupant;

    /* Make the room's eviction timer pending, if timers are in use */
    if (NULL != hotel->evbase) {
        event_add(&(hotel->rooms[slot].eviction_timer_event),
                  &(hotel->eviction_timeout));
    }
}
|
||||
|
||||
/**
|
||||
* Check the specified occupant out of the hotel.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
*
|
||||
* If there is an occupant in the room, their timer is canceled and
|
||||
* they are checked out.
|
||||
*
|
||||
* Nothing is returned (as a minor optimization).
|
||||
*/
|
||||
static inline void pmix_hotel_checkout(pmix_hotel_t *hotel, int room_num)
{
    pmix_hotel_room_t *room;

    /* Bozo check: the room number must index into the rooms array
       (a negative value would read before the start of it) */
    assert(0 <= room_num && room_num < hotel->num_rooms);

    /* If there's an occupant in the room, check them out */
    room = &(hotel->rooms[room_num]);
    if (PMIX_LIKELY(NULL != room->occupant)) {
        /* Do not change this logic without also changing the same
           logic in pmix_hotel_checkout_and_return_occupant() and
           pmix_hotel.c:local_eviction_callback(). */
        room->occupant = NULL;
        /* cancel the pending eviction timer, if timers are in use */
        if (NULL != hotel->evbase) {
            event_del(&(room->eviction_timer_event));
        }
        /* push the room back onto the unoccupied stack */
        hotel->last_unoccupied_room++;
        assert(hotel->last_unoccupied_room < hotel->num_rooms);
        hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num;
    }

    /* Don't bother returning whether we actually checked someone out
       or not (because this is in the critical performance path) --
       assume the upper layer knows what it's doing. */
}
|
||||
|
||||
/**
|
||||
* Check the specified occupant out of the hotel and return the occupant.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
* @param void * occupant (OUT)
|
||||
* If there is an occupant in the room, their timer is canceled and
|
||||
* they are checked out.
|
||||
*
|
||||
* Use this checkout when the caller needs the occupant
|
||||
*/
|
||||
static inline void pmix_hotel_checkout_and_return_occupant(pmix_hotel_t *hotel, int room_num, void **occupant)
{
    pmix_hotel_room_t *room;

    /* Bozo check: the room number must index into the rooms array
       (a negative value would read before the start of it) */
    assert(0 <= room_num && room_num < hotel->num_rooms);

    /* If there's an occupant in the room, check them out */
    room = &(hotel->rooms[room_num]);
    if (PMIX_LIKELY(NULL != room->occupant)) {
        /* NOTE(review): the first argument of pmix_output() here is 10;
           confirm whether that is meant as an output id or a verbosity
           level */
        pmix_output (10, "checking out occupant %p from room num %d", room->occupant, room_num);
        /* Do not change this logic without also changing the same
           logic in pmix_hotel_checkout() and
           pmix_hotel.c:local_eviction_callback(). */
        *occupant = room->occupant;
        room->occupant = NULL;
        /* cancel the pending eviction timer, if timers are in use */
        if (NULL != hotel->evbase) {
            event_del(&(room->eviction_timer_event));
        }
        /* push the room back onto the unoccupied stack */
        hotel->last_unoccupied_room++;
        assert(hotel->last_unoccupied_room < hotel->num_rooms);
        hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num;
    }
    else {
        /* empty room: report that there was nobody to check out */
        *occupant = NULL;
    }
}
|
||||
|
||||
/**
|
||||
* Returns true if the hotel is empty (no occupant)
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @return bool true if empty false if there is a occupant(s)
|
||||
*
|
||||
*/
|
||||
static inline bool pmix_hotel_is_empty (pmix_hotel_t *hotel)
{
    /* The hotel is empty exactly when every room number is on the
       unoccupied stack, i.e. the stack top is the last index */
    return hotel->last_unoccupied_room == hotel->num_rooms - 1;
}
|
||||
|
||||
/**
|
||||
* Access the occupant of a room, but leave them checked into their room.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
* @param void * occupant (OUT)
|
||||
*
|
||||
* This accessor function is typically used to cycle across the occupants
|
||||
* to check for someone already present that matches a description.
|
||||
*/
|
||||
static inline void pmix_hotel_knock(pmix_hotel_t *hotel, int room_num, void **occupant)
{
    pmix_hotel_room_t *room;

    /* Bozo check: the room number must index into the rooms array
       (a negative value would read before the start of it) */
    assert(0 <= room_num && room_num < hotel->num_rooms);

    /* Default answer: nobody home */
    *occupant = NULL;

    /* If there's an occupant in the room, have them come to the door;
       the room and its timer are left untouched */
    room = &(hotel->rooms[room_num]);
    if (PMIX_LIKELY(NULL != room->occupant)) {
        pmix_output (10, "occupant %p in room num %d responded to knock", room->occupant, room_num);
        *occupant = room->occupant;
    }
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* PMIX_HOTEL_H */
|
154
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c
Обычный файл
154
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c
Обычный файл
@ -0,0 +1,154 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "pmix/pmix_common.h"
|
||||
#include "src/class/pmix_ring_buffer.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
static void pmix_ring_buffer_construct(pmix_ring_buffer_t *);
|
||||
static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *);
|
||||
|
||||
PMIX_CLASS_INSTANCE(pmix_ring_buffer_t, pmix_object_t,
|
||||
pmix_ring_buffer_construct,
|
||||
pmix_ring_buffer_destruct);
|
||||
|
||||
/*
|
||||
* pmix_ring_buffer constructor
|
||||
*/
|
||||
static void pmix_ring_buffer_construct(pmix_ring_buffer_t *ring)
{
    /* no storage until pmix_ring_buffer_init() allocates it */
    ring->addr = NULL;
    ring->size = 0;

    /* next push goes to slot 0; tail == -1 marks an empty ring */
    ring->head = 0;
    ring->tail = -1;
}
|
||||
|
||||
/*
|
||||
* pmix_ring_buffer destructor
|
||||
*/
|
||||
static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *ring)
{
    /* free(NULL) is a no-op, so the slot array can be released
       unconditionally; the stored pointers themselves are not freed */
    free(ring->addr);
    ring->addr = NULL;
    ring->size = 0;
}
|
||||
|
||||
/**
|
||||
* initialize a ring object
|
||||
*/
|
||||
int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size)
|
||||
{
|
||||
/* check for errors */
|
||||
if (NULL == ring) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Allocate and set the ring to NULL */
|
||||
ring->addr = (char **)calloc(size * sizeof(char*), 1);
|
||||
if (NULL == ring->addr) { /* out of memory */
|
||||
return PMIX_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
ring->size = size;
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Store ptr in the slot at the head of the ring and advance the head.
 * If that slot already held a non-NULL pointer, that pointer is
 * returned and the tail is moved on; otherwise NULL is returned.
 */
void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr)
{
    const int slot = ring->head;
    const int last = ring->size - 1;
    char *displaced = ring->addr[slot];

    /* an occupied head slot means its entry is being overwritten,
       so the tail has to move on */
    if (NULL != displaced) {
        ring->tail = (ring->tail == last) ? 0 : slot + 1;
    }

    ring->addr[slot] = (char*)ptr;

    /* first entry on an empty ring: it is also the tail */
    if (ring->tail < 0) {
        ring->tail = slot;
    }

    /* advance the head, wrapping at the end of the array */
    ring->head = (slot == last) ? 0 : slot + 1;

    return (void*)displaced;
}
|
||||
|
||||
/*
 * Remove and return the pointer stored at the tail of the ring.
 * Returns NULL when the ring is marked empty (tail == -1).
 */
void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring)
{
    char *oldest;

    /* nothing has been put on the ring yet */
    if (-1 == ring->tail) {
        return NULL;
    }

    /* take the entry at the tail and clear its slot */
    oldest = (char*)ring->addr[ring->tail];
    ring->addr[ring->tail] = NULL;

    /* advance the tail, wrapping at the end of the array */
    ring->tail = (ring->tail == ring->size - 1) ? 0 : ring->tail + 1;

    /* tail catching up with head means the ring is now empty */
    if (ring->tail == ring->head) {
        ring->tail = -1;
    }

    return (void*)oldest;
}
|
||||
|
||||
/*
 * Return, without removing it, the entry i slots past the tail.
 * A negative i returns the entry in the slot just before the head.
 * Returns NULL when i >= size or the ring is marked empty.
 */
void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i)
{
    int idx;

    /* index beyond the ring, or nothing stored */
    if (ring->size <= i || -1 == ring->tail) {
        return NULL;
    }

    if (i < 0) {
        /* slot just before the head, wrapping at the array start */
        idx = (0 == ring->head) ? ring->size - 1 : ring->head - 1;
        return (void*)ring->addr[idx];
    }

    /* offset from the tail, corrected for wrap-around */
    idx = ring->tail + i;
    if (ring->size <= idx) {
        idx -= ring->size;
    }
    return (void*)ring->addr[idx];
}
|
102
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h
Обычный файл
102
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h
Обычный файл
@ -0,0 +1,102 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PMIX_RING_BUFFER_H
|
||||
#define PMIX_RING_BUFFER_H
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include "src/class/pmix_object.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* dynamic pointer ring
|
||||
*/
|
||||
struct pmix_ring_buffer_t {
|
||||
/** base class */
|
||||
pmix_object_t super;
|
||||
/* head/tail indices */
|
||||
int head;
|
||||
int tail;
|
||||
/** size of list, i.e. number of elements in addr */
|
||||
int size;
|
||||
/** pointer to ring */
|
||||
char **addr;
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct pmix_ring_buffer_t pmix_ring_buffer_t;
|
||||
/**
|
||||
* Class declaration
|
||||
*/
|
||||
PMIX_DECLSPEC PMIX_CLASS_DECLARATION(pmix_ring_buffer_t);
|
||||
|
||||
/**
|
||||
* Initialize the ring buffer, defining its size.
|
||||
*
|
||||
* @param ring Pointer to a ring buffer (IN/OUT)
|
||||
* @param size The number of elements in the ring (IN)
|
||||
*
|
||||
* @return PMIX_SUCCESS if all initializations were successful. Otherwise,
|
||||
* the error indicates what went wrong in the function.
|
||||
*/
|
||||
PMIX_DECLSPEC int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size);
|
||||
|
||||
/**
|
||||
* Push an item onto the ring buffer, displacing the oldest
|
||||
* item on the ring if the ring is full
|
||||
*
|
||||
* @param ring Pointer to ring (IN)
|
||||
* @param ptr Pointer value (IN)
|
||||
*
|
||||
* @return Pointer to displaced item, NULL if ring
|
||||
* is not yet full
|
||||
*/
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr);
|
||||
|
||||
|
||||
/**
|
||||
* Pop an item off of the ring. The oldest entry on the ring will be
|
||||
* returned. If nothing on the ring, NULL is returned.
|
||||
*
|
||||
* @param ring Pointer to ring (IN)
|
||||
*
|
||||
* @return Pointer to the oldest item, or NULL if the ring is empty.
|
||||
*/
|
||||
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring);
|
||||
|
||||
/*
|
||||
* Access an element of the ring, without removing it, indexed
|
||||
* starting at the tail - a value of -1 will return the element
|
||||
* at the head of the ring
|
||||
*/
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* PMIX_RING_BUFFER_H */
|
@ -44,6 +44,7 @@
|
||||
#define PMI_MAX_KVSNAME_LEN PMIX_MAX_NSLEN /* Maximum size of KVS name */
|
||||
#define PMI_MAX_VAL_LEN 4096 /* Maximum size of a PMI value */
|
||||
|
||||
|
||||
#define PMI_CHECK() \
|
||||
do { \
|
||||
if (!pmi_init) { \
|
||||
@ -55,25 +56,37 @@
|
||||
static pmix_status_t convert_int(int *value, pmix_value_t *kv);
|
||||
static int convert_err(pmix_status_t rc);
|
||||
static pmix_proc_t myproc;
|
||||
static bool data_commited = false;
|
||||
static int pmi_init = 0;
|
||||
|
||||
int PMI_Init(int *spawned)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_status_t rc;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
if (PMIX_SUCCESS != PMIx_Init(&myproc)) {
|
||||
return PMI_ERR_INIT;
|
||||
}
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (NULL != spawned) {
|
||||
/* get the spawned flag */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) {
|
||||
rc = convert_int(spawned, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
return convert_err(rc);
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
/* if not found, default to not spawned */
|
||||
@ -82,7 +95,12 @@ int PMI_Init(int *spawned)
|
||||
}
|
||||
pmi_init = 1;
|
||||
|
||||
return PMI_SUCCESS;
|
||||
rc = PMIX_SUCCESS;
|
||||
|
||||
error:
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Initialized(PMI_BOOL *initialized)
|
||||
@ -160,8 +178,6 @@ int PMI_KVS_Commit(const char kvsname[])
|
||||
kvsname);
|
||||
|
||||
rc = PMIx_Commit();
|
||||
/* PMIx permits only one data commit! */
|
||||
data_commited = true;
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -169,17 +185,14 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
uint32_t i;
|
||||
static pmix_proc_t proc;
|
||||
uint32_t procnum;
|
||||
proc = myproc;
|
||||
pmix_proc_t proc;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) {
|
||||
return PMI_ERR_INVALID_KVS;
|
||||
}
|
||||
if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN)) {
|
||||
if ((key == NULL) || (strlen(key) > PMI_MAX_KEY_LEN)) {
|
||||
return PMI_ERR_INVALID_KEY;
|
||||
}
|
||||
if (value == NULL) {
|
||||
@ -189,60 +202,22 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"PMI_KVS_Get: KVS=%s, key=%s value=%s", kvsname, key, value);
|
||||
|
||||
/* PMI-1 expects resource manager to set
|
||||
* process mapping in ANL notation. */
|
||||
if (!strcmp(key, ANL_MAPPING)) {
|
||||
/* we are looking in the job-data. If there is nothing there
|
||||
* we don't want to look in rank's data, thus set rank to wildcard */
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) &&
|
||||
(NULL != val) && (PMIX_STRING == val->type)) {
|
||||
strncpy(value, val->data.string, length);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
return PMI_SUCCESS;
|
||||
} else {
|
||||
/* artpol:
|
||||
* Some RMs (e.g. SLURM) already have ANL precomputed. They export it
|
||||
* through PMIX_ANL_MAP variable.
|
||||
* If we haven't found it we want to have our own packing functionality
|
||||
* since it's common.
|
||||
* Somebody else has to write it since I've already done that for
|
||||
* GPL'ed SLURM :) */
|
||||
return PMI_FAIL;
|
||||
/* retrieve the data from PMIx - since we don't have a rank,
|
||||
* we indicate that by passing the UNDEF value */
|
||||
(void)strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
rc = PMIx_Get(&proc, key, NULL, 0, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, length);
|
||||
}
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
|
||||
/* We don't know what process keeps this data. So it looks like we need to
|
||||
* check each process.
|
||||
* TODO: Is there any better way?
|
||||
* WARNING: this may lead to VERY long hangs if we ask for an unknown key
|
||||
* before we've done Commit on all nodes. We need a workaround for that.
|
||||
*
|
||||
* SOLUTION: perhaps provide an "OK if nothing" info flag to tell PMIx that
|
||||
* the key is supposed to already be there and, if nothing is there, to give up
|
||||
* with an error rather than trying to use direct modex.
|
||||
*/
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmi1: executing put for KVS %s, key %s value %s", kvsname, key,
|
||||
value);
|
||||
return convert_err(rc);
|
||||
}
|
||||
procnum = val->data.uint32;
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
|
||||
for (i = 0; i < procnum; i++) {
|
||||
proc.rank = i;
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, key, NULL, 0, &val) && (NULL != val)
|
||||
&& (PMIX_STRING == val->type)) {
|
||||
strncpy(value, val->data.string, length);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
return PMI_SUCCESS;
|
||||
}
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
}
|
||||
return PMI_FAIL;
|
||||
}
|
||||
|
||||
/* Barrier only applies to our own nspace, and we want all
|
||||
@ -253,28 +228,28 @@ int PMI_Barrier(void)
|
||||
pmix_info_t buf;
|
||||
int ninfo = 0;
|
||||
pmix_info_t *info = NULL;
|
||||
bool val = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (data_commited) {
|
||||
bool val = 1;
|
||||
info = &buf;
|
||||
PMIX_INFO_CONSTRUCT(info);
|
||||
PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL);
|
||||
ninfo = 1;
|
||||
}
|
||||
rc = PMIx_Fence(NULL, 0, info, ninfo);
|
||||
|
||||
if (NULL != info) {
|
||||
PMIX_INFO_DESTRUCT(info);
|
||||
}
|
||||
return rc;
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
@ -282,13 +257,23 @@ int PMI_Get_size(int *size)
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val)) {
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_JOB_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_rank(int *rk)
|
||||
@ -307,6 +292,9 @@ int PMI_Get_universe_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
@ -314,29 +302,56 @@ int PMI_Get_universe_size(int *size)
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val)) {
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
return PMI_FAIL;
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_appnum(int *appnum)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (NULL != appnum &&
|
||||
PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
if (NULL == appnum) {
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Publish_name(const char service_name[], const char port[])
|
||||
@ -461,24 +476,34 @@ int PMI_Get_clique_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (NULL == size) {
|
||||
return PMI_ERR_INVALID_ARGS;
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) {
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_clique_ranks(int ranks[], int length)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
char **rks;
|
||||
int i;
|
||||
@ -498,9 +523,9 @@ int PMI_Get_clique_ranks(int ranks[], int length)
|
||||
}
|
||||
pmix_argv_free(rks);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI_SUCCESS;
|
||||
}
|
||||
return PMI_FAIL;
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_KVS_Get_my_name(char kvsname[], int length)
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
|
||||
#define PMI2_CHECK() \
|
||||
do { \
|
||||
if (!pmi2_init) { \
|
||||
@ -55,6 +56,8 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
if (PMIX_SUCCESS != PMIx_Init(&myproc)) {
|
||||
return PMI2_ERR_INIT;
|
||||
@ -65,14 +68,20 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (NULL != size) {
|
||||
/* get the universe size - this will likely pull
|
||||
* down all attributes assigned to the job, thus
|
||||
* making all subsequent "get" operations purely
|
||||
* local */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -80,13 +89,14 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
}
|
||||
} else {
|
||||
/* cannot continue without this info */
|
||||
return PMI2_ERR_INIT;
|
||||
rc = PMIX_ERR_INIT;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != spawned) {
|
||||
/* get the spawned flag */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) {
|
||||
rc = convert_int(spawned, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -100,7 +110,7 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
|
||||
if (NULL != appnum) {
|
||||
/* get our appnum */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -113,9 +123,11 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
}
|
||||
pmi2_init = 1;
|
||||
|
||||
return PMI2_SUCCESS;
|
||||
rc = PMIX_SUCCESS;
|
||||
|
||||
error:
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -175,20 +187,25 @@ int PMI2_KVS_Fence(void)
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
pmix_output_verbose(3, pmix_globals.debug_output, "PMI2_KVS_Fence");
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
/* we want all data to be collected upon completion */
|
||||
{
|
||||
pmix_info_t info;
|
||||
int ninfo = 1;
|
||||
bool val = 1;
|
||||
pmix_info_t info[1];
|
||||
bool val_data = 1;
|
||||
|
||||
PMIX_INFO_CONSTRUCT(&info);
|
||||
PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &val, PMIX_BOOL);
|
||||
rc = PMIx_Fence(NULL, 0, &info, ninfo);
|
||||
PMIX_INFO_DESTRUCT(&info);
|
||||
/* set controlling parameters
|
||||
* PMIX_COLLECT_DATA - meet legacy PMI2 requirement
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &val_data, PMIX_BOOL);
|
||||
|
||||
rc = PMIx_Fence(NULL, 0, &info[0], 1);
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
}
|
||||
|
||||
return convert_err(rc);
|
||||
@ -206,10 +223,12 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id,
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
uint32_t procnum = 0;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
/* set default */
|
||||
*vallen = 0;
|
||||
|
||||
if ((NULL == key) || (NULL == value)) {
|
||||
return PMI2_ERR_INVALID_ARG;
|
||||
}
|
||||
@ -219,37 +238,22 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id,
|
||||
|
||||
(void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), PMIX_MAX_NSLEN);
|
||||
if (src_pmi_id == PMI2_ID_NULL) {
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) {
|
||||
return convert_err(rc);
|
||||
}
|
||||
procnum = val->data.uint32;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
proc.rank = 0;
|
||||
/* the rank is UNDEF */
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
} else {
|
||||
proc.rank = src_pmi_id;
|
||||
}
|
||||
|
||||
do {
|
||||
rc = PMIx_Get(&proc, key, NULL, 0, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, maxvalue);
|
||||
*vallen = strlen(val->data.string);
|
||||
}
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
break;
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
proc.rank++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (proc.rank < (int)procnum);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
@ -258,6 +262,8 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -265,15 +271,18 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
return PMI2_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
*found = 0;
|
||||
rc = PMIx_Get(&myproc, name, NULL, 0, &val);
|
||||
rc = PMIx_Get(&myproc, name, info, 1, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, valuelen);
|
||||
*found = 1;
|
||||
}
|
||||
@ -281,6 +290,9 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
rc = PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -307,6 +319,8 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -316,17 +330,20 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
*found = 0;
|
||||
rc = PMIx_Get(&proc, name, NULL, 0, &val);
|
||||
rc = PMIx_Get(&proc, name, info, 1, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, valuelen);
|
||||
*found = 1;
|
||||
}
|
||||
@ -334,6 +351,9 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
rc = PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -482,8 +502,10 @@ int PMI2_Job_GetRank(int *rank)
|
||||
|
||||
int PMI2_Info_GetSize(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_status_t rc = PMIX_ERROR;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -491,13 +513,20 @@ int PMI2_Info_GetSize(int *size)
|
||||
return PMI2_ERR_INVALID_ARGS;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) {
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI2_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -273,13 +273,13 @@ int PMIx_Init(pmix_proc_t *proc)
|
||||
/* get our effective id's */
|
||||
pmix_globals.uid = geteuid();
|
||||
pmix_globals.gid = getegid();
|
||||
/* default to our internal errhandler */
|
||||
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
|
||||
/* initialize the output system */
|
||||
if (!pmix_output_init()) {
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
/* default to our internal errhandler */
|
||||
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
|
||||
/* see if debug is requested */
|
||||
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
|
||||
debug_level = strtol(evar, NULL, 10);
|
||||
@ -1270,6 +1270,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: regevents callback recvd");
|
||||
|
||||
@ -1286,8 +1287,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
rc = pmix_remove_errhandler(cb->errhandler_ref);
|
||||
/* call the callback with error */
|
||||
cb->errreg_cbfunc(PMIX_ERR_SERVER_FAILED_REQUEST, -1, cb->cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* complete err handler registration with success status*/
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client:reg events cbfunc received status %d for errhandler %d",
|
||||
@ -1303,28 +1303,36 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
void *cbdata)
|
||||
{
|
||||
/* add err handler, process info keys and register for events and call the callback */
|
||||
int rc, index = 0;
|
||||
int index = 0;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler");
|
||||
"pmix: register errhandler with %d infos", (int)ninfo);
|
||||
|
||||
/* check if this handler is already registered if so return error */
|
||||
if (PMIX_SUCCESS == pmix_lookup_errhandler (errhandler, &index)) {
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) {
|
||||
/* complete request with error status and return its original reference */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - already registered");
|
||||
cbfunc(PMIX_EXISTS, index, cbdata);
|
||||
|
||||
} else if (PMIX_ERR_GRP_FOUND == rc) {
|
||||
/* just acknowledge it */
|
||||
cbfunc(PMIX_SUCCESS, index, cbdata);
|
||||
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) {
|
||||
/* if they are registering a default errhandler, then
|
||||
* overwrite the existing one with it - the index will
|
||||
* contain its location */
|
||||
pmix_add_errhandler(errhandler, info, ninfo, &index);
|
||||
} else {
|
||||
if(PMIX_SUCCESS != (rc = pmix_add_errhandler (errhandler, info, ninfo, &index))) {
|
||||
/* need to add this errhandler */
|
||||
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - error status rc=%d", rc);
|
||||
/* complete request with error*/
|
||||
cbfunc(rc, index, cbdata);
|
||||
}
|
||||
else {
|
||||
/* To do: need to determine if the client needs to process the info keys before passing it to
|
||||
server */
|
||||
} else {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo);
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
@ -1334,11 +1342,10 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(index);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
* the server acks/nacks the register events request */
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - pack events success status=%d", rc);
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
@ -1358,6 +1365,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt =1;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: deregevents_cbfunc recvd");
|
||||
|
||||
@ -1371,7 +1379,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
|
||||
}
|
||||
/* remove the err handler and call the error handler reg completion callback fn.*/
|
||||
/* remove the err handler and call the error handler dereg completion callback fn.*/
|
||||
pmix_remove_errhandler(cb->errhandler_ref);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client:dereg events cbfunc received status %d for errhandler %d",
|
||||
@ -1388,17 +1396,18 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
pmix_error_reg_info_t *errreg;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref);
|
||||
errreg = (pmix_error_reg_info_t *) pmix_pointer_array_get_item (&pmix_globals.errregs, errhandler_ref);
|
||||
|
||||
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref);
|
||||
if (NULL != errreg ) {
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) {
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(errhandler_ref);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
@ -1409,9 +1418,9 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
|
||||
}
|
||||
}
|
||||
else
|
||||
} else {
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
@ -1420,6 +1429,7 @@ static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: notifyerror_cbfunc recvd");
|
||||
|
||||
@ -1427,14 +1437,15 @@ static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
return;
|
||||
}
|
||||
|
||||
/* unpack the status code */
|
||||
if ((PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_INT))) ||
|
||||
(PMIX_SUCCESS != ret)) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
|
||||
}
|
||||
/* call the notify error completion callback fn.*/
|
||||
|
||||
/* call the notify error completion callback fn.*/
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client: notified error cbfunc received status %d ",
|
||||
ret);
|
||||
@ -1449,9 +1460,13 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_buffer_t *msg = PMIX_NEW(pmix_buffer_t);
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cmd_t cmd = PMIX_NOTIFY_CMD;
|
||||
pmix_cb_t *cb;
|
||||
|
||||
/* get the message buffer */
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
/* pack the command */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -1484,17 +1499,22 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status,
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->op_cbfunc = cbfunc;
|
||||
cb->cbdata = cbdata;
|
||||
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, notifyerror_cbfunc, cb);
|
||||
return PMIX_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
PMIX_RELEASE(msg);
|
||||
cbfunc(rc, cbdata);
|
||||
/* never call a callback function when returning an error as
|
||||
* the error tells the caller that they will never recv a
|
||||
* callback */
|
||||
return rc;
|
||||
}
|
||||
|
@ -56,16 +56,16 @@
|
||||
|
||||
#include "pmix_client_ops.h"
|
||||
|
||||
static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
static pmix_buffer_t* _pack_get(char *nspace, int rank,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_cmd_t cmd);
|
||||
|
||||
static void _getnbfn(int sd, short args, void *cbdata);
|
||||
|
||||
static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata);
|
||||
|
||||
static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata);
|
||||
static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata);
|
||||
|
||||
int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
@ -74,16 +74,6 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
pmix_cb_t *cb;
|
||||
int rc;
|
||||
|
||||
if (NULL == proc) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: %s:%d getting value for proc %s:%d key %s",
|
||||
pmix_globals.myid.nspace, pmix_globals.myid.rank,
|
||||
proc->nspace, proc->rank,
|
||||
(NULL == key) ? "NULL" : key);
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
@ -93,7 +83,7 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
* the return message is recvd */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, value_cbfunc, cb))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
@ -115,30 +105,60 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
|
||||
pmix_value_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
|
||||
if (NULL == proc) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb value for proc %s:%d key %s",
|
||||
proc->nspace, proc->rank,
|
||||
(NULL == key) ? "NULL" : key);
|
||||
int rank;
|
||||
char *nm;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* protect against bozo input */
|
||||
if (NULL == key) {
|
||||
/* if the proc is NULL, then the caller is assuming
|
||||
* that the key is universally unique within the caller's
|
||||
* own nspace. This most likely indicates that the code
|
||||
* was originally written for a legacy version of PMI.
|
||||
*
|
||||
* If the key is NULL, then the caller wants all
|
||||
* data from the specified proc. Again, this likely
|
||||
* indicates use of a legacy version of PMI.
|
||||
*
|
||||
* Either case is supported. However, we don't currently
|
||||
* support the case where -both- values are NULL */
|
||||
if (NULL == proc && NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if the key is NULL, the rank cannot be WILDCARD as
|
||||
* we cannot return all info from every rank */
|
||||
if (NULL != proc && PMIX_RANK_WILDCARD == proc->rank && NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if the given proc param is NULL, or the nspace is
|
||||
* empty, then the caller is referencing our own nspace */
|
||||
if (NULL == proc || 0 == strlen(proc->nspace)) {
|
||||
nm = pmix_globals.myid.nspace;
|
||||
} else {
|
||||
nm = (char*)proc->nspace;
|
||||
}
|
||||
|
||||
/* if the proc param is NULL, then we are seeking a key that
|
||||
* must be globally unique, so communicate this to the hash
|
||||
* functions with the UNDEF rank */
|
||||
if (NULL == proc) {
|
||||
rank = PMIX_RANK_UNDEF;
|
||||
} else {
|
||||
rank = proc->rank;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb value for proc %s:%d key %s",
|
||||
nm, rank, (NULL == key) ? "NULL" : key);
|
||||
|
||||
/* thread-shift so we can check global objects */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
(void)strncpy(cb->nspace, proc->nspace, PMIX_MAX_NSLEN);
|
||||
cb->rank = proc->rank;
|
||||
(void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN);
|
||||
cb->rank = rank;
|
||||
cb->key = (char*)key;
|
||||
cb->info = (pmix_info_t*)info;
|
||||
cb->ninfo = ninfo;
|
||||
@ -149,7 +169,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
@ -163,7 +183,7 @@ static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
cb->active = false;
|
||||
}
|
||||
|
||||
static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
static pmix_buffer_t* _pack_get(char *nspace, int rank,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_cmd_t cmd)
|
||||
{
|
||||
@ -209,7 +229,7 @@ static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
/* this callback is coming from the usock recv, and thus
|
||||
* is occurring inside of our progress thread - hence, no
|
||||
* need to thread shift */
|
||||
static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
@ -218,18 +238,19 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_value_t *val = NULL;
|
||||
int32_t cnt;
|
||||
pmix_buffer_t *bptr;
|
||||
pmix_kval_t *kp;
|
||||
pmix_nspace_t *ns, *nptr;
|
||||
int rank;
|
||||
int cur_rank;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb callback recvd");
|
||||
|
||||
if (NULL == cb) {
|
||||
/* nothing we can do */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
return;
|
||||
}
|
||||
// cache the rank
|
||||
/* cache the rank */
|
||||
rank = cb->rank;
|
||||
|
||||
/* unpack the status */
|
||||
@ -262,31 +283,36 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
* unpack and store it in the modex - this could consist
|
||||
* of buffers from multiple scopes */
|
||||
cnt = 1;
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) {
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &cur_rank, &cnt, PMIX_INT))) {
|
||||
pmix_kval_t *cur_kval;
|
||||
|
||||
cnt = 1;
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kp, &cnt, PMIX_KVAL))) {
|
||||
if (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) {
|
||||
cnt = 1;
|
||||
cur_kval = PMIX_NEW(pmix_kval_t);
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: unpacked key %s", kp->key);
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cb->rank, kp))) {
|
||||
"pmix: unpacked key %s", cur_kval->key);
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
if (NULL != cb->key && 0 == strcmp(cb->key, kp->key)) {
|
||||
if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: found requested value");
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, kp->value, PMIX_VALUE))) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(kp);
|
||||
PMIX_RELEASE(cur_kval);
|
||||
val = NULL;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain
|
||||
PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain
|
||||
cnt = 1;
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
cur_kval = PMIX_NEW(pmix_kval_t);
|
||||
}
|
||||
cnt = 1;
|
||||
PMIX_RELEASE(kp);
|
||||
PMIX_RELEASE(cur_kval);
|
||||
}
|
||||
PMIX_RELEASE(bptr); // free's the data region
|
||||
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -338,28 +364,21 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
pmix_cb_t *cbret;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t *info, *iptr;
|
||||
pmix_pointer_array_t results;
|
||||
pmix_status_t rc;
|
||||
char *nm;
|
||||
pmix_nspace_t *ns, *nptr;
|
||||
size_t n;
|
||||
size_t n, nvals;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: getnbfn value for proc %s:%d key %s",
|
||||
cb->nspace, cb->rank,
|
||||
(NULL == cb->key) ? "NULL" : cb->key);
|
||||
|
||||
/* if the nspace is empty, then the caller is referencing
|
||||
* our own nspace */
|
||||
if (0 == strlen(cb->nspace)) {
|
||||
nm = pmix_globals.myid.nspace;
|
||||
} else {
|
||||
nm = (char*)cb->nspace;
|
||||
}
|
||||
|
||||
/* find the nspace object */
|
||||
nptr = NULL;
|
||||
PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(nm, ns->nspace)) {
|
||||
if (0 == strcmp(cb->nspace, ns->nspace)) {
|
||||
nptr = ns;
|
||||
break;
|
||||
}
|
||||
@ -370,13 +389,105 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* server has never heard of it, the server will return
|
||||
* an error */
|
||||
nptr = PMIX_NEW(pmix_nspace_t);
|
||||
(void)strncpy(nptr->nspace, nm, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN);
|
||||
pmix_list_append(&pmix_globals.nspaces, &nptr->super);
|
||||
/* there is no point in looking for data in this nspace
|
||||
* object, so let's just go generate the request */
|
||||
goto request;
|
||||
}
|
||||
|
||||
/* if the key is NULL, then we have to check both the job-data
|
||||
* and the modex tables. If we don't yet have the modex data,
|
||||
* then we are going to have to go get it. So let's check that
|
||||
* case first */
|
||||
if (NULL == cb->key) {
|
||||
PMIX_CONSTRUCT(&results, pmix_pointer_array_t);
|
||||
pmix_pointer_array_init(&results, 2, INT_MAX, 1);
|
||||
nvals = 0;
|
||||
/* if the rank is WILDCARD, then they want all the job-level info,
|
||||
* so no need to check the modex */
|
||||
if (PMIX_RANK_WILDCARD != cb->rank) {
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: value retrieved from dstore");
|
||||
/* since we didn't provide them with a key, the hash function
|
||||
* must return the results in the pmix_info_array field of the
|
||||
* value */
|
||||
if (NULL == val || PMIX_INFO_ARRAY != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* save the results */
|
||||
info = (pmix_info_t*)val->data.array.array;
|
||||
for (n=0; n < val->data.array.size; n++) {
|
||||
pmix_pointer_array_add(&results, &info[n]);
|
||||
++nvals;
|
||||
}
|
||||
val->data.array.array = NULL; // protect the data
|
||||
val->data.array.size = 0;
|
||||
/* cleanup */
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
} else {
|
||||
/* if we didn't find a modex for this rank, then we need
|
||||
* to go get it. Recall that the NULL==key scenario only
|
||||
* pertains to cases where legacy PMI methods are being
|
||||
* employed. Thus, the caller wants -all- information for
|
||||
* the specified rank, not just the job-level info. */
|
||||
goto request;
|
||||
}
|
||||
}
|
||||
/* now get any data from the job-level info */
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, NULL, &val))) {
|
||||
/* since we didn't provide them with a key, the hash function
|
||||
* must return the results in the pmix_info_array field of the
|
||||
* value */
|
||||
if (NULL == val || PMIX_INFO_ARRAY != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* save the results */
|
||||
info = (pmix_info_t*)val->data.array.array;
|
||||
for (n=0; n < val->data.array.size; n++) {
|
||||
pmix_pointer_array_add(&results, &info[n]);
|
||||
++nvals;
|
||||
}
|
||||
val->data.array.array = NULL; // protect the data
|
||||
val->data.array.size = 0;
|
||||
/* cleanup */
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
}
|
||||
/* now let's package up the results */
|
||||
PMIX_VALUE_CREATE(val, 1);
|
||||
val->type = PMIX_INFO_ARRAY;
|
||||
val->data.array.size = nvals;
|
||||
PMIX_INFO_CREATE(iptr, nvals);
|
||||
val->data.array.array = (struct pmix_info_t*)iptr;
|
||||
for (n=0; n < (size_t)results.size && n < nvals; n++) {
|
||||
if (NULL != (info = (pmix_info_t*)pmix_pointer_array_get_item(&results, n))) {
|
||||
(void)strncpy(iptr[n].key, info->key, PMIX_MAX_KEYLEN);
|
||||
pmix_value_xfer(&iptr[n].value, &info->value);
|
||||
PMIX_INFO_FREE(info, 1);
|
||||
}
|
||||
}
|
||||
/* done with results array */
|
||||
PMIX_DESTRUCT(&results);
|
||||
/* return the result to the caller */
|
||||
cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
|
||||
/* the requested data could be in the job-data table, so let's
|
||||
* just check there first. */
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) {
|
||||
@ -433,7 +544,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* the error */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"Error requesting key=%s for rank = %d, namespace = %s",
|
||||
cb->key, cb->rank, nm);
|
||||
cb->key, cb->rank, cb->nspace);
|
||||
cb->value_cbfunc(rc, NULL, cb->cbdata);
|
||||
/* protect the data */
|
||||
cb->procs = NULL;
|
||||
@ -461,7 +572,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
/* they don't want us to try and retrieve it */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",
|
||||
cb->key, cb->rank, nm);
|
||||
cb->key, cb->rank, cb->nspace);
|
||||
cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
@ -472,7 +583,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* this nspace:rank. If we do, then no need to ask again as the
|
||||
* request will return _all_ data from that proc */
|
||||
PMIX_LIST_FOREACH(cbret, &pmix_client_globals.pending_requests, pmix_cb_t) {
|
||||
if (0 == strncmp(cbret->nspace, nm, PMIX_MAX_NSLEN) &&
|
||||
if (0 == strncmp(cbret->nspace, cb->nspace, PMIX_MAX_NSLEN) &&
|
||||
cbret->rank == cb->rank) {
|
||||
/* we do have a pending request, but we still need to track this
|
||||
* outstanding request so we can satisfy it once the data is returned */
|
||||
@ -483,7 +594,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
|
||||
/* we don't have a pending request, so let's create one - don't worry
|
||||
* about packing the key as we return everything from that proc */
|
||||
msg = pack_get(nm, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD);
|
||||
msg = _pack_get(cb->nspace, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD);
|
||||
if (NULL == msg) {
|
||||
cb->value_cbfunc(PMIX_ERROR, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
@ -496,5 +607,5 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
pmix_list_append(&pmix_client_globals.pending_requests, &cb->super);
|
||||
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, getnb_cbfunc, cb);
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, _getnb_cbfunc, cb);
|
||||
}
|
||||
|
@ -150,11 +150,13 @@ pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo,
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
if (0 < napps) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
|
@ -43,7 +43,7 @@ void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
* call pmix_server_register_for_events, and call cbfunc with
|
||||
* reference to the errhandler */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"registering client err handler");
|
||||
"registering client err handler with %d info", (int)ninfo);
|
||||
pmix_client_register_errhandler(info, ninfo,
|
||||
errhandler,
|
||||
cbfunc, cbdata);
|
||||
@ -80,17 +80,17 @@ pmix_status_t PMIx_Notify_error(pmix_status_t status,
|
||||
int rc;
|
||||
|
||||
if (pmix_globals.server) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_notify_error error =%d, rc=%d", status, rc);
|
||||
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
cbfunc, cbdata);
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
"pmix_server_notify_error error =%d, rc=%d", status, rc);
|
||||
} else {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_notify_error error =%d, rc=%d", status, rc);
|
||||
rc = pmix_client_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
cbfunc, cbdata);
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
"pmix_client_notify_error error =%d, rc=%d", status, rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -56,7 +56,8 @@ void pmix_globals_init(void)
|
||||
{
|
||||
memset(&pmix_globals.myid, 0, sizeof(pmix_proc_t));
|
||||
PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t);
|
||||
pmix_pointer_array_init(&pmix_globals.errregs, 1, PMIX_MAX_ERROR_REGISTRATIONS, 1);
|
||||
PMIX_CONSTRUCT(&pmix_globals.errregs, pmix_pointer_array_t);
|
||||
pmix_pointer_array_init(&pmix_globals.errregs, 16, PMIX_MAX_ERROR_REGISTRATIONS, 16);
|
||||
}
|
||||
|
||||
void pmix_globals_finalize(void)
|
||||
@ -68,6 +69,7 @@ void pmix_globals_finalize(void)
|
||||
if (NULL != pmix_globals.cache_remote) {
|
||||
PMIX_RELEASE(pmix_globals.cache_remote);
|
||||
}
|
||||
PMIX_DESTRUCT(&pmix_globals.errregs);
|
||||
}
|
||||
|
||||
|
||||
@ -158,6 +160,7 @@ PMIX_CLASS_INSTANCE(pmix_rank_info_t,
|
||||
|
||||
static void errcon(pmix_error_reg_info_t *p)
|
||||
{
|
||||
p->sglhdlr = false;
|
||||
p->errhandler = NULL;
|
||||
p->info = NULL;
|
||||
p->ninfo = 0;
|
||||
@ -165,7 +168,9 @@ static void errcon(pmix_error_reg_info_t *p)
|
||||
static void errdes(pmix_error_reg_info_t *p)
|
||||
{
|
||||
p->errhandler = NULL;
|
||||
// PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
if (NULL != p->info) {
|
||||
PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
}
|
||||
}
|
||||
PMIX_CLASS_INSTANCE(pmix_error_reg_info_t,
|
||||
pmix_object_t,
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,6 +44,7 @@ BEGIN_C_DECLS
|
||||
/* define a structure for tracking error registrations */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
bool sglhdlr; // registers a specific error status handler
|
||||
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
|
||||
pmix_info_t *info; /* error info keys registered with the handler */
|
||||
size_t ninfo; /* size of info */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -241,6 +241,8 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.client_eventregs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.notifications, pmix_ring_buffer_t);
|
||||
pmix_ring_buffer_init(&pmix_server_globals.notifications, 256);
|
||||
|
||||
/* see if debug is requested */
|
||||
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
|
||||
@ -1111,6 +1113,7 @@ static bool match_error_registration(pmix_regevents_info_t *reginfoptr, pmix_not
|
||||
static void _notify_error(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata;
|
||||
pmix_notify_caddy_t *rbout;
|
||||
pmix_status_t rc;
|
||||
pmix_cmd_t cmd = PMIX_NOTIFY_CMD;
|
||||
int i;
|
||||
@ -1119,25 +1122,27 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
pmix_regevents_info_t *reginfoptr;
|
||||
bool notify, notifyall;
|
||||
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: _notify_error notifying client of error %d",
|
||||
cd->status);
|
||||
|
||||
/* pack the command */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* pack the status */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* pack the error procs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->error_nprocs, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (0 < cd->error_nprocs) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->error_procs, cd->error_nprocs, PMIX_PROC))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -1145,11 +1150,13 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* pack the info */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (0 < cd->ninfo) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->info, cd->ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -1157,62 +1164,23 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
|
||||
/* if the RM gave us a NULL proc list, then we are notifying everyone */
|
||||
if (NULL == cd->procs) {
|
||||
notifyall = true;
|
||||
} else {
|
||||
notifyall = false;
|
||||
|
||||
/* we cannot know if everyone who wants this notice has had a chance
|
||||
* to register for it - the notice may be coming too early. So cache
|
||||
* the message until all local procs have received it, or it ages to
|
||||
* the point where it gets pushed out by more recent events */
|
||||
PMIX_RETAIN(cd);
|
||||
rbout = pmix_ring_buffer_push(&pmix_server_globals.notifications, cd);
|
||||
|
||||
/* if an older event was bumped, release it */
|
||||
if (NULL != rbout) {
|
||||
PMIX_RELEASE(rbout);
|
||||
}
|
||||
|
||||
/* cycle across our connected clients and send the message to
|
||||
/* cycle across our registered events and send the message to
|
||||
* any within the specified proc array */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (!notifyall) {
|
||||
/* check to see if this proc matches that of one in the specified array */
|
||||
notify = false;
|
||||
for (j=0; j < cd->nprocs; j++) {
|
||||
if (0 != strncmp(peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (PMIX_RANK_WILDCARD == cd->procs[j].rank ||
|
||||
cd->procs[j].rank == peer->info->rank) {
|
||||
notify = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!notify) {
|
||||
/* if we are not notifying everyone, and this proc isn't to
|
||||
* be notified, then just continue the main loop */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* get the client's error registration and check if client
|
||||
* requested notification of this error */
|
||||
reginfoptr = NULL;
|
||||
notify = false;
|
||||
PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) {
|
||||
if (reginfoptr->peer == peer) {
|
||||
/* check if the client has registered for this error
|
||||
* by parsing the info keys */
|
||||
notify = match_error_registration(reginfoptr, cd);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server _notify_error - match error registration returned notify =%d ", notify);
|
||||
}
|
||||
if (notify) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (notify) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: _notify_error - notifying process rank %d error %d",
|
||||
peer->info->rank, cd->status);
|
||||
PMIX_RETAIN(cd->buf);
|
||||
PMIX_SERVER_QUEUE_REPLY(peer, 0, cd->buf);
|
||||
}
|
||||
pmix_server_check_notifications(reginfoptr, cd);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
@ -1234,12 +1202,32 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
||||
|
||||
cd = PMIX_NEW(pmix_notify_caddy_t);
|
||||
cd->status = status;
|
||||
cd->procs = procs;
|
||||
/* have to copy the info here as we may have to cache this
|
||||
* notification until procs have a chance to register for it */
|
||||
if (NULL != procs) {
|
||||
cd->nprocs = nprocs;
|
||||
cd->error_procs = error_procs;
|
||||
PMIX_PROC_CREATE(cd->procs, cd->nprocs);
|
||||
for (n=0; n < cd->nprocs; n++) {
|
||||
(void)strncpy(cd->procs[n].nspace, procs[n].nspace, PMIX_MAX_NSLEN);
|
||||
cd->procs[n].rank = procs[n].rank;
|
||||
}
|
||||
}
|
||||
if (NULL != error_procs) {
|
||||
cd->error_nprocs = error_nprocs;
|
||||
cd->info = info;
|
||||
PMIX_PROC_CREATE(cd->error_procs, cd->error_nprocs);
|
||||
for (n=0; n < cd->error_nprocs; n++) {
|
||||
(void)strncpy(cd->error_procs[n].nspace, error_procs[n].nspace, PMIX_MAX_NSLEN);
|
||||
cd->error_procs[n].rank = error_procs[n].rank;
|
||||
}
|
||||
}
|
||||
if (NULL != info) {
|
||||
cd->ninfo = ninfo;
|
||||
PMIX_INFO_CREATE(cd->info, cd->ninfo);
|
||||
for (n=0; n < cd->ninfo; n++) {
|
||||
PMIX_INFO_LOAD(&cd->info[n], info[n].key,
|
||||
&info[n].value.data, info[n].value.type);
|
||||
}
|
||||
}
|
||||
cd->cbfunc = cbfunc;
|
||||
cd->cbdata = cbdata;
|
||||
|
||||
@ -1253,26 +1241,68 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
pmix_notify_caddy_t *cd)
|
||||
{
|
||||
bool notify;
|
||||
size_t j;
|
||||
|
||||
/* if the RM gave us a NULL proc list, then we are notifying everyone */
|
||||
if (NULL != cd->procs) {
|
||||
/* check to see if this proc matches that of one in the specified array */
|
||||
notify = false;
|
||||
for (j=0; j < cd->nprocs; j++) {
|
||||
if (0 != strncmp(reginfo->peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (PMIX_RANK_WILDCARD == cd->procs[j].rank ||
|
||||
cd->procs[j].rank == reginfo->peer->info->rank) {
|
||||
notify = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!notify) {
|
||||
/* if we are not notifying everyone, and this proc isn't to
|
||||
* be notified, so just return */
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* check if the client has registered for this error
|
||||
* by parsing the info keys */
|
||||
if (match_error_registration(reginfo, cd)) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: check notifications - notifying process rank %d error %d",
|
||||
reginfo->peer->info->rank, cd->status);
|
||||
PMIX_RETAIN(cd->buf);
|
||||
PMIX_SERVER_QUEUE_REPLY(reginfo->peer, 0, cd->buf);
|
||||
}
|
||||
|
||||
}
|
||||
static void reg_errhandler(int sd, short args, void *cbdata)
|
||||
{
|
||||
int index = 0;
|
||||
pmix_status_t rc;
|
||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||
pmix_notify_caddy_t *rb;
|
||||
|
||||
/* check if this handler is already registered if so return error */
|
||||
if (PMIX_SUCCESS == pmix_lookup_errhandler(cd->err, &index)) {
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) {
|
||||
/* complete request with error status and return its original reference */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_register_errhandler error - hdlr already registered index = %d",
|
||||
index);
|
||||
cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata);
|
||||
} else {
|
||||
rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_register_errhandler - success index =%d", index);
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
}
|
||||
cd->active = false;
|
||||
/* cycle across any cached notifications and see if any are
|
||||
* pending for us and match this description */
|
||||
|
||||
/* acknowledge the registration so the caller can release
|
||||
* their data */
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
@ -2161,21 +2191,23 @@ static void cnct_cbfunc(pmix_status_t status, void *cbdata)
|
||||
PMIX_THREADSHIFT(scd, _cnct);
|
||||
}
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
void regevents_cbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_regevents_info_t *reginfo, *reginfo_next;
|
||||
pmix_buffer_t *reply;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:regevents_cbfunc called status = %d", status);
|
||||
|
||||
if (PMIX_SUCCESS != status) {
|
||||
/* need to delete the stored event reg info when server
|
||||
nacks reg events request */
|
||||
* nacks reg events request */
|
||||
PMIX_LIST_FOREACH_SAFE(reginfo, reginfo_next, &pmix_server_globals.client_eventregs,
|
||||
pmix_regevents_info_t) {
|
||||
if(reginfo->peer == cd->peer) {
|
||||
pmix_list_remove_item (&pmix_server_globals.client_eventregs,
|
||||
if (reginfo->peer == cd->peer) {
|
||||
pmix_list_remove_item(&pmix_server_globals.client_eventregs,
|
||||
®info->super);
|
||||
PMIX_RELEASE(reginfo);
|
||||
break;
|
||||
@ -2183,22 +2215,27 @@ void regevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
}
|
||||
}
|
||||
reply = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
static void deregevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
static void deregevents_cbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:deregevents_cbfunc called status = %d", status);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
@ -2209,10 +2246,14 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata)
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:notifyerror_cbfunc called status = %d", status);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
@ -2387,6 +2428,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (PMIX_DEREGEVENTS_CMD == cmd) {
|
||||
PMIX_PEER_CADDY(cd, peer, tag);
|
||||
if (PMIX_SUCCESS != (rc = pmix_server_deregister_events(peer, buf, deregevents_cbfunc, cd))) {
|
||||
@ -2395,6 +2437,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (PMIX_NOTIFY_CMD == cmd) {
|
||||
PMIX_PEER_CADDY(cd, peer, tag);
|
||||
if (PMIX_SUCCESS != (rc = pmix_server_notify_error_client(peer, buf, notifyerror_cbfunc, cd))) {
|
||||
@ -2402,6 +2445,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
return PMIX_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
@ -215,6 +215,20 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* do not force dmodex logic for non-specific ranks
|
||||
* let return not found status instead of doing fence with
|
||||
* data exchange. User can make a decision to do such call getting
|
||||
* not found status
|
||||
*/
|
||||
if (PMIX_RANK_UNDEF == rank || PMIX_RANK_WILDCARD == rank) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"%s:%d not found data for namespace = %s, rank = %d "
|
||||
"(do not request resource manager server for non-specified rank)",
|
||||
pmix_globals.myid.nspace,
|
||||
pmix_globals.myid.rank, nspace, rank);
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* If we get here, then we don't have the data at this time. Check
|
||||
* to see if we already have a pending request for the data - if
|
||||
* we do, then we can just wait for it to arrive */
|
||||
@ -362,31 +376,52 @@ static pmix_status_t _satisfy_request(pmix_hash_table_t *ht, int rank,
|
||||
pmix_value_t *val;
|
||||
char *data;
|
||||
size_t sz;
|
||||
int cur_rank;
|
||||
int found = 0;
|
||||
pmix_buffer_t xfer, pbkt, *xptr;
|
||||
void *last;
|
||||
|
||||
/* check to see if this data already has been
|
||||
* obtained as a result of a prior direct modex request from
|
||||
* a remote peer, or due to data from a local client
|
||||
* having been committed */
|
||||
rc = pmix_hash_fetch(ht, rank, "modex", &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
cur_rank = rank;
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
rc = pmix_hash_fetch_by_key(ht, "modex", &cur_rank, &val, &last);
|
||||
} else {
|
||||
rc = pmix_hash_fetch(ht, cur_rank, "modex", &val);
|
||||
}
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
if (NULL != val) {
|
||||
pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_INT);
|
||||
/* the client is expecting this to arrive as a byte object
|
||||
* containing a buffer, so package it accordingly */
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
PMIX_CONSTRUCT(&xfer, pmix_buffer_t);
|
||||
xptr = &xfer;
|
||||
PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER);
|
||||
xfer.base_ptr = NULL; // protect the passed data
|
||||
xfer.bytes_used = 0;
|
||||
PMIX_DESTRUCT(&xfer);
|
||||
found++;
|
||||
}
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
rc = pmix_hash_fetch_by_key(ht, NULL, &cur_rank, &val, &last);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
|
||||
if (found) {
|
||||
/* pass it back */
|
||||
cbfunc(rc, data, sz, cbdata, relfn, data);
|
||||
return rc;
|
||||
cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data);
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -132,7 +132,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
pmix_nspace_t *nptr;
|
||||
pmix_rank_info_t *info;
|
||||
pmix_dmdx_remote_t *dcd, *dcdnext;
|
||||
pmix_buffer_t pbkt;
|
||||
pmix_buffer_t *pbkt;
|
||||
pmix_value_t *val;
|
||||
char *data;
|
||||
size_t sz;
|
||||
@ -141,6 +141,12 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
info = peer->info;
|
||||
nptr = info->nptr;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"%s:%d EXECUTE COMMIT FOR %s:%d",
|
||||
pmix_globals.myid.nspace,
|
||||
pmix_globals.myid.rank,
|
||||
nptr->nspace, info->rank);
|
||||
|
||||
/* this buffer will contain one or more buffers, each
|
||||
* representing a different scope. These need to be locally
|
||||
* stored separately so we can provide required data based
|
||||
@ -162,6 +168,27 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* see if we already have info for this proc */
|
||||
if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) {
|
||||
/* create the new data storage */
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
kp->key = strdup("modex");
|
||||
PMIX_VALUE_CREATE(kp->value, 1);
|
||||
kp->value->type = PMIX_BYTE_OBJECT;
|
||||
/* get space for the new data blob */
|
||||
kp->value->data.bo.bytes = (char*)malloc(b2->bytes_used + val->data.bo.size);
|
||||
memcpy(kp->value->data.bo.bytes, val->data.bo.bytes, val->data.bo.size);
|
||||
memcpy(kp->value->data.bo.bytes+val->data.bo.size, b2->base_ptr, b2->bytes_used);
|
||||
kp->value->data.bo.size = val->data.bo.size + b2->bytes_used;
|
||||
/* release the storage */
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
/* store it in the appropriate hash */
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg
|
||||
} else {
|
||||
/* create a new kval to hold this data */
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
kp->key = strdup("modex");
|
||||
PMIX_VALUE_CREATE(kp->value, 1);
|
||||
@ -173,6 +200,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg
|
||||
}
|
||||
cnt = 1;
|
||||
}
|
||||
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
@ -191,16 +219,16 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
if (dcd->cd->proc.rank == info->rank) {
|
||||
/* we can now fulfill this request - collect the
|
||||
* remote/global data from this proc */
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
pbkt = PMIX_NEW(pmix_buffer_t);
|
||||
/* get any remote contribution - note that there
|
||||
* may not be a contribution */
|
||||
if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) &&
|
||||
NULL != val) {
|
||||
PMIX_LOAD_BUFFER(&pbkt, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_LOAD_BUFFER(pbkt, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
PMIX_UNLOAD_BUFFER(pbkt, data, sz);
|
||||
PMIX_RELEASE(pbkt);
|
||||
/* execute the callback */
|
||||
dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata);
|
||||
if (NULL != data) {
|
||||
@ -805,7 +833,7 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the array of apps */
|
||||
/* unpack the array of directives */
|
||||
if (0 < ninfo) {
|
||||
PMIX_INFO_CREATE(info, ninfo);
|
||||
cnt=ninfo;
|
||||
@ -958,6 +986,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer,
|
||||
pmix_info_t *info = NULL;
|
||||
size_t ninfo, n;
|
||||
pmix_regevents_info_t *reginfo;
|
||||
pmix_notify_caddy_t *cd;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"recvd register events");
|
||||
@ -995,13 +1024,20 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer,
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: calling host server reg events");
|
||||
/* call the local server */
|
||||
if(PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info,
|
||||
reginfo->ninfo, cbfunc, cbdata)))
|
||||
{
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info,
|
||||
reginfo->ninfo, cbfunc, cbdata))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: host server reg events returned rc =%d", rc);
|
||||
}
|
||||
|
||||
/* check if any matching notifications have been cached */
|
||||
for (n=0; n < pmix_server_globals.notifications.size; n++) {
|
||||
if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_server_globals.notifications, n))) {
|
||||
break;
|
||||
}
|
||||
pmix_server_check_notifications(reginfo, cd);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: ninfo =%lu rc =%d", ninfo, rc);
|
||||
@ -1197,6 +1233,15 @@ static void ncon(pmix_notify_caddy_t *p)
|
||||
}
|
||||
static void ndes(pmix_notify_caddy_t *p)
|
||||
{
|
||||
if (NULL != p->procs) {
|
||||
PMIX_PROC_FREE(p->procs, p->nprocs);
|
||||
}
|
||||
if (NULL != p->error_procs) {
|
||||
PMIX_PROC_FREE(p->error_procs, p->error_nprocs);
|
||||
}
|
||||
if (NULL != p->info) {
|
||||
PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
}
|
||||
if (NULL != p->buf) {
|
||||
PMIX_RELEASE(p->buf);
|
||||
}
|
||||
@ -1261,3 +1306,4 @@ static void regdes(pmix_regevents_info_t *p)
|
||||
PMIX_CLASS_INSTANCE(pmix_regevents_info_t,
|
||||
pmix_list_item_t,
|
||||
regcon, regdes);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -15,6 +15,7 @@
|
||||
#include <private/autogen/config.h>
|
||||
#include <pmix/rename.h>
|
||||
#include <pmix/pmix_common.h>
|
||||
#include <src/class/pmix_ring_buffer.h>
|
||||
#include <pmix_server.h>
|
||||
#include "src/usock/usock.h"
|
||||
#include "src/util/hash.h"
|
||||
@ -157,6 +158,7 @@ typedef struct {
|
||||
int stop_thread[2]; // pipe used to stop listener thread
|
||||
pmix_buffer_t gdata; // cache of data given to me for passing to all clients
|
||||
pmix_list_t client_eventregs; // list of registered events per client.
|
||||
pmix_ring_buffer_t notifications; // ring buffer of pending notifications
|
||||
} pmix_server_globals_t;
|
||||
|
||||
#define PMIX_PEER_CADDY(c, p, t) \
|
||||
@ -272,6 +274,8 @@ pmix_status_t pmix_server_notify_error_client(pmix_peer_t *peer,
|
||||
pmix_buffer_t *buf,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata);
|
||||
void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
pmix_notify_caddy_t *cd);
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata);
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -137,10 +137,16 @@ const char* PMIx_Error_string(pmix_status_t errnum)
|
||||
return "PROC-ABORT-REQUESTED";
|
||||
case PMIX_ERR_PROC_ABORTED:
|
||||
return "PROC-ABORTED";
|
||||
case PMIX_ERR_DEBUGGER_RELEASE:
|
||||
return "DEBUGGER-RELEASE";
|
||||
case PMIX_ERR_SILENT:
|
||||
return "SILENT_ERROR";
|
||||
case PMIX_ERROR:
|
||||
return "ERROR";
|
||||
case PMIX_ERR_GRP_FOUND:
|
||||
return "GROUP-FOUND";
|
||||
case PMIX_ERR_DFLT_FOUND:
|
||||
return "DEFAULT-FOUND";
|
||||
case PMIX_SUCCESS:
|
||||
return "SUCCESS";
|
||||
|
||||
@ -162,15 +168,26 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
pmix_error_reg_info_t *errreg, *errdflt=NULL;
|
||||
pmix_info_t *iptr;
|
||||
|
||||
/* we will need to provide the errhandler reference id when
|
||||
* we provide the callback. Since the callback function doesn't
|
||||
* provide a param for that purpose, we have to add it to any
|
||||
* info array that came from the RM, so extend the array by 1 */
|
||||
PMIX_INFO_CREATE(iptr, ninfo+1);
|
||||
/* put the reference id in the first location */
|
||||
(void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
|
||||
iptr[0].value.type = PMIX_INT;
|
||||
/* we don't know the reference id yet, but we'll fill that in
|
||||
* later - for now, just copy the incoming info array across */
|
||||
if (NULL != info) {
|
||||
for (j=0; j < ninfo; j++) {
|
||||
PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type);
|
||||
}
|
||||
}
|
||||
|
||||
/* search our array of errhandlers for a match. We take any specific
|
||||
* error status first, then take the group of the incoming status next.
|
||||
* If neither of those have been registered, then use any default
|
||||
* errhandler - otherwise, ignore it */
|
||||
for (i = 0; i < pmix_globals.errregs.size; i++) {
|
||||
if (NULL == (errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item(&pmix_globals.errregs, i))) {
|
||||
continue;
|
||||
@ -194,7 +211,7 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!exact_match) {
|
||||
if (!exact_match && NULL != info) {
|
||||
/* if no exact match was found, then we will fire the errhandler
|
||||
* for any matching info key. This may be too lax and need to be adjusted
|
||||
* later */
|
||||
@ -217,22 +234,102 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
PMIX_INFO_FREE(iptr, ninfo+1);
|
||||
}
|
||||
|
||||
pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err,
|
||||
/* lookup an errhandler during registration */
|
||||
pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
int *index)
|
||||
{
|
||||
int i;
|
||||
pmix_status_t rc = PMIX_ERR_NOT_FOUND;
|
||||
pmix_error_reg_info_t *errreg = NULL;
|
||||
int i, idflt=-1, igrp=-1;
|
||||
pmix_error_reg_info_t *errreg;
|
||||
size_t sz, n;
|
||||
char errgrp[PMIX_MAX_KEYLEN];
|
||||
bool exact_given = false;
|
||||
int given = -1;
|
||||
pmix_status_t status;
|
||||
char *grp;
|
||||
|
||||
for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) {
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
|
||||
if ((NULL != errreg) && (err == errreg->errhandler)) {
|
||||
*index = i;
|
||||
rc = PMIX_SUCCESS;
|
||||
/* scan the incoming specification to see if it is a general errhandler,
|
||||
* a group errhandler, or an error handler for a specific status. Only
|
||||
* one of these options can be specified! */
|
||||
if (NULL == info) {
|
||||
/* this is the general error handler */
|
||||
given = 0;
|
||||
} else {
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strncmp(info[n].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
/* this is a specific errhandler */
|
||||
given = 1;
|
||||
status = info[n].value.data.integer;
|
||||
break;
|
||||
} else if (0 == strcmp(info[n].key, "pmix.errgroup")) {
|
||||
/* this is a group errhandler */
|
||||
given = 2;
|
||||
grp = info[n].value.data.string;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* search our array of errhandlers for a match */
|
||||
for (i = 0; i < pmix_globals.errregs.size ; i++) {
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
|
||||
if (NULL == errreg) {
|
||||
continue;
|
||||
}
|
||||
if (NULL == errreg->info) {
|
||||
/* this is the general errhandler - if they gave us
|
||||
* another general errhandler, then we should
|
||||
* replace it */
|
||||
if (0 == given) {
|
||||
*index = i;
|
||||
return PMIX_ERR_DFLT_FOUND;
|
||||
}
|
||||
/* save this spot as we will default to it if nothing else is found */
|
||||
idflt = i;
|
||||
continue;
|
||||
}
|
||||
if (0 == given) {
|
||||
/* they are looking for the general errhandler */
|
||||
continue;
|
||||
}
|
||||
/* if this registration is for a single specific errhandler, then
|
||||
* see if the incoming one matches */
|
||||
if (1 == given && errreg->sglhdlr) {
|
||||
for (sz=0; sz < errreg->ninfo; sz++) {
|
||||
if (0 == strncmp(errreg->info[sz].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
if (status == errreg->info[sz].value.data.integer) {
|
||||
/* we have an exact match - return this errhandler and
|
||||
* let the caller know it was an exact match */
|
||||
*index = i;
|
||||
return PMIX_EXISTS;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (2 == given && !errreg->sglhdlr) {
|
||||
/* this registration is for a group, so check that case */
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* if we get here, then no match was found. If they
|
||||
* gave us a specific error, then we have to return not_found */
|
||||
if (exact_given) {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* If we have a group match, then that takes precedence */
|
||||
if (0 <= igrp) {
|
||||
*index = igrp;
|
||||
return PMIX_ERR_GRP_FOUND;
|
||||
}
|
||||
|
||||
/* if we found a default errhandler, then use it */
|
||||
if (0 <= idflt) {
|
||||
*index = idflt;
|
||||
return PMIX_ERR_DFLT_FOUND;
|
||||
}
|
||||
|
||||
/* otherwise, it wasn't found */
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
@ -242,25 +339,45 @@ pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
int i;
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_error_reg_info_t *errreg;
|
||||
bool sglhdlr = false;
|
||||
|
||||
if (0 != *index) {
|
||||
/* overwrite an existing entry */
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, *index);
|
||||
if (NULL == errreg) {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
errreg->errhandler = err;
|
||||
PMIX_INFO_FREE(errreg->info, errreg->ninfo);
|
||||
errreg->ninfo = ninfo;
|
||||
} else {
|
||||
errreg = PMIX_NEW(pmix_error_reg_info_t);
|
||||
errreg->errhandler = err;
|
||||
errreg->ninfo = ninfo;
|
||||
if (NULL != info && 0 < ninfo) {
|
||||
PMIX_INFO_CREATE(errreg->info, ninfo);
|
||||
for (i=0; i < ninfo; i++) {
|
||||
(void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
|
||||
pmix_value_xfer(&errreg->info[i].value, &info[i].value);
|
||||
}
|
||||
}
|
||||
*index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_add_errhandler index =%d", *index);
|
||||
if (*index < 0) {
|
||||
PMIX_RELEASE(errreg);
|
||||
rc = PMIX_ERROR;
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
/* sadly, we have to copy the info objects as we cannot
|
||||
* rely on them to remain in-memory */
|
||||
if (NULL != info && 0 < ninfo) {
|
||||
PMIX_INFO_CREATE(errreg->info, ninfo);
|
||||
for (i=0; i < ninfo; i++) {
|
||||
/* if this is a specific, single errhandler, then
|
||||
* mark it accordingly */
|
||||
if (0 == strncmp(info[i].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
errreg->sglhdlr = true;
|
||||
}
|
||||
(void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
|
||||
pmix_value_xfer(&errreg->info[i].value, &info[i].value);
|
||||
}
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
pmix_status_t pmix_remove_errhandler(int errhandler_ref)
|
||||
|
@ -42,11 +42,19 @@ BEGIN_C_DECLS
|
||||
pmix_errhandler_invoke(e, NULL, 0, NULL, 0); \
|
||||
} while(0);
|
||||
|
||||
/* invoke the error handler that is registered against the given
|
||||
* status, passing it the provided info on the procs that were
|
||||
* affected, plus any additional info provided by the server */
|
||||
PMIX_DECLSPEC void pmix_errhandler_invoke(pmix_status_t status,
|
||||
pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_info_t info[], size_t ninfo);
|
||||
|
||||
PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err,
|
||||
/* lookup the errhandler registered against the given status. If there
|
||||
* is none, but an errhandler has been registered against the group
|
||||
* that this status belongs to, then return that errhandler. If neither
|
||||
* of those is true, but a general errhandler has been registered, then
|
||||
* return that errhandler. Otherwise, return NOT_FOUND */
|
||||
PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
int *index);
|
||||
|
||||
PMIX_DECLSPEC pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
|
@ -67,33 +67,28 @@ int pmix_hash_store(pmix_hash_table_t *table,
|
||||
{
|
||||
pmix_proc_data_t *proc_data;
|
||||
uint64_t id;
|
||||
pmix_kval_t *kv;
|
||||
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:STORE rank %d key %s",
|
||||
rank, kin->key);
|
||||
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
id = UINT64_MAX;
|
||||
} else {
|
||||
id = (uint64_t)rank;
|
||||
}
|
||||
|
||||
/* lookup the proc data object for this proc - create
|
||||
* it if we don't */
|
||||
* it if we don't already have it */
|
||||
if (NULL == (proc_data = lookup_proc(table, id, true))) {
|
||||
return PMIX_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* see if we already have this key in the data - means we are updating
|
||||
* a pre-existing value
|
||||
*/
|
||||
kv = lookup_keyval(&proc_data->data, kin->key);
|
||||
if (NULL != kv) {
|
||||
pmix_list_remove_item(&proc_data->data, &kv->super);
|
||||
PMIX_RELEASE(kv);
|
||||
}
|
||||
/* store the new value */
|
||||
/* add the new value - note that if the user is updating
|
||||
* a value, the ordering of the stored blobs will cause
|
||||
* an update to eventually occur. In other words, the
|
||||
* receiving process will first unpack the "old" data,
|
||||
* and then unpack the update and overwrite it */
|
||||
PMIX_RETAIN(kin);
|
||||
pmix_list_append(&proc_data->data, &kin->super);
|
||||
|
||||
@ -103,46 +98,131 @@ int pmix_hash_store(pmix_hash_table_t *table,
|
||||
pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
const char *key, pmix_value_t **kvs)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_proc_data_t *proc_data;
|
||||
pmix_kval_t *hv;
|
||||
uint64_t id;
|
||||
pmix_status_t rc;
|
||||
char *node;
|
||||
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH rank %d key %s",
|
||||
rank, (NULL == key) ? "NULL" : key);
|
||||
|
||||
/* NULL keys are not supported */
|
||||
if (NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
/* PMIX_RANK_UNDEF can return the following statuses:
|
||||
* PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_SUCCESS
|
||||
* special logic on the client and the server is based on these statuses */
|
||||
rc = pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data, (void**)&node);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH proc data for rank %d not found",
|
||||
rank);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
id = UINT64_MAX;
|
||||
} else {
|
||||
/* a specified rank can return the following statuses:
|
||||
* PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_ERR_NOT_FOUND | PMIX_SUCCESS
|
||||
* special logic on the client and the server is based on these statuses */
|
||||
id = (uint64_t)rank;
|
||||
}
|
||||
|
||||
/* lookup the proc data object for this proc */
|
||||
if (NULL == (proc_data = lookup_proc(table, id, false))) {
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
proc_data = lookup_proc(table, id, false);
|
||||
if (NULL == proc_data) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH proc data for rank %d not found",
|
||||
rank);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* find the value from within this proc_data object */
|
||||
if (NULL == (hv = lookup_keyval(&proc_data->data, key))) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
/* if the key is NULL, then the user wants -all- data
|
||||
* put by the specified rank */
|
||||
if (NULL == key) {
|
||||
/* we will return the data as an array of pmix_info_t
|
||||
* in the kvs pmix_value_t */
|
||||
|
||||
} else {
|
||||
/* find the value from within this proc_data object */
|
||||
hv = lookup_keyval(&proc_data->data, key);
|
||||
if (hv) {
|
||||
/* create the copy */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
break;
|
||||
} else if (PMIX_RANK_UNDEF != rank) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
rc = pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data, node, (void**)&node);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
 * Walk the entries of a hash table one call at a time, returning a copy
 * of the value that the visited entry stores under a given key.
 *
 * The first call passes the key of interest and starts at the first entry
 * of the table. Each follow-up call passes key == NULL together with the
 * cursor previously returned through *last, and advances to the next
 * entry while re-using the key remembered from the first call.
 *
 * NOTE: the key pointer is remembered in a function-local static, so only
 * one iteration can be in progress at a time and the key string passed to
 * the first call must stay valid until the iteration is finished.
 *
 * @param table  hash table to walk
 * @param key    key to look up on the first call; NULL to continue
 * @param rank   [out] id of the matching entry, converted to int
 * @param kvs    [out] copy of the stored value, made with pmix_bfrop.copy
 * @param last   [in/out] iteration cursor; read when key is NULL and
 *               updated only when PMIX_SUCCESS is returned
 *
 * @return PMIX_SUCCESS when the visited entry holds the key;
 *         PMIX_ERR_PROC_ENTRY_NOT_FOUND when there is no iteration to
 *         continue or the table walk reports no (further) entry;
 *         PMIX_ERR_NOT_FOUND when the visited entry has no value for the
 *         key (*rank and *last are left untouched in that case);
 *         otherwise the error returned by pmix_bfrop.copy.
 */
pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key,
                                     int *rank, pmix_value_t **kvs, void **last)
{
    pmix_status_t rc = PMIX_SUCCESS;
    pmix_proc_data_t *proc_data;
    pmix_kval_t *hv;
    uint64_t id;
    char *node = NULL;
    static const char *key_r = NULL;

    if (NULL == key) {
        /* continuing an iteration requires both a cursor from the
         * previous call and a key remembered from the first call */
        node = (char*)*last;
        if (NULL == node || NULL == key_r) {
            return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
        }
        rc = pmix_hash_table_get_next_key_uint64(table, &id,
                (void**)&proc_data, node, (void**)&node);
    } else {
        /* a non-NULL key (re)starts the iteration at the first entry */
        rc = pmix_hash_table_get_first_key_uint64(table, &id,
                (void**)&proc_data, (void**)&node);
        key_r = key;
    }

    if (PMIX_SUCCESS != rc) {
        pmix_output_verbose(10, pmix_globals.debug_output,
                            "HASH:FETCH proc data for key %s not found",
                            key_r);
        return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
    }

    /* only trace the id once the table walk has succeeded - before that
     * point it has not been assigned a value */
    pmix_output_verbose(10, pmix_globals.debug_output,
                        "HASH:FETCH BY KEY rank %d key %s",
                        (int)id, key_r);

    /* find the value from within this proc_data object */
    hv = lookup_keyval(&proc_data->data, key_r);
    if (NULL == hv) {
        return PMIX_ERR_NOT_FOUND;
    }

    /* create the copy */
    if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }

    *rank = (int)id;
    *last = node;

    return PMIX_SUCCESS;
}
|
||||
@ -150,6 +230,7 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
int rank, const char *key)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_proc_data_t *proc_data;
|
||||
pmix_kval_t *kv;
|
||||
uint64_t id;
|
||||
@ -157,11 +238,11 @@ int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
|
||||
/* if the rank is wildcard, we want to apply this to
|
||||
* all rank entries */
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
id = UINT64_MAX;
|
||||
if (PMIX_SUCCESS == pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data,
|
||||
(void**)&node)) {
|
||||
rc = pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data, (void**)&node);
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
if (NULL != proc_data) {
|
||||
if (NULL == key) {
|
||||
PMIX_RELEASE(proc_data);
|
||||
@ -175,23 +256,8 @@ int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
}
|
||||
}
|
||||
}
|
||||
while (PMIX_SUCCESS == pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data,
|
||||
node, (void**)&node)) {
|
||||
if (NULL != proc_data) {
|
||||
if (NULL == key) {
|
||||
PMIX_RELEASE(proc_data);
|
||||
} else {
|
||||
PMIX_LIST_FOREACH(kv, &proc_data->data, pmix_kval_t) {
|
||||
if (0 == strcmp(key, kv->key)) {
|
||||
pmix_list_remove_item(&proc_data->data, &kv->super);
|
||||
PMIX_RELEASE(kv);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rc = pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data, node, (void**)&node);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,11 +27,19 @@ BEGIN_C_DECLS
|
||||
pmix_status_t pmix_hash_store(pmix_hash_table_t *table,
|
||||
int rank, pmix_kval_t *kv);
|
||||
|
||||
/* Fetch the value for a specified key from within
|
||||
/* Fetch the value for a specified key and rank from within
|
||||
* the given hash_table */
|
||||
pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
const char *key, pmix_value_t **kvs);
|
||||
|
||||
/* Fetch the value for a specified key from within
|
||||
* the given hash_table.
|
||||
* Each call returns the next portion of data from the table whose key matches.
|
||||
* To get the first data from the table, call the function with the key parameter set to the key string.
|
||||
* The remaining data are obtained by calling the function with a null pointer for the key parameter. */
|
||||
pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key,
|
||||
int *rank, pmix_value_t **kvs, void **last);
|
||||
|
||||
/* remove the specified key-value from the given hash_table.
|
||||
* A NULL key will result in removal of all data for the
|
||||
* given rank. A rank of PMIX_RANK_WILDCARD indicates that
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <pmix/pmix_common.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
@ -468,6 +469,56 @@ void pmix_output_set_output_file_info(const char *dir,
|
||||
}
|
||||
}
|
||||
|
||||
void pmix_output_hexdump(int verbose_level, int output_id,
|
||||
void *ptr, int buflen)
|
||||
{
|
||||
unsigned char *buf = (unsigned char *) ptr;
|
||||
char out_buf[120];
|
||||
int ret = 0;
|
||||
int out_pos = 0;
|
||||
int i, j;
|
||||
|
||||
if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS &&
|
||||
info[output_id].ldi_verbose_level >= verbose_level) {
|
||||
pmix_output_verbose(verbose_level, output_id, "dump data at %p %d bytes\n", ptr, buflen);
|
||||
for (i = 0; i < buflen; i += 16) {
|
||||
out_pos = 0;
|
||||
ret = sprintf(out_buf + out_pos, "%06x: ", i);
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
for (j = 0; j < 16; j++) {
|
||||
if (i + j < buflen)
|
||||
ret = sprintf(out_buf + out_pos, "%02x ",
|
||||
buf[i + j]);
|
||||
else
|
||||
ret = sprintf(out_buf + out_pos, " ");
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
}
|
||||
ret = sprintf(out_buf + out_pos, " ");
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
for (j = 0; j < 16; j++)
|
||||
if (i + j < buflen) {
|
||||
ret = sprintf(out_buf + out_pos, "%c",
|
||||
isprint(buf[i+j]) ?
|
||||
buf[i + j] :
|
||||
'.');
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
}
|
||||
ret = sprintf(out_buf + out_pos, "\n");
|
||||
if (ret < 0)
|
||||
return;
|
||||
pmix_output_verbose(verbose_level, output_id, "%s", out_buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Shut down the output stream system
|
||||
|
@ -507,6 +507,12 @@ struct pmix_output_stream_t {
|
||||
char **olddir,
|
||||
char **oldprefix);
|
||||
|
||||
/**
|
||||
* Same as pmix_output_verbose(), but takes a pointer to a buffer and its size and outputs a hex dump of that buffer.
|
||||
*/
|
||||
PMIX_DECLSPEC void pmix_output_hexdump(int verbose_level, int output_id,
|
||||
void *ptr, int buflen);
|
||||
|
||||
#if PMIX_ENABLE_DEBUG
|
||||
/**
|
||||
* Main macro for use in sending debugging output to output streams;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
|
@ -211,6 +211,11 @@ static void reg_thread(int sd, short args, void *cbdata)
|
||||
int rc;
|
||||
opal_pmix120_etracker_t *trk;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s register complete with status %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
||||
cd->status);
|
||||
|
||||
/* convert the status */
|
||||
rc = pmix120_convert_rc(cd->status);
|
||||
|
||||
@ -251,6 +256,11 @@ static void pmix120_register_errhandler(opal_list_t *info,
|
||||
size_t n;
|
||||
opal_value_t *ival;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s REGISTER ERRHDNLR INFO %s",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
||||
(NULL == info) ? "NULL" : "NOT-NULL");
|
||||
|
||||
/* setup a caddy for the operation so we can free
|
||||
* the array when done */
|
||||
cd = OBJ_NEW(pmix120_opcaddy_t);
|
||||
@ -266,7 +276,8 @@ static void pmix120_register_errhandler(opal_list_t *info,
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
|
||||
(void)strncpy(cd->info[n].key, ival->key, PMIX_MAX_KEYLEN);
|
||||
pmix120_value_load(&cd->info[n].value, ival);
|
||||
cd->info[n].value.type = PMIX_INT;
|
||||
cd->info[n].value.data.status = pmix120_convert_opalrc(ival->data.integer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -248,8 +248,14 @@ opal_err2str(int errnum, const char **errmsg)
|
||||
case OPAL_ERR_SERVER_NOT_AVAIL:
|
||||
retval = "Server not available";
|
||||
break;
|
||||
case OPAL_ERR_IN_PROCESS:
|
||||
retval = "Operation in process";
|
||||
break;
|
||||
case OPAL_ERR_DEBUGGER_RELEASE:
|
||||
retval = "Release debugger";
|
||||
break;
|
||||
default:
|
||||
retval = NULL;
|
||||
retval = "UNRECOGNIZED";
|
||||
}
|
||||
|
||||
*errmsg = retval;
|
||||
|
@ -540,6 +540,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata)
|
||||
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
||||
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
|
||||
sig->sz = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buffer))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buffer);
|
||||
@ -693,9 +694,6 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
cleanup:
|
||||
/* need to init_after_spawn for debuggers */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(caddy);
|
||||
}
|
||||
|
@ -110,8 +110,8 @@ BEGIN_C_DECLS
|
||||
/* show help */
|
||||
#define ORTE_RML_TAG_SHOW_HELP 36
|
||||
|
||||
/* debugger release */
|
||||
#define ORTE_RML_TAG_DEBUGGER_RELEASE 37
|
||||
/* error notifications */
|
||||
#define ORTE_RML_TAG_NOTIFICATION 37
|
||||
|
||||
/* bootstrap */
|
||||
#define ORTE_RML_TAG_BOOTSTRAP 38
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
@ -197,6 +197,7 @@ int pmix_server_init(void)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OBJ_CONSTRUCT(&orte_pmix_server_globals.notifications, opal_list_t);
|
||||
|
||||
/* setup recv for direct modex requests */
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX,
|
||||
@ -214,6 +215,10 @@ int pmix_server_init(void)
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT,
|
||||
ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL);
|
||||
|
||||
/* setup recv for notifications */
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION,
|
||||
ORTE_RML_PERSISTENT, pmix_server_notify, NULL);
|
||||
|
||||
/* ensure the PMIx server uses the proper rendezvous directory */
|
||||
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
|
||||
|
||||
@ -348,12 +353,16 @@ void pmix_server_finalize(void)
|
||||
/* stop receives */
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX_RESP);
|
||||
|
||||
/* cleanup collectives */
|
||||
OBJ_DESTRUCT(&orte_pmix_server_globals.reqs);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION);
|
||||
|
||||
/* shutdown the local server */
|
||||
opal_pmix.server_finalize();
|
||||
|
||||
/* cleanup collectives */
|
||||
OBJ_DESTRUCT(&orte_pmix_server_globals.reqs);
|
||||
OPAL_LIST_DESTRUCT(&orte_pmix_server_globals.notifications);
|
||||
}
|
||||
|
||||
static void send_error(int status, opal_process_name_t *idreq,
|
||||
@ -634,6 +643,7 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
|
||||
/* Initializer for a server op caddy: clears the three list pointers
 * (target procs, affected procs, info) and the callback data so a
 * fresh caddy never carries stale references. */
static void opcon(orte_pmix_server_op_caddy_t *p)
{
    p->cbdata = NULL;
    p->procs = p->eprocs = p->info = NULL;
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -111,24 +111,204 @@ int pmix_server_abort_fn(opal_process_name_t *proc, void *server_object,
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void _register_events(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
opal_value_t *info;
|
||||
|
||||
/* the OPAL layer "owns" the list, but let's deconstruct it
|
||||
* here so we don't have to duplicate the data */
|
||||
while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) {
|
||||
/* don't worry about duplication as the underlying host
|
||||
* server is already protecting us from it */
|
||||
opal_list_append(&orte_pmix_server_globals.notifications, &info->super);
|
||||
}
|
||||
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(ORTE_SUCCESS, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
|
||||
/* hook for the local PMIX server to pass event registrations
|
||||
* up to us - we will assume the responsibility for providing
|
||||
* notifications for registered events */
|
||||
int pmix_server_register_events_fn(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
/* for now, just execute the cbfunc */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, cbdata);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
/* need to thread-shift this request as we are going
|
||||
* to access our global list of registered events */
|
||||
ORTE_PMIX_OPERATION(NULL, info, _register_events, cbfunc, cbdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void _deregister_events(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
opal_value_t *info, *iptr, *nptr;
|
||||
|
||||
/* the OPAL layer "owns" the list, but let's deconstruct it
|
||||
* here for consistency */
|
||||
while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) {
|
||||
/* search for matching requests */
|
||||
OPAL_LIST_FOREACH_SAFE(iptr, nptr, &orte_pmix_server_globals.notifications, opal_value_t) {
|
||||
if (OPAL_EQUAL == opal_dss.compare(iptr, info, OPAL_VALUE)) {
|
||||
opal_list_remove_item(&orte_pmix_server_globals.notifications, &iptr->super);
|
||||
OBJ_RELEASE(iptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(info);
|
||||
}
|
||||
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(ORTE_SUCCESS, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
/* hook for the local PMIX server to pass event deregistrations
|
||||
* up to us */
|
||||
int pmix_server_deregister_events_fn(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
/* for now, just execute the cbfunc */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, cbdata);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
/* need to thread-shift this request as we are going
|
||||
* to access our global list of registered events */
|
||||
ORTE_PMIX_OPERATION(NULL, info, _deregister_events, cbfunc, cbdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void _notify_release(int status, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
|
||||
if (NULL != cd->procs) {
|
||||
OPAL_LIST_RELEASE(cd->procs);
|
||||
}
|
||||
if (NULL != cd->eprocs) {
|
||||
OPAL_LIST_RELEASE(cd->eprocs);
|
||||
}
|
||||
if (NULL != cd->info) {
|
||||
OPAL_LIST_RELEASE(cd->info);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
void pmix_server_notify(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata)
|
||||
{
|
||||
opal_list_t *procs = NULL, *eprocs = NULL, *info = NULL;
|
||||
int cnt, rc, ret, nprocs, n;
|
||||
opal_namelist_t *nm;
|
||||
opal_value_t *val;
|
||||
orte_pmix_server_op_caddy_t *cd;
|
||||
|
||||
opal_output_verbose(2, orte_pmix_server_globals.output,
|
||||
"%s Notification received",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* unpack the status */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
/* unpack the target procs that are to be notified */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
procs = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
opal_list_append(procs, &nm->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the procs that were impacted by the error */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
eprocs = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
opal_list_append(eprocs, &nm->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the infos that were provided */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
info = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
opal_list_append(info, &val->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &val, &cnt, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
if (NULL != eprocs) {
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
}
|
||||
OPAL_LIST_RELEASE(info);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cd = OBJ_NEW(orte_pmix_server_op_caddy_t);
|
||||
cd->procs = procs;
|
||||
cd->eprocs = eprocs;
|
||||
cd->info = info;
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_notify_error(ret, procs, eprocs, info, _notify_release, cd))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
if (NULL != eprocs) {
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
}
|
||||
if (NULL != info) {
|
||||
OPAL_LIST_RELEASE(info);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
}
|
||||
|
@ -75,6 +75,7 @@ typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
opal_list_t *procs;
|
||||
opal_list_t *eprocs;
|
||||
opal_list_t *info;
|
||||
opal_pmix_op_cbfunc_t cbfunc;
|
||||
void *cbdata;
|
||||
@ -175,6 +176,10 @@ extern void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata);
|
||||
|
||||
extern void pmix_server_notify(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata);
|
||||
|
||||
/* exposed shared variables */
|
||||
typedef struct {
|
||||
bool initialized;
|
||||
@ -186,6 +191,7 @@ typedef struct {
|
||||
char *server_uri;
|
||||
bool wait_for_server;
|
||||
orte_process_name_t server;
|
||||
opal_list_t notifications;
|
||||
} pmix_server_globals_t;
|
||||
|
||||
extern pmix_server_globals_t orte_pmix_server_globals;
|
||||
|
@ -2511,6 +2511,58 @@ static void setup_debugger_job(void)
|
||||
|
||||
static bool mpir_breakpoint_fired = false;
|
||||
|
||||
static void _send_notification(void)
|
||||
{
|
||||
opal_buffer_t buf;
|
||||
int status = OPAL_ERR_DEBUGGER_RELEASE;
|
||||
orte_grpcomm_signature_t sig;
|
||||
int rc;
|
||||
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
|
||||
/* pack the debugger_attached status */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
status = 0;
|
||||
|
||||
/* notify all procs */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* all procs are impacted */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* no further info to provide */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* xcast it to everyone */
|
||||
OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
|
||||
sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
||||
sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sig.signature[0].vpid = ORTE_VPID_WILDCARD;
|
||||
sig.sz = 1;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(&sig, ORTE_RML_TAG_NOTIFICATION, &buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
OBJ_DESTRUCT(&sig);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialization of data structures for running under a debugger
|
||||
* using the MPICH/TotalView parallel debugger interface. This stage
|
||||
@ -2527,8 +2579,6 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
orte_proc_t *proc;
|
||||
orte_app_context_t *appctx;
|
||||
orte_vpid_t i, j;
|
||||
opal_buffer_t *buf;
|
||||
int rc, k;
|
||||
char **aliases, *aptr;
|
||||
|
||||
/* if we couldn't get thru the mapper stage, we might
|
||||
@ -2548,31 +2598,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
/* trigger the debugger */
|
||||
MPIR_Breakpoint();
|
||||
|
||||
/* send a message to rank=0 of any app jobs to release it */
|
||||
for (k=1; k < orte_job_data->size; k++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
|
||||
/* ignore debugger jobs */
|
||||
continue;
|
||||
}
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) ||
|
||||
ORTE_PROC_STATE_UNTERMINATED < proc->state ||
|
||||
NULL == proc->rml_uri) {
|
||||
/* proc is already dead or never registered with us (so we don't have
|
||||
* contact info for him)
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -2665,35 +2692,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
/* trigger the debugger */
|
||||
MPIR_Breakpoint();
|
||||
|
||||
/* send a message to rank=0 of any app jobs to release it */
|
||||
for (k=1; k < orte_job_data->size; k++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
|
||||
/* ignore debugger jobs */
|
||||
continue;
|
||||
}
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) ||
|
||||
ORTE_PROC_STATE_UNTERMINATED < proc->state ||
|
||||
NULL == proc->rml_uri) {
|
||||
/* proc is already dead or never registered with us (so we don't have
|
||||
* contact info for him)
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(2, orte_debug_output,
|
||||
"%s sending debugger release to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name));
|
||||
buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
} else {
|
||||
/* if I am launching debugger daemons, then I need to do so now
|
||||
* that the job has been started and I know which nodes have
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user