Merge pull request #1358 from rhc54/topic/notification
Enable the PMIx notification callback system and fix debugger attach
Этот коммит содержится в:
Коммит
142e38cbb2
@ -91,7 +91,7 @@ OMPI_DECLSPEC void __opal_attribute_noreturn__
|
||||
#define OMPI_ERROR_LOG ORTE_ERROR_LOG
|
||||
|
||||
/* Init and finalize objects and operations */
|
||||
#define ompi_rte_init(a, b) orte_init(a, b, ORTE_PROC_MPI)
|
||||
OMPI_DECLSPEC int ompi_rte_init(int *pargc, char ***pargv);
|
||||
#define ompi_rte_finalize() orte_finalize()
|
||||
OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void);
|
||||
|
||||
|
@ -52,6 +52,79 @@
|
||||
|
||||
extern ompi_rte_orte_component_t mca_rte_orte_component;
|
||||
|
||||
/* Bookkeeping for one errhandler registration request. */
typedef struct {
    volatile bool active;   /* set true by the requester, cleared by register_cbfunc */
    int status;             /* status value handed to register_cbfunc */
    int errhandler;         /* errhandler value handed to register_cbfunc */
} errhandler_t;

/*
 * Registration callback: copies the reported results into the
 * errhandler_t passed as cbdata and then clears its "active" flag.
 *
 * @param status     status value to record
 * @param errhndler  errhandler value to record
 * @param cbdata     pointer to the errhandler_t to fill in
 */
static void register_cbfunc(int status, int errhndler, void *cbdata)
{
    errhandler_t *request = (errhandler_t *) cbdata;

    request->errhandler = errhndler;
    request->status = status;
    /* the flag is cleared only after both results have been stored */
    request->active = false;
}
|
||||
|
||||
static volatile bool wait_for_release = true;
|
||||
static int errhandler = -1;
|
||||
|
||||
static void notify_cbfunc(int status,
|
||||
opal_list_t *procs,
|
||||
opal_list_t *info,
|
||||
opal_pmix_release_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(cbdata);
|
||||
}
|
||||
wait_for_release = false;
|
||||
}
|
||||
|
||||
|
||||
int ompi_rte_init(int *pargc, char ***pargv)
|
||||
{
|
||||
int rc;
|
||||
opal_list_t info;
|
||||
opal_value_t val;
|
||||
errhandler_t cd;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_init(pargc, pargv, ORTE_PROC_MPI))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!orte_standalone_operation) {
|
||||
/* register to receive any debugger release */
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
OBJ_CONSTRUCT(&val, opal_value_t);
|
||||
val.key = strdup(OPAL_PMIX_ERROR_NAME);
|
||||
val.type = OPAL_INT;
|
||||
val.data.integer = OPAL_ERR_DEBUGGER_RELEASE;
|
||||
opal_list_append(&info, &val.super);
|
||||
cd.status = ORTE_ERROR;
|
||||
cd.errhandler = -1;
|
||||
cd.active = true;
|
||||
|
||||
opal_pmix.register_errhandler(&info, notify_cbfunc, register_cbfunc, &cd);
|
||||
|
||||
/* let the MPI progress engine run while we wait for
|
||||
* registration to complete */
|
||||
OMPI_WAIT_FOR_COMPLETION(cd.active);
|
||||
/* safely deconstruct the list */
|
||||
opal_list_remove_first(&info);
|
||||
OBJ_DESTRUCT(&val);
|
||||
OBJ_DESTRUCT(&info);
|
||||
if (OPAL_SUCCESS != cd.status) {
|
||||
/* ouch - we are doomed */
|
||||
ORTE_ERROR_LOG(cd.status);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
errhandler = cd.errhandler;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void ompi_rte_abort(int error_code, char *fmt, ...)
|
||||
{
|
||||
va_list arglist;
|
||||
@ -100,10 +173,10 @@ void ompi_rte_abort(int error_code, char *fmt, ...)
|
||||
* attaching debuggers -- see big comment in
|
||||
* orte/tools/orterun/debuggers.c explaining the two scenarios.
|
||||
*/
|
||||
|
||||
void ompi_rte_wait_for_debugger(void)
|
||||
{
|
||||
int debugger;
|
||||
orte_rml_recv_cb_t xfer;
|
||||
|
||||
/* See lengthy comment in orte/tools/orterun/debuggers.c about
|
||||
orte_in_parallel_debugger */
|
||||
@ -117,12 +190,12 @@ void ompi_rte_wait_for_debugger(void)
|
||||
/* if not, just return */
|
||||
return;
|
||||
}
|
||||
|
||||
/* if we are being debugged, then we need to find
|
||||
* the correct plug-ins
|
||||
*/
|
||||
ompi_debugger_setup_dlls();
|
||||
|
||||
/* wait for the debugger to attach */
|
||||
if (orte_standalone_operation) {
|
||||
/* spin until debugger attaches and releases us */
|
||||
while (MPIR_debug_gate == 0) {
|
||||
@ -133,23 +206,9 @@ void ompi_rte_wait_for_debugger(void)
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
/* only the rank=0 proc waits for either a message from the
|
||||
* HNP or for the debugger to attach - everyone else will just
|
||||
* spin in the grpcomm barrier in ompi_mpi_init until rank=0
|
||||
* joins them.
|
||||
*/
|
||||
if (0 != ORTE_PROC_MY_NAME->vpid) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* VPID 0 waits for a message from the HNP */
|
||||
OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
|
||||
xfer.active = true;
|
||||
orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
ORTE_RML_NON_PERSISTENT,
|
||||
orte_rml_recv_callback, &xfer);
|
||||
/* let the MPI progress engine run while we wait */
|
||||
OMPI_WAIT_FOR_COMPLETION(xfer.active);
|
||||
/* now wait for the notification to occur */
|
||||
OMPI_WAIT_FOR_COMPLETION(wait_for_release);
|
||||
/* deregister the errhandler */
|
||||
opal_pmix.deregister_errhandler(errhandler, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -10,9 +10,9 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -449,3 +449,12 @@ int opal_dss_compare_jobid(opal_jobid_t *value1,
|
||||
return OPAL_EQUAL;
|
||||
}
|
||||
|
||||
/*
 * Compare two status values numerically.
 *
 * @param value1  first status
 * @param value2  second status
 * @param type    not used by this function
 *
 * @return OPAL_VALUE1_GREATER, OPAL_VALUE2_GREATER or OPAL_EQUAL
 */
int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type)
{
    const int lhs = *value1;
    const int rhs = *value2;

    if (lhs == rhs) {
        return OPAL_EQUAL;
    }

    return (lhs > rhs) ? OPAL_VALUE1_GREATER : OPAL_VALUE2_GREATER;
}
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -61,6 +61,7 @@ int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type)
|
||||
|
||||
case OPAL_INT:
|
||||
case OPAL_UINT:
|
||||
case OPAL_STATUS:
|
||||
datasize = sizeof(int);
|
||||
break;
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
@ -332,6 +332,9 @@ int opal_dss_pack_jobid(opal_buffer_t *buffer, const void *src,
|
||||
int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, opal_data_type_t type);
|
||||
|
||||
int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal unpack functions
|
||||
*/
|
||||
@ -401,6 +404,8 @@ int opal_dss_unpack_jobid(opal_buffer_t *buffer, void *dest,
|
||||
int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, opal_data_type_t type);
|
||||
|
||||
int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal copy functions
|
||||
@ -497,6 +502,8 @@ int opal_dss_compare_jobid(opal_jobid_t *value1,
|
||||
opal_jobid_t *value2,
|
||||
opal_data_type_t type);
|
||||
|
||||
int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Internal print functions
|
||||
*/
|
||||
@ -536,6 +543,7 @@ int opal_dss_print_time(char **output, char *prefix, time_t *src, opal_data_type
|
||||
int opal_dss_print_name(char **output, char *prefix, opal_process_name_t *name, opal_data_type_t type);
|
||||
int opal_dss_print_jobid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type);
|
||||
int opal_dss_print_vpid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type);
|
||||
int opal_dss_print_status(char **output, char *prefix, int *src, opal_data_type_t type);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -611,6 +611,17 @@ int opal_dss_open(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
tmp = OPAL_STATUS;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_status,
|
||||
opal_dss_unpack_status,
|
||||
(opal_dss_copy_fn_t)opal_dss_std_copy,
|
||||
(opal_dss_compare_fn_t)opal_dss_compare_status,
|
||||
(opal_dss_print_fn_t)opal_dss_print_status,
|
||||
OPAL_DSS_UNSTRUCTURED,
|
||||
"OPAL_STATUS", &tmp))) {
|
||||
return rc;
|
||||
}
|
||||
/* All done */
|
||||
|
||||
opal_dss_initialized = true;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -1240,3 +1240,20 @@ int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* STATUS
|
||||
*/
|
||||
int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, opal_data_type_t type)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Turn around and pack the real type */
|
||||
ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_INT);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
OPAL_ERROR_LOG(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -25,6 +25,7 @@
|
||||
#include "opal_stdint.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/dss/dss_internal.h"
|
||||
|
||||
int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type)
|
||||
@ -1060,3 +1061,29 @@ int opal_dss_print_vpid(char **output, char *prefix,
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Render an OPAL_STATUS value as a newly allocated string.
 *
 * @param output  receives the string allocated by asprintf(); set to
 *                NULL when the string cannot be allocated
 * @param prefix  text placed in front of the description; NULL selects
 *                a single space
 * @param src     status to print; NULL prints a "NULL pointer" marker
 * @param type    not used by this function
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if asprintf() fails
 */
int opal_dss_print_status(char **output, char *prefix,
                          int *src, opal_data_type_t type)
{
    /* a NULL prefix always meant " ", so a literal replaces the
     * former heap copy (and its unchecked allocation) */
    const char *prefx = (NULL == prefix) ? " " : prefix;
    int len;

    if (NULL == src) {
        /* no value to show: print the data type only */
        len = asprintf(output, "%sData type: OPAL_STATUS\tValue: NULL pointer", prefx);
    } else {
        len = asprintf(output, "%sData type: OPAL_STATUS\tValue: %s", prefx, opal_strerror(*src));
    }

    if (0 > len) {
        /* asprintf() leaves *output undefined on failure; hand back a
         * well-defined NULL together with an error code */
        *output = NULL;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    return OPAL_SUCCESS;
}
|
||||
|
@ -13,9 +13,9 @@
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -120,6 +120,8 @@ typedef struct {
|
||||
#define OPAL_NAME (opal_data_type_t) 50
|
||||
#define OPAL_JOBID (opal_data_type_t) 51
|
||||
#define OPAL_VPID (opal_data_type_t) 52
|
||||
#define OPAL_STATUS (opal_data_type_t) 53
|
||||
|
||||
/* OPAL Dynamic */
|
||||
#define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 100
|
||||
|
||||
@ -245,6 +247,7 @@ typedef struct {
|
||||
float fval;
|
||||
double dval;
|
||||
struct timeval tv;
|
||||
int status;
|
||||
opal_process_name_t name;
|
||||
opal_bool_array_t flag_array;
|
||||
opal_uint8_array_t byte_array;
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -1519,3 +1519,20 @@ int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* STATUS
|
||||
*/
|
||||
int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, opal_data_type_t type)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Turn around and unpack the real type */
|
||||
ret = opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_INT);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
OPAL_ERROR_LOG(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -80,7 +80,8 @@ enum {
|
||||
OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50),
|
||||
OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51),
|
||||
OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52),
|
||||
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53)
|
||||
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53),
|
||||
OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54)
|
||||
};
|
||||
|
||||
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)
|
||||
|
@ -42,6 +42,7 @@ mca_pmix_pmix112_la_CPPFLAGS = \
|
||||
-I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS)
|
||||
mca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS)
|
||||
mca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS)
|
||||
mca_pmix_pmix112_la_DEPENDENCIES = $(mca_pmix_pmix112_la_LIBADD)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pmix_pmix112_la_SOURCES =$(sources)
|
||||
@ -49,3 +50,4 @@ libmca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS)
|
||||
libmca_pmix_pmix112_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS)
|
||||
libmca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS)
|
||||
libmca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS)
|
||||
# the static (libmca_) library depends on its own LIBADD list
libmca_pmix_pmix112_la_DEPENDENCIES = $(libmca_pmix_pmix112_la_LIBADD)
|
||||
|
@ -431,6 +431,7 @@ int pmix1_server_notify_error(int status,
|
||||
op->cbdata = cbdata;
|
||||
|
||||
rc = pmix1_convert_opalrc(status);
|
||||
opal_output(0, "CALLING NOTIFY ERROR");
|
||||
rc = PMIx_Notify_error(rc, ps, psz, eps, esz,
|
||||
pinfo, sz, opcbfunc, op);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
@ -42,6 +42,7 @@ mca_pmix_pmix120_la_CPPFLAGS = \
|
||||
-I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS)
|
||||
mca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS)
|
||||
mca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS)
|
||||
mca_pmix_pmix120_la_DEPENDENCIES = $(mca_pmix_pmix120_la_LIBADD)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pmix_pmix120_la_SOURCES =$(sources)
|
||||
@ -49,3 +50,4 @@ libmca_pmix_pmix120_la_CFLAGS = $(opal_pmix_pmix120_CFLAGS)
|
||||
libmca_pmix_pmix120_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS)
|
||||
libmca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS)
|
||||
libmca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS)
|
||||
libmca_pmix_pmix120_la_DEPENDENCIES = $(libmca_pmix_pmix120_la_LIBADD)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
@ -63,8 +63,18 @@ BEGIN_C_DECLS
|
||||
#define PMIX_MAX_NSLEN 255
|
||||
#define PMIX_MAX_KEYLEN 511
|
||||
|
||||
/* define a *wildcard* value for requests involving rank */
|
||||
#define PMIX_RANK_WILDCARD -1
|
||||
/* define a value for requests for job-level data
|
||||
* where the info itself isn't associated with any
|
||||
* specific rank, or when a request involves
|
||||
* a rank that isn't known - e.g., when someone requests
|
||||
* info thru one of the legacy interfaces where the rank
|
||||
* is typically encoded into the key itself since there is
|
||||
* no rank parameter in the API itself */
|
||||
#define PMIX_RANK_UNDEF INT32_MAX
|
||||
/* define a value to indicate that the user wants the
 * data for the given key from every rank that posted
 * that key.
 * The replacement list is parenthesized so the macro expands to a
 * single operand regardless of the operators around it. */
#define PMIX_RANK_WILDCARD (INT32_MAX-1)
|
||||
|
||||
/* define a set of "standard" PMIx attributes that can
|
||||
* be queried. Implementations (and users) are free to extend as
|
||||
@ -163,7 +173,7 @@ BEGIN_C_DECLS
|
||||
|
||||
/* error handler registration and notification info keys */
|
||||
#define PMIX_ERROR_NAME "pmix.errname" // enum pmix_status_t specific error to be notified
|
||||
#define PMIX_ERROR_GROUP_COMM "pmix.errgroup.comm" // bool - set true to get comm errors notification
|
||||
#define PMIX_ERROR_GROUP_COMM "pmix.errgroup.comm" // bool - set true to get comm errors notification
|
||||
#define PMIX_ERROR_GROUP_ABORT "pmix.errgroup.abort" // bool -set true to get abort errors notification
|
||||
#define PMIX_ERROR_GROUP_MIGRATE "pmix.errgroup.migrate" // bool -set true to get migrate errors notification
|
||||
#define PMIX_ERROR_GROUP_RESOURCE "pmix.errgroup.resource" // bool -set true to get resource errors notification
|
||||
@ -199,7 +209,7 @@ BEGIN_C_DECLS
|
||||
|
||||
/**** PMIX ERROR CONSTANTS ****/
|
||||
/* PMIx errors are always negative, with 0 reserved for success */
|
||||
#define PMIX_ERROR_MIN -50 // set equal to number of non-zero entries in enum
|
||||
#define PMIX_ERROR_MIN -52 // set equal to number of non-zero entries in enum
|
||||
|
||||
typedef enum {
|
||||
PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER = PMIX_ERROR_MIN,
|
||||
@ -257,6 +267,8 @@ typedef enum {
|
||||
PMIX_ERR_SILENT,
|
||||
PMIX_ERROR,
|
||||
|
||||
PMIX_ERR_GRP_FOUND,
|
||||
PMIX_ERR_DFLT_FOUND,
|
||||
PMIX_SUCCESS
|
||||
} pmix_status_t;
|
||||
|
||||
@ -288,6 +300,9 @@ typedef enum {
|
||||
PMIX_TIMEVAL,
|
||||
PMIX_TIME,
|
||||
|
||||
PMIX_STATUS, // needs to be tracked separately from integer for those times
|
||||
// when we are embedded and it needs to be converted to the
|
||||
// host error definitions
|
||||
PMIX_HWLOC_TOPO,
|
||||
PMIX_VALUE,
|
||||
PMIX_INFO_ARRAY,
|
||||
@ -411,6 +426,7 @@ typedef struct {
|
||||
float fval;
|
||||
double dval;
|
||||
struct timeval tv;
|
||||
pmix_status_t status;
|
||||
pmix_info_array_t array;
|
||||
pmix_byte_object_t bo;
|
||||
} data;
|
||||
@ -494,6 +510,7 @@ extern void pmix_value_load(pmix_value_t *v, void *data,
|
||||
/**** PMIX INFO STRUCT ****/
|
||||
typedef struct {
|
||||
char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator
|
||||
bool required; // defaults to optional (i.e., required=false)
|
||||
pmix_value_t value;
|
||||
} pmix_info_t;
|
||||
|
||||
@ -531,6 +548,10 @@ typedef struct {
|
||||
(void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \
|
||||
pmix_value_load(&((m)->value), (v), (t)); \
|
||||
} while(0);
|
||||
/* mark the info struct pointed to by m as required */
#define PMIX_INFO_REQUIRED(m)       \
    do {                            \
        (m)->required = true;       \
    } while(0);
/* mark the info struct pointed to by m as optional */
#define PMIX_INFO_OPTIONAL(m)       \
    do {                            \
        (m)->required = false;      \
    } while(0);
|
||||
|
||||
|
||||
/**** PMIX LOOKUP RETURN STRUCT ****/
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -136,6 +136,10 @@ int pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type)
|
||||
datasize = sizeof(time_t);
|
||||
break;
|
||||
|
||||
case PMIX_STATUS:
|
||||
datasize = sizeof(pmix_status_t);
|
||||
break;
|
||||
|
||||
default:
|
||||
return PMIX_ERR_UNKNOWN_DATA_TYPE;
|
||||
}
|
||||
@ -166,7 +170,7 @@ int pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type)
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
/* compare function for pmix_value_t*/
|
||||
/* compare function for pmix_value_t */
|
||||
bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
|
||||
{
|
||||
bool rc = false;
|
||||
@ -213,6 +217,9 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
|
||||
case PMIX_STRING:
|
||||
rc = strcmp(p->data.string, p1->data.string);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
rc = (p->data.status == p1->data.status);
|
||||
break;
|
||||
default:
|
||||
pmix_output(0, "COMPARE-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type);
|
||||
}
|
||||
@ -293,6 +300,9 @@ pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src)
|
||||
p->data.tv.tv_sec = src->data.tv.tv_sec;
|
||||
p->data.tv.tv_usec = src->data.tv.tv_usec;
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t));
|
||||
break;
|
||||
case PMIX_INFO_ARRAY:
|
||||
p->data.array.size = src->data.array.size;
|
||||
if (0 < src->data.array.size) {
|
||||
@ -343,6 +353,7 @@ int pmix_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src,
|
||||
{
|
||||
*dest = (pmix_info_t*)malloc(sizeof(pmix_info_t));
|
||||
(void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN);
|
||||
(*dest)->required = src->required;
|
||||
return pmix_value_xfer(&(*dest)->value, &src->value);
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -275,6 +275,8 @@ int pmix_bfrop_pack_timeval(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_pack_topo(pmix_buffer_t *buffer, const void *src,
|
||||
@ -337,6 +339,8 @@ int pmix_bfrop_unpack_timeval(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
|
||||
int32_t *num_vals, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest,
|
||||
@ -427,6 +431,7 @@ int pmix_bfrop_print_double(char **output, char *prefix, double *src, pmix_data_
|
||||
|
||||
int pmix_bfrop_print_timeval(char **output, char *prefix, struct timeval *src, pmix_data_type_t type);
|
||||
int pmix_bfrop_print_time(char **output, char *prefix, time_t *src, pmix_data_type_t type);
|
||||
int pmix_bfrop_print_status(char **output, char *prefix, pmix_status_t *src, pmix_data_type_t type);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
int pmix_bfrop_print_topo(char **output, char *prefix,
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -289,6 +289,12 @@ pmix_status_t pmix_bfrop_open(void)
|
||||
pmix_bfrop_std_copy,
|
||||
pmix_bfrop_print_time);
|
||||
|
||||
PMIX_REGISTER_TYPE("PMIX_STATUS", PMIX_STATUS,
|
||||
pmix_bfrop_pack_status,
|
||||
pmix_bfrop_unpack_status,
|
||||
pmix_bfrop_std_copy,
|
||||
pmix_bfrop_print_status);
|
||||
|
||||
#if PMIX_HAVE_HWLOC
|
||||
PMIX_REGISTER_TYPE("PMIX_HWLOC_TOPO", PMIX_HWLOC_TOPO,
|
||||
pmix_bfrop_pack_topo,
|
||||
@ -395,6 +401,8 @@ pmix_status_t pmix_bfrop_close(void)
|
||||
void pmix_value_load(pmix_value_t *v, void *data,
|
||||
pmix_data_type_t type)
|
||||
{
|
||||
pmix_byte_object_t *bo;
|
||||
|
||||
v->type = type;
|
||||
if (NULL == data) {
|
||||
/* just set the fields to zero */
|
||||
@ -457,9 +465,13 @@ void pmix_value_load(pmix_value_t *v, void *data,
|
||||
case PMIX_TIMEVAL:
|
||||
memcpy(&(v->data.tv), data, sizeof(struct timeval));
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(&(v->data.status), data, sizeof(pmix_status_t));
|
||||
break;
|
||||
case PMIX_BYTE_OBJECT:
|
||||
v->data.bo.bytes = data;
|
||||
memcpy(&(v->data.bo.size), data, sizeof(size_t));
|
||||
bo = (pmix_byte_object_t*)data;
|
||||
v->data.bo.bytes = bo->bytes;
|
||||
memcpy(&(v->data.bo.size), &bo->size, sizeof(size_t));
|
||||
break;
|
||||
case PMIX_TIME:
|
||||
case PMIX_HWLOC_TOPO:
|
||||
@ -569,6 +581,10 @@ pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data,
|
||||
memcpy(*data, &(kv->data.tv), sizeof(struct timeval));
|
||||
*sz = sizeof(struct timeval);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
memcpy(*data, &(kv->data.status), sizeof(pmix_status_t));
|
||||
*sz = sizeof(pmix_status_t);
|
||||
break;
|
||||
case PMIX_BYTE_OBJECT:
|
||||
if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) {
|
||||
*data = kv->data.bo.bytes;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -406,6 +406,26 @@ int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src,
|
||||
}
|
||||
|
||||
|
||||
/* STATUS */
|
||||
int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src,
|
||||
int32_t num_vals, pmix_data_type_t type)
|
||||
{
|
||||
int ret = PMIX_SUCCESS;
|
||||
int32_t i;
|
||||
pmix_status_t *ssrc = (pmix_status_t *)src;
|
||||
int32_t status;
|
||||
|
||||
for (i = 0; i < num_vals; ++i) {
|
||||
status = (int32_t)ssrc[i];
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &status, 1, PMIX_INT32))) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* PACK FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
static int pack_val(pmix_buffer_t *buffer,
|
||||
pmix_value_t *p)
|
||||
@ -503,6 +523,11 @@ static int pack_val(pmix_buffer_t *buffer,
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.status, 1, PMIX_STATUS))) {
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case PMIX_INFO_ARRAY:
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.array, 1, PMIX_INFO_ARRAY))) {
|
||||
return ret;
|
||||
@ -563,6 +588,10 @@ int pmix_bfrop_pack_info(pmix_buffer_t *buffer, const void *src,
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &foo, 1, PMIX_STRING))) {
|
||||
return ret;
|
||||
}
|
||||
/* pack required flag */
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_bool(buffer, &info[i].required, 1, PMIX_BOOL))) {
|
||||
return ret;
|
||||
}
|
||||
/* pack the type */
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &info[i].value.type, 1, PMIX_INT))) {
|
||||
return ret;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,6 +27,7 @@
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#include "src/util/error.h"
|
||||
#include "src/buffer_ops/internal.h"
|
||||
|
||||
int pmix_bfrop_print(char **output, char *prefix, void *src, pmix_data_type_t type)
|
||||
@ -540,6 +541,32 @@ int pmix_bfrop_print_timeval(char **output, char *prefix,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int pmix_bfrop_print_status(char **output, char *prefix,
|
||||
pmix_status_t *src, pmix_data_type_t type)
|
||||
{
|
||||
char *prefx;
|
||||
|
||||
/* deal with NULL prefix */
|
||||
if (NULL == prefix) asprintf(&prefx, " ");
|
||||
else prefx = prefix;
|
||||
|
||||
/* if src is NULL, just print data type and return */
|
||||
if (NULL == src) {
|
||||
asprintf(output, "%sData type: PMIX_STATUS\tValue: NULL pointer", prefx);
|
||||
if (prefx != prefix) {
|
||||
free(prefx);
|
||||
}
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
asprintf(output, "%sData type: PMIX_STATUS\tValue: %s", prefx, PMIx_Error_string(*src));
|
||||
if (prefx != prefix) {
|
||||
free(prefx);
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/* PRINT FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
|
||||
/*
|
||||
@ -632,6 +659,10 @@ int pmix_bfrop_print_value(char **output, char *prefix,
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx,
|
||||
(long)src->data.tv.tv_sec, (long)src->data.tv.tv_usec);
|
||||
break;
|
||||
case PMIX_STATUS:
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATUS\tValue: %s", prefx,
|
||||
PMIx_Error_string(src->data.status));
|
||||
break;
|
||||
default:
|
||||
asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx);
|
||||
break;
|
||||
@ -648,8 +679,8 @@ int pmix_bfrop_print_info(char **output, char *prefix,
|
||||
char *tmp;
|
||||
|
||||
pmix_bfrop_print_value(&tmp, NULL, &src->value, PMIX_VALUE);
|
||||
asprintf(output, "%sKEY: %s %s", prefix, src->key,
|
||||
(NULL == tmp) ? "NULL" : tmp);
|
||||
asprintf(output, "%sKEY: %s REQD: %s %s", prefix, src->key,
|
||||
src->required ? "Y" : "N", (NULL == tmp) ? "PMIX_VALUE: NULL" : tmp);
|
||||
if (NULL != tmp) {
|
||||
free(tmp);
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -500,6 +500,20 @@ int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest,
|
||||
}
|
||||
|
||||
|
||||
/*
 * Unpack pmix_status_t values from a buffer.
 *
 * buffer   - buffer to read from
 * dest     - destination storage, passed unchanged to the int32 unpacker
 * num_vals - number of values requested
 * type     - not consulted here; the data is unpacked as PMIX_INT32
 *
 * Returns PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER when
 * pmix_bfrop_too_small() reports insufficient data, otherwise whatever
 * pmix_bfrop_unpack_int32() returns.
 */
int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
                             int32_t *num_vals, pmix_data_type_t type)
{
    size_t bytes_needed;

    pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_status * %d\n", (int)*num_vals);

    /* make sure the buffer holds the requested number of values */
    bytes_needed = (*num_vals)*(sizeof(pmix_status_t));
    if (pmix_bfrop_too_small(buffer, bytes_needed)) {
        return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER;
    }

    /* delegate the actual unpacking */
    return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_INT32);
}
|
||||
|
||||
|
||||
/* UNPACK FUNCTIONS FOR GENERIC PMIX TYPES */
|
||||
|
||||
/*
|
||||
@ -672,6 +686,11 @@ int pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest,
|
||||
}
|
||||
(void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN);
|
||||
free(tmp);
|
||||
/* unpack the required flag */
|
||||
m=1;
|
||||
if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_bool(buffer, &ptr[i].required, &m, PMIX_BOOL))) {
|
||||
return ret;
|
||||
}
|
||||
/* unpack value - since the value structure is statically-defined
|
||||
* instead of a pointer in this struct, we directly unpack it to
|
||||
* avoid the malloc */
|
||||
|
@ -10,7 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
# Copyright (c) 2013-2016 Intel, Inc. All rights reserved
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -26,10 +26,14 @@ headers += \
|
||||
src/class/pmix_object.h \
|
||||
src/class/pmix_list.h \
|
||||
src/class/pmix_pointer_array.h \
|
||||
src/class/pmix_hash_table.h
|
||||
src/class/pmix_hash_table.h \
|
||||
src/class/pmix_hotel.h \
|
||||
src/class/pmix_ring_buffer.h
|
||||
|
||||
sources += \
|
||||
src/class/pmix_object.c \
|
||||
src/class/pmix_list.c \
|
||||
src/class/pmix_pointer_array.c \
|
||||
src/class/pmix_hash_table.c
|
||||
src/class/pmix_hash_table.c \
|
||||
src/class/pmix_hotel.c \
|
||||
src/class/pmix_ring_buffer.c
|
||||
|
136
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c
Обычный файл
136
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c
Обычный файл
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include PMIX_EVENT_HEADER
|
||||
#include "src/class/pmix_hotel.h"
|
||||
|
||||
|
||||
static void local_eviction_callback(int fd, short flags, void *arg)
|
||||
{
|
||||
pmix_hotel_room_eviction_callback_arg_t *eargs =
|
||||
(pmix_hotel_room_eviction_callback_arg_t*) arg;
|
||||
void *occupant = eargs->hotel->rooms[eargs->room_num].occupant;
|
||||
|
||||
/* Remove the occurpant from the room.
|
||||
|
||||
Do not change this logic without also changing the same logic
|
||||
in pmix_hotel_checkout() and
|
||||
pmix_hotel_checkout_and_return_occupant(). */
|
||||
pmix_hotel_t *hotel = eargs->hotel;
|
||||
pmix_hotel_room_t *room = &(hotel->rooms[eargs->room_num]);
|
||||
room->occupant = NULL;
|
||||
hotel->last_unoccupied_room++;
|
||||
assert(hotel->last_unoccupied_room < hotel->num_rooms);
|
||||
hotel->unoccupied_rooms[hotel->last_unoccupied_room] = eargs->room_num;
|
||||
|
||||
/* Invoke the user callback to tell them that they were evicted */
|
||||
hotel->evict_callback_fn(hotel,
|
||||
eargs->room_num,
|
||||
occupant);
|
||||
}
|
||||
|
||||
|
||||
int pmix_hotel_init(pmix_hotel_t *h, int num_rooms,
|
||||
pmix_event_base_t *evbase,
|
||||
uint32_t eviction_timeout,
|
||||
int eviction_event_priority,
|
||||
pmix_hotel_eviction_callback_fn_t evict_callback_fn)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Bozo check */
|
||||
if (num_rooms <= 0 ||
|
||||
NULL == evict_callback_fn) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
h->num_rooms = num_rooms;
|
||||
h->evbase = evbase;
|
||||
h->eviction_timeout.tv_usec = eviction_timeout % 1000000;
|
||||
h->eviction_timeout.tv_sec = eviction_timeout / 1000000;
|
||||
h->evict_callback_fn = evict_callback_fn;
|
||||
h->rooms = (pmix_hotel_room_t*)malloc(num_rooms * sizeof(pmix_hotel_room_t));
|
||||
if (NULL != evict_callback_fn) {
|
||||
h->eviction_args =
|
||||
(pmix_hotel_room_eviction_callback_arg_t*)malloc(num_rooms * sizeof(pmix_hotel_room_eviction_callback_arg_t));
|
||||
}
|
||||
h->unoccupied_rooms = (int*) malloc(num_rooms * sizeof(int));
|
||||
h->last_unoccupied_room = num_rooms - 1;
|
||||
|
||||
for (i = 0; i < num_rooms; ++i) {
|
||||
/* Mark this room as unoccupied */
|
||||
h->rooms[i].occupant = NULL;
|
||||
|
||||
/* Setup this room in the unoccupied index array */
|
||||
h->unoccupied_rooms[i] = i;
|
||||
|
||||
/* Setup the eviction callback args */
|
||||
h->eviction_args[i].hotel = h;
|
||||
h->eviction_args[i].room_num = i;
|
||||
|
||||
/* Create this room's event (but don't add it) */
|
||||
if (NULL != h->evbase) {
|
||||
event_assign(&(h->rooms[i].eviction_timer_event),
|
||||
h->evbase,
|
||||
-1, 0, local_eviction_callback,
|
||||
&(h->eviction_args[i]));
|
||||
}
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Class constructor: put the hotel into an empty, unallocated state
 * (no rooms, no event base, no callback, zero timeout).
 */
static void constructor(pmix_hotel_t *h)
{
    /* no storage has been allocated yet */
    h->rooms = NULL;
    h->eviction_args = NULL;
    h->unoccupied_rooms = NULL;

    /* zero rooms, hence no valid "last unoccupied" index */
    h->num_rooms = 0;
    h->last_unoccupied_room = -1;

    /* eviction machinery is not configured */
    h->evbase = NULL;
    h->evict_callback_fn = NULL;
    h->eviction_timeout.tv_usec = 0;
    h->eviction_timeout.tv_sec = 0;
}
|
||||
|
||||
/*
 * Class destructor: delete the timer event of every room that is still
 * occupied (only when an event base is in use), then release the three
 * arrays owned by the hotel.
 */
static void destructor(pmix_hotel_t *h)
{
    int room;

    /* Go through all occupied rooms and destroy their events */
    if (NULL != h->evbase) {
        for (room = 0; room < h->num_rooms; ++room) {
            if (NULL == h->rooms[room].occupant) {
                continue;
            }
            event_del(&(h->rooms[room].eviction_timer_event));
        }
    }

    /* free() accepts NULL, so no guards are needed */
    free(h->rooms);
    free(h->eviction_args);
    free(h->unoccupied_rooms);
}
|
||||
|
||||
/* Instantiate the pmix_hotel_t class: its parent is pmix_object_t and it
   uses the constructor/destructor defined in this file. */
PMIX_CLASS_INSTANCE(pmix_hotel_t,
|
||||
pmix_object_t,
|
||||
constructor,
|
||||
destructor);
|
354
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h
Обычный файл
354
opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h
Обычный файл
@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/** @file
|
||||
*
|
||||
* This file provides a "hotel" class:
|
||||
*
|
||||
* - A hotel has a fixed number of rooms (i.e., storage slots)
|
||||
* - An arbitrary data pointer can check into an empty room at any time
|
||||
* - The occupant of a room can check out at any time
|
||||
* - Optionally, the occupant of a room can be forcibly evicted at a
|
||||
* given time (i.e., when an pmix timer event expires).
|
||||
* - The hotel has finite occupancy; if you try to checkin a new
|
||||
* occupant and the hotel is already full, it will gracefully fail
|
||||
* to checkin.
|
||||
*
|
||||
* One use case for this class is for ACK-based network retransmission
|
||||
* schemes (NACK-based retransmission schemes probably can use
|
||||
* pmix_ring_buffer).
|
||||
*
|
||||
* For ACK-based retransmission schemes, a hotel might be used
|
||||
* something like this:
|
||||
*
|
||||
* - when a message is sent, check it in to a hotel with a timer
|
||||
* - if an ACK is received, check it out of the hotel (which also cancels
|
||||
* the timer)
|
||||
* - if an ACK isn't received in time, the timer will expire and the
|
||||
* upper layer will get a callback with the message
|
||||
* - if an ACK is received late (i.e., after its timer has expired),
|
||||
* then checkout will gracefully fail
|
||||
*
|
||||
* Note that this class intentionally provides pretty minimal
|
||||
* functionality. It is intended to be used in performance-critical
|
||||
* code paths -- extra functionality would simply add latency.
|
||||
*
|
||||
* There is an pmix_hotel_init() function to create a hotel, but no
|
||||
* corresponding finalize; the destructor will handle all finalization
|
||||
* issues. Note that when a hotel is destroyed, it will delete all
|
||||
* pending events from the event base (i.e., all pending eviction
|
||||
* callbacks); no further eviction callbacks will be invoked.
|
||||
*/
|
||||
|
||||
#ifndef PMIX_HOTEL_H
|
||||
#define PMIX_HOTEL_H
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
#include "private/types.h"
|
||||
#include "private/prefetch.h"
|
||||
#include "pmix/pmix_common.h"
|
||||
#include "src/class/pmix_object.h"
|
||||
#include PMIX_EVENT_HEADER
|
||||
#include <pmix/rename.h>
|
||||
#include "src/util/output.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct pmix_hotel_t;
|
||||
|
||||
/* User-supplied function to be invoked when an occupant is evicted. */
|
||||
typedef void (*pmix_hotel_eviction_callback_fn_t)(struct pmix_hotel_t *hotel,
|
||||
int room_num,
|
||||
void *occupant);
|
||||
|
||||
/* Note that this is an internal data structure; it is not part of the
|
||||
public pmix_hotel interface. Public consumers of pmix_hotel
|
||||
shouldn't need to use this struct at all (we only have it here in
|
||||
this .h file because some functions are inlined for speed, and need
|
||||
to get to the internals of this struct).
|
||||
|
||||
The room struct should be as small as possible to be cache
|
||||
friendly. Specifically: it would be great if multiple rooms could
|
||||
fit in a single cache line because we'll always allocate a
|
||||
contiguous set of rooms in an array. */
|
||||
typedef struct {
|
||||
/* opaque pointer checked in by the caller; NULL means the room is empty */
void *occupant;
|
||||
/* timer event added at checkin and deleted at checkout when an event base is in use */
pmix_event_t eviction_timer_event;
|
||||
} pmix_hotel_room_t;
|
||||
|
||||
/* Note that this is an internal data structure; it is not part of the
|
||||
public pmix_hotel interface. Public consumers of pmix_hotel
|
||||
shouldn't need to use this struct at all (we only have it here in
|
||||
this .h file because some functions are inlined for speed, and need
|
||||
to get to the internals of this struct).
|
||||
|
||||
Use a unique struct for holding the arguments for eviction
|
||||
callbacks. We *could* make the to-be-evicted pmix_hotel_room_t
|
||||
instance as the argument, but we don't, for 2 reasons:
|
||||
|
||||
1. We want as many pmix_hotel_room_t's to fit in a cache line as
|
||||
possible (i.e., to be as cache-friendly as possible). The
|
||||
common/fast code path only needs to access the data in the
|
||||
pmix_hotel_room_t (and not the callback argument data).
|
||||
|
||||
2. Evictions will be uncommon, so we don't mind penalizing them a
|
||||
bit by making the data be in a separate cache line.
|
||||
*/
|
||||
typedef struct {
|
||||
/* hotel that owns the room */
struct pmix_hotel_t *hotel;
|
||||
/* index of the room within hotel->rooms */
int room_num;
|
||||
} pmix_hotel_room_eviction_callback_arg_t;
|
||||
|
||||
typedef struct pmix_hotel_t {
|
||||
/* make this an object */
|
||||
pmix_object_t super;
|
||||
|
||||
/* Max number of rooms in the hotel */
|
||||
int num_rooms;
|
||||
|
||||
/* event base to be used for eviction timeout; when NULL the inline
   checkin/checkout functions skip all timer event handling */
|
||||
pmix_event_base_t *evbase;
|
||||
/* length of a stay before eviction, passed to event_add() at checkin */
struct timeval eviction_timeout;
|
||||
/* user callback invoked after an occupant has been evicted */
pmix_hotel_eviction_callback_fn_t evict_callback_fn;
|
||||
|
||||
/* All rooms in this hotel */
|
||||
pmix_hotel_room_t *rooms;
|
||||
|
||||
/* Separate array for all the eviction callback arguments (see
|
||||
rationale above for why this is a separate array) */
|
||||
pmix_hotel_room_eviction_callback_arg_t *eviction_args;
|
||||
|
||||
/* All currently unoccupied rooms in this hotel (not necessarily
|
||||
in any particular order) */
|
||||
int *unoccupied_rooms;
|
||||
/* index in unoccupied_rooms of the last free entry; a negative value
   means the hotel is full */
int last_unoccupied_room;
|
||||
} pmix_hotel_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_hotel_t);
|
||||
|
||||
/**
|
||||
* Initialize the hotel.
|
||||
*
|
||||
* @param hotel Pointer to a hotel (IN)
|
||||
* @param num_rooms The total number of rooms in the hotel (IN)
|
||||
* @param evbase Pointer to event base used for eviction timeout
|
||||
* @param eviction_timeout Max length of a stay at the hotel before
|
||||
* the eviction callback is invoked (in microseconds)
|
||||
* @param eviction_event_priority Event lib priority for the eviction timeout
|
||||
* @param evict_callback_fn Callback function invoked if an occupant
|
||||
* does not check out before the eviction_timeout.
|
||||
*
|
||||
* NOTE: If the event base (evbase) is NULL, then no eviction timer
|
||||
* will be set - occupants will remain checked into the hotel until
|
||||
* explicitly checked out.
|
||||
*
|
||||
* Also note: the eviction_callback_fn should absolutely not call any
|
||||
* of the hotel checkout functions. Specifically: the occupant has
|
||||
* already been ("forcibly") checked out *before* the
|
||||
* eviction_callback_fn is invoked.
|
||||
*
|
||||
* @return PMIX_SUCCESS if all initializations were successful. Otherwise,
|
||||
* the error indicate what went wrong in the function.
|
||||
*/
|
||||
PMIX_DECLSPEC int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms,
|
||||
pmix_event_base_t *evbase,
|
||||
uint32_t eviction_timeout,
|
||||
int eviction_event_priority,
|
||||
pmix_hotel_eviction_callback_fn_t evict_callback_fn);
|
||||
|
||||
/**
|
||||
* Check in an occupant to the hotel.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param occupant Occupant to check in (opaque to the hotel) (IN)
|
||||
* @param room The room number that identifies this occupant in the
|
||||
* hotel (OUT).
|
||||
*
|
||||
* If there is room in the hotel, the occupant is checked in and the
|
||||
* timer for that occupant is started. The occupant's room is
|
||||
* returned in the "room" param.
|
||||
*
|
||||
* Note that once a room's checkout_expire timer expires, the occupant
|
||||
* is forcibly checked out, and then the eviction callback is invoked.
|
||||
*
|
||||
* @return PMIX_SUCCESS if the occupant is successfully checked in,
|
||||
* and the room parameter will contain a valid value.
|
||||
* @return PMIX_ERR_OUT_OF_RESOURCE if the hotel is full. Try
|
||||
* again later.
|
||||
*/
|
||||
static inline int pmix_hotel_checkin(pmix_hotel_t *hotel,
                                     void *occupant,
                                     int *room_num)
{
    int slot;
    pmix_hotel_room_t *assigned;

    /* A negative index means there is no free room left */
    if (PMIX_UNLIKELY(hotel->last_unoccupied_room < 0)) {
        return PMIX_ERR_OUT_OF_RESOURCE;
    }

    /* Take the last entry off the unoccupied list and move in */
    slot = hotel->unoccupied_rooms[hotel->last_unoccupied_room];
    hotel->last_unoccupied_room--;
    *room_num = slot;
    assigned = &(hotel->rooms[slot]);
    assigned->occupant = occupant;

    /* Start the eviction timer, if timers are in use */
    if (NULL != hotel->evbase) {
        event_add(&(assigned->eviction_timer_event),
                  &(hotel->eviction_timeout));
    }

    return PMIX_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Same as pmix_hotel_checkin(), but slightly optimized for when the
|
||||
* caller *knows* that there is a room available.
|
||||
*/
|
||||
static inline void pmix_hotel_checkin_with_res(pmix_hotel_t *hotel,
                                               void *occupant,
                                               int *room_num)
{
    int slot;
    pmix_hotel_room_t *assigned;

    /* No availability check: the caller guarantees a free room.
       Take the last entry off the unoccupied list. */
    slot = hotel->unoccupied_rooms[hotel->last_unoccupied_room];
    hotel->last_unoccupied_room--;
    *room_num = slot;
    assigned = &(hotel->rooms[slot]);
    assert(assigned->occupant == NULL);
    assigned->occupant = occupant;

    /* Start the eviction timer, if timers are in use */
    if (NULL != hotel->evbase) {
        event_add(&(assigned->eviction_timer_event),
                  &(hotel->eviction_timeout));
    }
}
|
||||
|
||||
/**
|
||||
* Check the specified occupant out of the hotel.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
*
|
||||
* If there is an occupant in the room, their timer is canceled and
|
||||
* they are checked out.
|
||||
*
|
||||
* Nothing is returned (as a minor optimization).
|
||||
*/
|
||||
static inline void pmix_hotel_checkout(pmix_hotel_t *hotel, int room_num)
{
    pmix_hotel_room_t *room;

    /* Bozo check */
    assert(room_num < hotel->num_rooms);

    room = &(hotel->rooms[room_num]);

    /* Nobody home: nothing to do.  We deliberately do not report
       whether anyone was checked out (this is in the critical
       performance path) -- the upper layer is assumed to know what
       it is doing. */
    if (PMIX_UNLIKELY(NULL == room->occupant)) {
        return;
    }

    /* Do not change this logic without also changing the same
       logic in pmix_hotel_checkout_and_return_occupant() and
       pmix_hotel.c:local_eviction_callback(). */
    room->occupant = NULL;
    if (NULL != hotel->evbase) {
        event_del(&(room->eviction_timer_event));
    }
    hotel->last_unoccupied_room++;
    assert(hotel->last_unoccupied_room < hotel->num_rooms);
    hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num;
}
|
||||
|
||||
/**
|
||||
* Check the specified occupant out of the hotel and return the occupant.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
* @param void * occupant (OUT)
|
||||
* If there is an occupant in the room, their timer is canceled and
|
||||
* they are checked out.
|
||||
*
|
||||
* Use this checkout and when caller needs the occupant
|
||||
*/
|
||||
static inline void pmix_hotel_checkout_and_return_occupant(pmix_hotel_t *hotel, int room_num, void **occupant)
{
    pmix_hotel_room_t *room;
    void *guest;

    /* Bozo check */
    assert(room_num < hotel->num_rooms);

    room = &(hotel->rooms[room_num]);
    guest = room->occupant;

    /* Empty room: report NULL and leave the hotel untouched */
    if (PMIX_UNLIKELY(NULL == guest)) {
        *occupant = NULL;
        return;
    }

    pmix_output (10, "checking out occupant %p from room num %d", guest, room_num);

    /* Do not change this logic without also changing the same
       logic in pmix_hotel_checkout() and
       pmix_hotel.c:local_eviction_callback(). */
    *occupant = guest;
    room->occupant = NULL;
    if (NULL != hotel->evbase) {
        event_del(&(room->eviction_timer_event));
    }
    hotel->last_unoccupied_room++;
    assert(hotel->last_unoccupied_room < hotel->num_rooms);
    hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num;
}
|
||||
|
||||
/**
|
||||
* Returns true if the hotel is empty (no occupant)
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @return bool true if empty false if there is a occupant(s)
|
||||
*
|
||||
*/
|
||||
static inline bool pmix_hotel_is_empty (pmix_hotel_t *hotel)
{
    /* every room is on the unoccupied list exactly when the last
       unoccupied index is the final slot of that list */
    return hotel->last_unoccupied_room == hotel->num_rooms - 1;
}
|
||||
|
||||
/**
|
||||
* Access the occupant of a room, but leave them checked into their room.
|
||||
*
|
||||
* @param hotel Pointer to hotel (IN)
|
||||
* @param room Room number to checkout (IN)
|
||||
* @param void * occupant (OUT)
|
||||
*
|
||||
* This accessor function is typically used to cycle across the occupants
|
||||
* to check for someone already present that matches a description.
|
||||
*/
|
||||
static inline void pmix_hotel_knock(pmix_hotel_t *hotel, int room_num, void **occupant)
{
    pmix_hotel_room_t *room;

    /* Bozo check */
    assert(room_num < hotel->num_rooms);

    /* Default answer: nobody there */
    *occupant = NULL;

    room = &(hotel->rooms[room_num]);
    if (PMIX_UNLIKELY(NULL == room->occupant)) {
        return;
    }

    /* Someone is in the room - hand them back without checking them out */
    pmix_output (10, "occupant %p in room num %d responded to knock", room->occupant, room_num);
    *occupant = room->occupant;
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* PMIX_HOTEL_H */
|
154
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c
Обычный файл
154
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c
Обычный файл
@ -0,0 +1,154 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "pmix/pmix_common.h"
|
||||
#include "src/class/pmix_ring_buffer.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
/* Forward declarations of the class constructor/destructor defined below */
static void pmix_ring_buffer_construct(pmix_ring_buffer_t *);
|
||||
static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *);
|
||||
|
||||
/* Instantiate the pmix_ring_buffer_t class with pmix_object_t as parent */
PMIX_CLASS_INSTANCE(pmix_ring_buffer_t, pmix_object_t,
|
||||
pmix_ring_buffer_construct,
|
||||
pmix_ring_buffer_destruct);
|
||||
|
||||
/*
|
||||
* pmix_ring_buffer constructor
|
||||
*/
|
||||
static void pmix_ring_buffer_construct(pmix_ring_buffer_t *ring)
{
    /* no storage until pmix_ring_buffer_init() is called */
    ring->addr = NULL;
    ring->size = 0;

    /* next write goes to slot 0; a tail of -1 marks the ring as empty */
    ring->head = 0;
    ring->tail = -1;
}
|
||||
|
||||
/*
|
||||
* pmix_ring_buffer destructor
|
||||
*/
|
||||
static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *ring)
{
    /* release the slot array (free() tolerates NULL); the items the
       slots pointed at are not freed here */
    free(ring->addr);
    ring->addr = NULL;
    ring->size = 0;
}
|
||||
|
||||
/**
|
||||
* initialize a ring object
|
||||
*/
|
||||
/**
 * Initialize a ring object.
 *
 * @param ring  Ring to initialize (IN/OUT)
 * @param size  Number of slots in the ring; must be positive (IN)
 *
 * @return PMIX_ERR_BAD_PARAM if ring is NULL or size is not positive,
 *         PMIX_ERR_OUT_OF_RESOURCE if the slot array cannot be
 *         allocated, PMIX_SUCCESS otherwise.
 */
int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size)
{
    /* check for errors - a ring without slots cannot be pushed to,
       since push always writes to addr[head] */
    if (NULL == ring || size <= 0) {
        return PMIX_ERR_BAD_PARAM;
    }

    /* Allocate and set the ring to NULL; passing the element count and
       element size separately lets calloc() detect overflow */
    ring->addr = (char **)calloc((size_t)size, sizeof(char*));
    if (NULL == ring->addr) { /* out of memory */
        return PMIX_ERR_OUT_OF_RESOURCE;
    }
    ring->size = size;

    return PMIX_SUCCESS;
}
|
||||
|
||||
void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr)
{
    const int slot = ring->head;
    const int last = ring->size - 1;
    /* whatever currently sits in the slot we are about to overwrite
       (NULL when the slot is free) is handed back to the caller */
    char *displaced = (char*)ring->addr[slot];

    if (NULL != displaced) {
        /* an entry is being overwritten - move the tail along */
        ring->tail = (ring->tail == last) ? 0 : slot + 1;
    }

    ring->addr[slot] = (char*)ptr;

    /* first entry in an empty ring: the tail starts here */
    if (ring->tail < 0) {
        ring->tail = slot;
    }

    /* advance the head, wrapping at the end of the array */
    ring->head = (slot == last) ? 0 : slot + 1;

    return (void*)displaced;
}
|
||||
|
||||
void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring)
{
    char *item;
    int slot = ring->tail;

    /* nothing has been put on the ring yet */
    if (-1 == slot) {
        return NULL;
    }

    /* take the entry at the tail and clear its slot */
    item = (char*)ring->addr[slot];
    ring->addr[slot] = NULL;

    /* advance the tail with wrap-around; catching up with the head
       means the ring is now empty */
    slot = (slot == ring->size-1) ? 0 : slot + 1;
    ring->tail = (slot == ring->head) ? -1 : slot;

    return (void*)item;
}
|
||||
|
||||
void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i)
{
    int idx;

    /* index beyond the ring, or ring is empty */
    if (ring->size <= i || -1 == ring->tail) {
        return NULL;
    }

    if (i < 0) {
        /* return the value at the head of the ring, i.e. the slot
           just before the next write position */
        idx = (0 == ring->head) ? ring->size - 1 : ring->head - 1;
        return (void*)ring->addr[idx];
    }

    /* offset from the tail, corrected for wrap-around */
    idx = ring->tail + i;
    if (ring->size <= idx) {
        idx -= ring->size;
    }
    return (void*)ring->addr[idx];
}
|
102
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h
Обычный файл
102
opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h
Обычный файл
@ -0,0 +1,102 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PMIX_RING_BUFFER_H
|
||||
#define PMIX_RING_BUFFER_H
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#include "src/class/pmix_object.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* dynamic pointer ring
|
||||
*/
|
||||
struct pmix_ring_buffer_t {
|
||||
/** base class */
|
||||
pmix_object_t super;
|
||||
/* head/tail indices: head is the slot the next push writes to; tail is
   the slot the next pop reads from, or -1 while the ring is empty */
|
||||
int head;
|
||||
int tail;
|
||||
/** size of list, i.e. number of elements in addr */
|
||||
int size;
|
||||
/** pointer to ring */
|
||||
char **addr;
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct pmix_ring_buffer_t pmix_ring_buffer_t;
|
||||
/**
|
||||
* Class declaration
|
||||
*/
|
||||
PMIX_DECLSPEC PMIX_CLASS_DECLARATION(pmix_ring_buffer_t);
|
||||
|
||||
/**
|
||||
* Initialize the ring buffer, defining its size.
|
||||
*
|
||||
* @param ring Pointer to a ring buffer (IN/OUT)
|
||||
* @param size The number of elements in the ring (IN)
|
||||
*
|
||||
* @return PMIX_SUCCESS if all initializations were successful. Otherwise,
|
||||
* the error indicate what went wrong in the function.
|
||||
*/
|
||||
PMIX_DECLSPEC int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size);
|
||||
|
||||
/**
|
||||
* Push an item onto the ring buffer, displacing the oldest
|
||||
* item on the ring if the ring is full
|
||||
*
|
||||
* @param ring Pointer to ring (IN)
|
||||
* @param ptr Pointer value (IN)
|
||||
*
|
||||
* @return Pointer to displaced item, NULL if ring
|
||||
* is not yet full
|
||||
*/
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr);
|
||||
|
||||
|
||||
/**
|
||||
* Pop an item off of the ring. The oldest entry on the ring will be
|
||||
* returned. If nothing on the ring, NULL is returned.
|
||||
*
|
||||
* @param ring Pointer to ring (IN)
|
||||
*
|
||||
* @return Pointer to the oldest item, or NULL if the ring is empty.
|
||||
*/
|
||||
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring);
|
||||
|
||||
/*
|
||||
* Access an element of the ring, without removing it, indexed
|
||||
* starting at the tail - a value of -1 will return the element
|
||||
* at the head of the ring
|
||||
*/
|
||||
PMIX_DECLSPEC void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* PMIX_RING_BUFFER_H */
|
@ -44,6 +44,7 @@
|
||||
#define PMI_MAX_KVSNAME_LEN PMIX_MAX_NSLEN /* Maximum size of KVS name */
|
||||
#define PMI_MAX_VAL_LEN 4096 /* Maximum size of a PMI value */
|
||||
|
||||
|
||||
#define PMI_CHECK() \
|
||||
do { \
|
||||
if (!pmi_init) { \
|
||||
@ -55,25 +56,37 @@
|
||||
static pmix_status_t convert_int(int *value, pmix_value_t *kv);
|
||||
static int convert_err(pmix_status_t rc);
|
||||
static pmix_proc_t myproc;
|
||||
static bool data_commited = false;
|
||||
static int pmi_init = 0;
|
||||
|
||||
int PMI_Init(int *spawned)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_status_t rc;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
if (PMIX_SUCCESS != PMIx_Init(&myproc)) {
|
||||
return PMI_ERR_INIT;
|
||||
}
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (NULL != spawned) {
|
||||
/* get the spawned flag */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) {
|
||||
rc = convert_int(spawned, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
return convert_err(rc);
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
/* if not found, default to not spawned */
|
||||
@ -82,7 +95,12 @@ int PMI_Init(int *spawned)
|
||||
}
|
||||
pmi_init = 1;
|
||||
|
||||
return PMI_SUCCESS;
|
||||
rc = PMIX_SUCCESS;
|
||||
|
||||
error:
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Initialized(PMI_BOOL *initialized)
|
||||
@ -160,8 +178,6 @@ int PMI_KVS_Commit(const char kvsname[])
|
||||
kvsname);
|
||||
|
||||
rc = PMIx_Commit();
|
||||
/* PMIx permits only one data commit! */
|
||||
data_commited = true;
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -169,17 +185,14 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
uint32_t i;
|
||||
static pmix_proc_t proc;
|
||||
uint32_t procnum;
|
||||
proc = myproc;
|
||||
pmix_proc_t proc;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) {
|
||||
return PMI_ERR_INVALID_KVS;
|
||||
}
|
||||
if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN)) {
|
||||
if ((key == NULL) || (strlen(key) > PMI_MAX_KEY_LEN)) {
|
||||
return PMI_ERR_INVALID_KEY;
|
||||
}
|
||||
if (value == NULL) {
|
||||
@ -189,60 +202,22 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"PMI_KVS_Get: KVS=%s, key=%s value=%s", kvsname, key, value);
|
||||
|
||||
/* PMI-1 expects resource manager to set
|
||||
* process mapping in ANL notation. */
|
||||
if (!strcmp(key, ANL_MAPPING)) {
|
||||
/* we are looking in the job-data. If there is nothing there
|
||||
* we don't want to look in rank's data, thus set rank to wildcard */
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) &&
|
||||
(NULL != val) && (PMIX_STRING == val->type)) {
|
||||
strncpy(value, val->data.string, length);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
return PMI_SUCCESS;
|
||||
} else {
|
||||
/* artpol:
|
||||
* Some RM's (i.e. SLURM) already have ANL precomputed. They export it
|
||||
* through PMIX_ANL_MAP variable.
|
||||
* If we haven't found it we want to have our own packing functionality
|
||||
* since it's common.
|
||||
* Somebody else has to write it since I've already done that for
|
||||
* GPL'ed SLURM :) */
|
||||
return PMI_FAIL;
|
||||
/* retrieve the data from PMIx - since we don't have a rank,
|
||||
* we indicate that by passing the UNDEF value */
|
||||
(void)strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
rc = PMIx_Get(&proc, key, NULL, 0, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, length);
|
||||
}
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
|
||||
/* We don't know what process keeps this data. So it looks like we need to
|
||||
* check each process.
|
||||
* TODO: Is there any better way?
|
||||
* WARNING: this may lead to the VERY long HANG's if we ask for the unknown key
|
||||
* before we've done Commit on all nodes. We need a workaround for that.
|
||||
*
|
||||
* SOLUTION: perhaps provide "OK if nothing" info flag to tell PMIx that
|
||||
* the key is supposed to already be there and, if nothing is there, give up with
|
||||
* an error and don't try to use direct modex.
|
||||
*/
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmi1: executing put for KVS %s, key %s value %s", kvsname, key,
|
||||
value);
|
||||
return convert_err(rc);
|
||||
}
|
||||
procnum = val->data.uint32;
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
|
||||
for (i = 0; i < procnum; i++) {
|
||||
proc.rank = i;
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, key, NULL, 0, &val) && (NULL != val)
|
||||
&& (PMIX_STRING == val->type)) {
|
||||
strncpy(value, val->data.string, length);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
return PMI_SUCCESS;
|
||||
}
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
}
|
||||
return PMI_FAIL;
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
/* Barrier only applies to our own nspace, and we want all
|
||||
@ -253,28 +228,28 @@ int PMI_Barrier(void)
|
||||
pmix_info_t buf;
|
||||
int ninfo = 0;
|
||||
pmix_info_t *info = NULL;
|
||||
bool val = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (data_commited) {
|
||||
bool val = 1;
|
||||
info = &buf;
|
||||
PMIX_INFO_CONSTRUCT(info);
|
||||
PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL);
|
||||
ninfo = 1;
|
||||
}
|
||||
info = &buf;
|
||||
PMIX_INFO_CONSTRUCT(info);
|
||||
PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL);
|
||||
ninfo = 1;
|
||||
rc = PMIx_Fence(NULL, 0, info, ninfo);
|
||||
|
||||
if (NULL != info) {
|
||||
PMIX_INFO_DESTRUCT(info);
|
||||
}
|
||||
return rc;
|
||||
PMIX_INFO_DESTRUCT(info);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
@ -282,13 +257,23 @@ int PMI_Get_size(int *size)
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val)) {
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_JOB_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_rank(int *rk)
|
||||
@ -307,6 +292,9 @@ int PMI_Get_universe_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
@ -314,29 +302,56 @@ int PMI_Get_universe_size(int *size)
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val)) {
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
return PMI_FAIL;
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_appnum(int *appnum)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (NULL != appnum &&
|
||||
PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
if (NULL == appnum) {
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Publish_name(const char service_name[], const char port[])
|
||||
@ -461,24 +476,34 @@ int PMI_Get_clique_size(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI_CHECK();
|
||||
|
||||
if (NULL == size) {
|
||||
return PMI_ERR_INVALID_ARGS;
|
||||
return PMI_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) {
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_Get_clique_ranks(int ranks[], int length)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
char **rks;
|
||||
int i;
|
||||
@ -498,9 +523,9 @@ int PMI_Get_clique_ranks(int ranks[], int length)
|
||||
}
|
||||
pmix_argv_free(rks);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI_SUCCESS;
|
||||
}
|
||||
return PMI_FAIL;
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI_KVS_Get_my_name(char kvsname[], int length)
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/output.h"
|
||||
|
||||
|
||||
#define PMI2_CHECK() \
|
||||
do { \
|
||||
if (!pmi2_init) { \
|
||||
@ -55,6 +56,8 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
if (PMIX_SUCCESS != PMIx_Init(&myproc)) {
|
||||
return PMI2_ERR_INIT;
|
||||
@ -65,14 +68,20 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (NULL != size) {
|
||||
/* get the universe size - this will likely pull
|
||||
* down all attributes assigned to the job, thus
|
||||
* making all subsequent "get" operations purely
|
||||
* local */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -80,13 +89,14 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
}
|
||||
} else {
|
||||
/* cannot continue without this info */
|
||||
return PMI2_ERR_INIT;
|
||||
rc = PMIX_ERR_INIT;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != spawned) {
|
||||
/* get the spawned flag */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) {
|
||||
rc = convert_int(spawned, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -100,7 +110,7 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
|
||||
if (NULL != appnum) {
|
||||
/* get our appnum */
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, NULL, 0, &val)) {
|
||||
if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) {
|
||||
rc = convert_int(appnum, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -113,9 +123,11 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum)
|
||||
}
|
||||
pmi2_init = 1;
|
||||
|
||||
return PMI2_SUCCESS;
|
||||
rc = PMIX_SUCCESS;
|
||||
|
||||
error:
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -175,20 +187,25 @@ int PMI2_KVS_Fence(void)
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
pmix_output_verbose(3, pmix_globals.debug_output, "PMI2_KVS_Fence");
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
/* we want all data to be collected upon completion */
|
||||
{
|
||||
pmix_info_t info;
|
||||
int ninfo = 1;
|
||||
bool val = 1;
|
||||
pmix_info_t info[1];
|
||||
bool val_data = 1;
|
||||
|
||||
PMIX_INFO_CONSTRUCT(&info);
|
||||
PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &val, PMIX_BOOL);
|
||||
rc = PMIx_Fence(NULL, 0, &info, ninfo);
|
||||
PMIX_INFO_DESTRUCT(&info);
|
||||
/* set controlling parameters
|
||||
* PMIX_COLLECT_DATA - meet legacy PMI2 requirement
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &val_data, PMIX_BOOL);
|
||||
|
||||
rc = PMIx_Fence(NULL, 0, &info[0], 1);
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
}
|
||||
|
||||
return convert_err(rc);
|
||||
@ -206,10 +223,12 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id,
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
uint32_t procnum = 0;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
/* set default */
|
||||
*vallen = 0;
|
||||
|
||||
if ((NULL == key) || (NULL == value)) {
|
||||
return PMI2_ERR_INVALID_ARG;
|
||||
}
|
||||
@ -219,37 +238,22 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id,
|
||||
|
||||
(void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), PMIX_MAX_NSLEN);
|
||||
if (src_pmi_id == PMI2_ID_NULL) {
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) {
|
||||
return convert_err(rc);
|
||||
}
|
||||
procnum = val->data.uint32;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
proc.rank = 0;
|
||||
/* the rank is UNDEF */
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
} else {
|
||||
proc.rank = src_pmi_id;
|
||||
}
|
||||
|
||||
do {
|
||||
rc = PMIx_Get(&proc, key, NULL, 0, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, maxvalue);
|
||||
*vallen = strlen(val->data.string);
|
||||
}
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
break;
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
proc.rank++;
|
||||
} else {
|
||||
break;
|
||||
rc = PMIx_Get(&proc, key, NULL, 0, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, maxvalue);
|
||||
*vallen = strlen(val->data.string);
|
||||
}
|
||||
} while (proc.rank < (int)procnum);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
@ -258,6 +262,8 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -265,15 +271,18 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
return PMI2_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
*found = 0;
|
||||
rc = PMIx_Get(&myproc, name, NULL, 0, &val);
|
||||
rc = PMIx_Get(&myproc, name, info, 1, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, valuelen);
|
||||
*found = 1;
|
||||
}
|
||||
@ -281,6 +290,9 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
rc = PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -307,6 +319,8 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_value_t *val;
|
||||
pmix_proc_t proc;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -316,17 +330,20 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
|
||||
/* getting internal key requires special rank value */
|
||||
memcpy(&proc, &myproc, sizeof(myproc));
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
proc.rank = PMIX_RANK_UNDEF;
|
||||
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
*found = 0;
|
||||
rc = PMIx_Get(&proc, name, NULL, 0, &val);
|
||||
rc = PMIx_Get(&proc, name, info, 1, &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
if (PMIX_STRING != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return PMI2_FAIL;
|
||||
}
|
||||
if (NULL != val->data.string) {
|
||||
rc = PMIX_ERROR;
|
||||
} else if (NULL != val->data.string) {
|
||||
(void)strncpy(value, val->data.string, valuelen);
|
||||
*found = 1;
|
||||
}
|
||||
@ -334,6 +351,9 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou
|
||||
} else if (PMIX_ERR_NOT_FOUND == rc) {
|
||||
rc = PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -482,8 +502,10 @@ int PMI2_Job_GetRank(int *rank)
|
||||
|
||||
int PMI2_Info_GetSize(int *size)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_status_t rc = PMIX_ERROR;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t info[1];
|
||||
bool val_optinal = 1;
|
||||
|
||||
PMI2_CHECK();
|
||||
|
||||
@ -491,13 +513,20 @@ int PMI2_Info_GetSize(int *size)
|
||||
return PMI2_ERR_INVALID_ARGS;
|
||||
}
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) {
|
||||
/* set controlling parameters
|
||||
* PMIX_OPTIONAL - expect that these keys should be available on startup
|
||||
*/
|
||||
PMIX_INFO_CONSTRUCT(&info[0]);
|
||||
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL);
|
||||
|
||||
if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) {
|
||||
rc = convert_int(size, val);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
return PMI2_FAIL;
|
||||
PMIX_INFO_DESTRUCT(&info[0]);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -273,13 +273,13 @@ int PMIx_Init(pmix_proc_t *proc)
|
||||
/* get our effective id's */
|
||||
pmix_globals.uid = geteuid();
|
||||
pmix_globals.gid = getegid();
|
||||
/* default to our internal errhandler */
|
||||
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
|
||||
/* initialize the output system */
|
||||
if (!pmix_output_init()) {
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
/* default to our internal errhandler */
|
||||
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
|
||||
/* see if debug is requested */
|
||||
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
|
||||
debug_level = strtol(evar, NULL, 10);
|
||||
@ -1270,6 +1270,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: regevents callback recvd");
|
||||
|
||||
@ -1286,8 +1287,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
rc = pmix_remove_errhandler(cb->errhandler_ref);
|
||||
/* call the callback with error */
|
||||
cb->errreg_cbfunc(PMIX_ERR_SERVER_FAILED_REQUEST, -1, cb->cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* complete err handler registration with success status*/
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client:reg events cbfunc received status %d for errhandler %d",
|
||||
@ -1303,28 +1303,36 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
void *cbdata)
|
||||
{
|
||||
/* add err handler, process info keys and register for events and call the callback */
|
||||
int rc, index = 0;
|
||||
int index = 0;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler");
|
||||
"pmix: register errhandler with %d infos", (int)ninfo);
|
||||
|
||||
/* check if this handler is already registered if so return error */
|
||||
if (PMIX_SUCCESS == pmix_lookup_errhandler (errhandler, &index)) {
|
||||
/* complete request with error status and return its original reference */
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) {
|
||||
/* complete request with error status and return its original reference */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - already registered");
|
||||
cbfunc(PMIX_EXISTS, index, cbdata);
|
||||
|
||||
} else if (PMIX_ERR_GRP_FOUND == rc) {
|
||||
/* just acknowledge it */
|
||||
cbfunc(PMIX_SUCCESS, index, cbdata);
|
||||
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) {
|
||||
/* if they are registering a default errhandler, then
|
||||
* overwrite the existing one with it - the index will
|
||||
* contain its location */
|
||||
pmix_add_errhandler(errhandler, info, ninfo, &index);
|
||||
} else {
|
||||
if(PMIX_SUCCESS != (rc = pmix_add_errhandler (errhandler, info, ninfo, &index))) {
|
||||
/* need to add this errhandler */
|
||||
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - error status rc=%d", rc);
|
||||
/* complete request with error*/
|
||||
cbfunc(rc, index, cbdata);
|
||||
}
|
||||
else {
|
||||
/* To do: need to determine if the client needs to process the info keys before passing it to
|
||||
server */
|
||||
} else {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo);
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
@ -1334,11 +1342,10 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(index);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
* the server acks/nacks the register events request */
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - pack events success status=%d", rc);
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
@ -1358,6 +1365,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt =1;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: deregevents_cbfunc recvd");
|
||||
|
||||
@ -1371,7 +1379,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
|
||||
}
|
||||
/* remove the err handler and call the error handler reg completion callback fn.*/
|
||||
/* remove the err handler and call the error handler dereg completion callback fn.*/
|
||||
pmix_remove_errhandler(cb->errhandler_ref);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client:dereg events cbfunc received status %d for errhandler %d",
|
||||
@ -1388,17 +1396,18 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
pmix_error_reg_info_t *errreg;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref);
|
||||
errreg = (pmix_error_reg_info_t *) pmix_pointer_array_get_item (&pmix_globals.errregs, errhandler_ref);
|
||||
|
||||
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref);
|
||||
if (NULL != errreg ) {
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) {
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(errhandler_ref);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
@ -1409,17 +1418,18 @@ void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
|
||||
}
|
||||
}
|
||||
else
|
||||
} else {
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
int ret, cnt;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: notifyerror_cbfunc recvd");
|
||||
|
||||
@ -1427,14 +1437,15 @@ static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
return;
|
||||
}
|
||||
|
||||
/* unpack the status code */
|
||||
if ((PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_INT))) ||
|
||||
(PMIX_SUCCESS != ret)) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
|
||||
}
|
||||
/* call the notify error completion callback fn.*/
|
||||
|
||||
/* call the notify error completion callback fn.*/
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"client: notified error cbfunc received status %d ",
|
||||
ret);
|
||||
@ -1449,9 +1460,13 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_buffer_t *msg = PMIX_NEW(pmix_buffer_t);
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cmd_t cmd = PMIX_NOTIFY_CMD;
|
||||
pmix_cb_t *cb;
|
||||
|
||||
/* get the message buffer */
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
/* pack the command */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -1484,17 +1499,22 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status,
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->op_cbfunc = cbfunc;
|
||||
cb->cbdata = cbdata;
|
||||
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, notifyerror_cbfunc, cb);
|
||||
return PMIX_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
PMIX_RELEASE(msg);
|
||||
cbfunc(rc, cbdata);
|
||||
/* never call a callback function when returning an error as
|
||||
* the error tells the caller that they will never recv a
|
||||
* callback */
|
||||
return rc;
|
||||
}
|
||||
|
@ -56,16 +56,16 @@
|
||||
|
||||
#include "pmix_client_ops.h"
|
||||
|
||||
static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
static pmix_buffer_t* _pack_get(char *nspace, int rank,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_cmd_t cmd);
|
||||
|
||||
static void _getnbfn(int sd, short args, void *cbdata);
|
||||
|
||||
static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata);
|
||||
|
||||
static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata);
|
||||
static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata);
|
||||
|
||||
int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
@ -74,16 +74,6 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
pmix_cb_t *cb;
|
||||
int rc;
|
||||
|
||||
if (NULL == proc) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: %s:%d getting value for proc %s:%d key %s",
|
||||
pmix_globals.myid.nspace, pmix_globals.myid.rank,
|
||||
proc->nspace, proc->rank,
|
||||
(NULL == key) ? "NULL" : key);
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
@ -93,7 +83,7 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
* the return message is recvd */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, value_cbfunc, cb))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
@ -115,30 +105,60 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
|
||||
pmix_value_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
|
||||
if (NULL == proc) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb value for proc %s:%d key %s",
|
||||
proc->nspace, proc->rank,
|
||||
(NULL == key) ? "NULL" : key);
|
||||
int rank;
|
||||
char *nm;
|
||||
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
|
||||
/* protect against bozo input */
|
||||
if (NULL == key) {
|
||||
/* if the proc is NULL, then the caller is assuming
|
||||
* that the key is universally unique within the caller's
|
||||
* own nspace. This most likely indicates that the code
|
||||
* was originally written for a legacy version of PMI.
|
||||
*
|
||||
* If the key is NULL, then the caller wants all
|
||||
* data from the specified proc. Again, this likely
|
||||
* indicates use of a legacy version of PMI.
|
||||
*
|
||||
* Either case is supported. However, we don't currently
|
||||
* support the case where -both- values are NULL */
|
||||
if (NULL == proc && NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if the key is NULL, the rank cannot be WILDCARD as
|
||||
* we cannot return all info from every rank */
|
||||
if (NULL != proc && PMIX_RANK_WILDCARD == proc->rank && NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if the given proc param is NULL, or the nspace is
|
||||
* empty, then the caller is referencing our own nspace */
|
||||
if (NULL == proc || 0 == strlen(proc->nspace)) {
|
||||
nm = pmix_globals.myid.nspace;
|
||||
} else {
|
||||
nm = (char*)proc->nspace;
|
||||
}
|
||||
|
||||
/* if the proc param is NULL, then we are seeking a key that
|
||||
* must be globally unique, so communicate this to the hash
|
||||
* functions with the UNDEF rank */
|
||||
if (NULL == proc) {
|
||||
rank = PMIX_RANK_UNDEF;
|
||||
} else {
|
||||
rank = proc->rank;
|
||||
}
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb value for proc %s:%d key %s",
|
||||
nm, rank, (NULL == key) ? "NULL" : key);
|
||||
|
||||
/* thread-shift so we can check global objects */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
(void)strncpy(cb->nspace, proc->nspace, PMIX_MAX_NSLEN);
|
||||
cb->rank = proc->rank;
|
||||
(void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN);
|
||||
cb->rank = rank;
|
||||
cb->key = (char*)key;
|
||||
cb->info = (pmix_info_t*)info;
|
||||
cb->ninfo = ninfo;
|
||||
@ -149,7 +169,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
@ -163,7 +183,7 @@ static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata)
|
||||
cb->active = false;
|
||||
}
|
||||
|
||||
static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
static pmix_buffer_t* _pack_get(char *nspace, int rank,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_cmd_t cmd)
|
||||
{
|
||||
@ -209,7 +229,7 @@ static pmix_buffer_t* pack_get(char *nspace, int rank,
|
||||
/* this callback is coming from the usock recv, and thus
|
||||
* is occurring inside of our progress thread - hence, no
|
||||
* need to thread shift */
|
||||
static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
@ -218,18 +238,19 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_value_t *val = NULL;
|
||||
int32_t cnt;
|
||||
pmix_buffer_t *bptr;
|
||||
pmix_kval_t *kp;
|
||||
pmix_nspace_t *ns, *nptr;
|
||||
int rank;
|
||||
int cur_rank;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: get_nb callback recvd");
|
||||
|
||||
if (NULL == cb) {
|
||||
/* nothing we can do */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
return;
|
||||
}
|
||||
// cache the rank
|
||||
/* cache the rank */
|
||||
rank = cb->rank;
|
||||
|
||||
/* unpack the status */
|
||||
@ -262,31 +283,36 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
* unpack and store it in the modex - this could consist
|
||||
* of buffers from multiple scopes */
|
||||
cnt = 1;
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) {
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &cur_rank, &cnt, PMIX_INT))) {
|
||||
pmix_kval_t *cur_kval;
|
||||
|
||||
cnt = 1;
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kp, &cnt, PMIX_KVAL))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: unpacked key %s", kp->key);
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cb->rank, kp))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
if (NULL != cb->key && 0 == strcmp(cb->key, kp->key)) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: found requested value");
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, kp->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(kp);
|
||||
val = NULL;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain
|
||||
if (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) {
|
||||
cnt = 1;
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
cur_kval = PMIX_NEW(pmix_kval_t);
|
||||
while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: unpacked key %s", cur_kval->key);
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: found requested value");
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(cur_kval);
|
||||
val = NULL;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain
|
||||
cnt = 1;
|
||||
cur_kval = PMIX_NEW(pmix_kval_t);
|
||||
}
|
||||
cnt = 1;
|
||||
PMIX_RELEASE(cur_kval);
|
||||
}
|
||||
cnt = 1;
|
||||
PMIX_RELEASE(kp);
|
||||
PMIX_RELEASE(bptr); // free's the data region
|
||||
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -338,28 +364,21 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
pmix_cb_t *cbret;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_value_t *val;
|
||||
pmix_info_t *info, *iptr;
|
||||
pmix_pointer_array_t results;
|
||||
pmix_status_t rc;
|
||||
char *nm;
|
||||
pmix_nspace_t *ns, *nptr;
|
||||
size_t n;
|
||||
size_t n, nvals;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: getnbfn value for proc %s:%d key %s",
|
||||
cb->nspace, cb->rank,
|
||||
(NULL == cb->key) ? "NULL" : cb->key);
|
||||
|
||||
/* if the nspace is empty, then the caller is referencing
|
||||
* our own nspace */
|
||||
if (0 == strlen(cb->nspace)) {
|
||||
nm = pmix_globals.myid.nspace;
|
||||
} else {
|
||||
nm = (char*)cb->nspace;
|
||||
}
|
||||
|
||||
/* find the nspace object */
|
||||
nptr = NULL;
|
||||
PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) {
|
||||
if (0 == strcmp(nm, ns->nspace)) {
|
||||
if (0 == strcmp(cb->nspace, ns->nspace)) {
|
||||
nptr = ns;
|
||||
break;
|
||||
}
|
||||
@ -370,13 +389,105 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* server has never heard of it, the server will return
|
||||
* an error */
|
||||
nptr = PMIX_NEW(pmix_nspace_t);
|
||||
(void)strncpy(nptr->nspace, nm, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN);
|
||||
pmix_list_append(&pmix_globals.nspaces, &nptr->super);
|
||||
/* there is no point in looking for data in this nspace
|
||||
* object, so let's just go generate the request */
|
||||
goto request;
|
||||
}
|
||||
|
||||
/* if the key is NULL, then we have to check both the job-data
|
||||
* and the modex tables. If we don't yet have the modex data,
|
||||
* then we are going to have to go get it. So let's check that
|
||||
* case first */
|
||||
if (NULL == cb->key) {
|
||||
PMIX_CONSTRUCT(&results, pmix_pointer_array_t);
|
||||
pmix_pointer_array_init(&results, 2, INT_MAX, 1);
|
||||
nvals = 0;
|
||||
/* if the rank is WILDCARD, then they want all the job-level info,
|
||||
* so no need to check the modex */
|
||||
if (PMIX_RANK_WILDCARD != cb->rank) {
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: value retrieved from dstore");
|
||||
/* since we didn't provide them with a key, the hash function
|
||||
* must return the results in the pmix_info_array field of the
|
||||
* value */
|
||||
if (NULL == val || PMIX_INFO_ARRAY != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* save the results */
|
||||
info = (pmix_info_t*)val->data.array.array;
|
||||
for (n=0; n < val->data.array.size; n++) {
|
||||
pmix_pointer_array_add(&results, &info[n]);
|
||||
++nvals;
|
||||
}
|
||||
val->data.array.array = NULL; // protect the data
|
||||
val->data.array.size = 0;
|
||||
/* cleanup */
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
} else {
|
||||
/* if we didn't find a modex for this rank, then we need
|
||||
* to go get it. Recall that the NULL==key scenario only
|
||||
* pertains to cases where legacy PMI methods are being
|
||||
* employed. Thus, the caller wants -all- information for
|
||||
* the specified rank, not just the job-level info. */
|
||||
goto request;
|
||||
}
|
||||
}
|
||||
/* now get any data from the job-level info */
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, NULL, &val))) {
|
||||
/* since we didn't provide them with a key, the hash function
|
||||
* must return the results in the pmix_info_array field of the
|
||||
* value */
|
||||
if (NULL == val || PMIX_INFO_ARRAY != val->type) {
|
||||
/* this is an error */
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
/* save the results */
|
||||
info = (pmix_info_t*)val->data.array.array;
|
||||
for (n=0; n < val->data.array.size; n++) {
|
||||
pmix_pointer_array_add(&results, &info[n]);
|
||||
++nvals;
|
||||
}
|
||||
val->data.array.array = NULL; // protect the data
|
||||
val->data.array.size = 0;
|
||||
/* cleanup */
|
||||
if (NULL != val) {
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
}
|
||||
/* now let's package up the results */
|
||||
PMIX_VALUE_CREATE(val, 1);
|
||||
val->type = PMIX_INFO_ARRAY;
|
||||
val->data.array.size = nvals;
|
||||
PMIX_INFO_CREATE(iptr, nvals);
|
||||
val->data.array.array = (struct pmix_info_t*)iptr;
|
||||
for (n=0; n < (size_t)results.size && n < nvals; n++) {
|
||||
if (NULL != (info = (pmix_info_t*)pmix_pointer_array_get_item(&results, n))) {
|
||||
(void)strncpy(iptr[n].key, info->key, PMIX_MAX_KEYLEN);
|
||||
pmix_value_xfer(&iptr[n].value, &info->value);
|
||||
PMIX_INFO_FREE(info, 1);
|
||||
}
|
||||
}
|
||||
/* done with results array */
|
||||
PMIX_DESTRUCT(&results);
|
||||
/* return the result to the caller */
|
||||
cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata);
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
}
|
||||
|
||||
/* the requested data could be in the job-data table, so let's
|
||||
* just check there first. */
|
||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) {
|
||||
@ -433,7 +544,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* the error */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"Error requesting key=%s for rank = %d, namespace = %s",
|
||||
cb->key, cb->rank, nm);
|
||||
cb->key, cb->rank, cb->nspace);
|
||||
cb->value_cbfunc(rc, NULL, cb->cbdata);
|
||||
/* protect the data */
|
||||
cb->procs = NULL;
|
||||
@ -461,7 +572,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
/* they don't want us to try and retrieve it */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",
|
||||
cb->key, cb->rank, nm);
|
||||
cb->key, cb->rank, cb->nspace);
|
||||
cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
return;
|
||||
@ -472,7 +583,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
* this nspace:rank. If we do, then no need to ask again as the
|
||||
* request will return _all_ data from that proc */
|
||||
PMIX_LIST_FOREACH(cbret, &pmix_client_globals.pending_requests, pmix_cb_t) {
|
||||
if (0 == strncmp(cbret->nspace, nm, PMIX_MAX_NSLEN) &&
|
||||
if (0 == strncmp(cbret->nspace, cb->nspace, PMIX_MAX_NSLEN) &&
|
||||
cbret->rank == cb->rank) {
|
||||
/* we do have a pending request, but we still need to track this
|
||||
* outstanding request so we can satisfy it once the data is returned */
|
||||
@ -483,7 +594,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
|
||||
/* we don't have a pending request, so let's create one - don't worry
|
||||
* about packing the key as we return everything from that proc */
|
||||
msg = pack_get(nm, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD);
|
||||
msg = _pack_get(cb->nspace, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD);
|
||||
if (NULL == msg) {
|
||||
cb->value_cbfunc(PMIX_ERROR, NULL, cb->cbdata);
|
||||
PMIX_RELEASE(cb);
|
||||
@ -496,5 +607,5 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
||||
pmix_list_append(&pmix_client_globals.pending_requests, &cb->super);
|
||||
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, getnb_cbfunc, cb);
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, _getnb_cbfunc, cb);
|
||||
}
|
||||
|
@ -150,10 +150,12 @@ pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo,
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
if (0 < napps) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* create a callback object as we need to pass it to the
|
||||
|
@ -43,10 +43,10 @@ void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
* call pmix_server_register_for_events, and call cbfunc with
|
||||
* reference to the errhandler */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"registering client err handler");
|
||||
"registering client err handler with %d info", (int)ninfo);
|
||||
pmix_client_register_errhandler(info, ninfo,
|
||||
errhandler,
|
||||
cbfunc, cbdata);
|
||||
errhandler,
|
||||
cbfunc, cbdata);
|
||||
}
|
||||
}
|
||||
|
||||
@ -80,17 +80,17 @@ pmix_status_t PMIx_Notify_error(pmix_status_t status,
|
||||
int rc;
|
||||
|
||||
if (pmix_globals.server) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_notify_error error =%d, rc=%d", status, rc);
|
||||
rc = pmix_server_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
cbfunc, cbdata);
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
"pmix_server_notify_error error =%d, rc=%d", status, rc);
|
||||
cbfunc, cbdata);
|
||||
} else {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_notify_error error =%d, rc=%d", status, rc);
|
||||
rc = pmix_client_notify_error(status, procs, nprocs, error_procs,
|
||||
error_nprocs, info, ninfo,
|
||||
cbfunc, cbdata);
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
"pmix_client_notify_error error =%d, rc=%d", status, rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -56,7 +56,8 @@ void pmix_globals_init(void)
|
||||
{
|
||||
memset(&pmix_globals.myid, 0, sizeof(pmix_proc_t));
|
||||
PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t);
|
||||
pmix_pointer_array_init(&pmix_globals.errregs, 1, PMIX_MAX_ERROR_REGISTRATIONS, 1);
|
||||
PMIX_CONSTRUCT(&pmix_globals.errregs, pmix_pointer_array_t);
|
||||
pmix_pointer_array_init(&pmix_globals.errregs, 16, PMIX_MAX_ERROR_REGISTRATIONS, 16);
|
||||
}
|
||||
|
||||
void pmix_globals_finalize(void)
|
||||
@ -68,6 +69,7 @@ void pmix_globals_finalize(void)
|
||||
if (NULL != pmix_globals.cache_remote) {
|
||||
PMIX_RELEASE(pmix_globals.cache_remote);
|
||||
}
|
||||
PMIX_DESTRUCT(&pmix_globals.errregs);
|
||||
}
|
||||
|
||||
|
||||
@ -158,6 +160,7 @@ PMIX_CLASS_INSTANCE(pmix_rank_info_t,
|
||||
|
||||
static void errcon(pmix_error_reg_info_t *p)
|
||||
{
|
||||
p->sglhdlr = false;
|
||||
p->errhandler = NULL;
|
||||
p->info = NULL;
|
||||
p->ninfo = 0;
|
||||
@ -165,7 +168,9 @@ static void errcon(pmix_error_reg_info_t *p)
|
||||
/* Teardown routine for a pmix_error_reg_info_t: clears the handler
 * pointer and frees the attached info array, if any.
 * NOTE(review): presumably passed as the destructor to the
 * PMIX_CLASS_INSTANCE for this type - confirm against that macro call. */
static void errdes(pmix_error_reg_info_t *p)
|
||||
{
|
||||
p->errhandler = NULL;
|
||||
// PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
/* only free when an info array is actually attached */
if (NULL != p->info) {
|
||||
PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
}
|
||||
}
|
||||
PMIX_CLASS_INSTANCE(pmix_error_reg_info_t,
|
||||
pmix_object_t,
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,6 +44,7 @@ BEGIN_C_DECLS
|
||||
/* define a structure for tracking error registrations */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
bool sglhdlr; // registers a specific error status handler
|
||||
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
|
||||
pmix_info_t *info; /* error info keys registered with the handler */
|
||||
size_t ninfo; /* size of info */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -241,6 +241,8 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.client_eventregs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t);
|
||||
PMIX_CONSTRUCT(&pmix_server_globals.notifications, pmix_ring_buffer_t);
|
||||
pmix_ring_buffer_init(&pmix_server_globals.notifications, 256);
|
||||
|
||||
/* see if debug is requested */
|
||||
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
|
||||
@ -1111,6 +1113,7 @@ static bool match_error_registration(pmix_regevents_info_t *reginfoptr, pmix_not
|
||||
static void _notify_error(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata;
|
||||
pmix_notify_caddy_t *rbout;
|
||||
pmix_status_t rc;
|
||||
pmix_cmd_t cmd = PMIX_NOTIFY_CMD;
|
||||
int i;
|
||||
@ -1119,25 +1122,27 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
pmix_regevents_info_t *reginfoptr;
|
||||
bool notify, notifyall;
|
||||
|
||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: _notify_error notifying client of error %d",
|
||||
cd->status);
|
||||
|
||||
/* pack the command */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* pack the status */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* pack the error procs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->error_nprocs, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (0 < cd->error_nprocs) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->error_procs, cd->error_nprocs, PMIX_PROC))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -1145,74 +1150,37 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* pack the info */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
if (0 < cd->ninfo) {
|
||||
|
||||
if (0 < cd->ninfo) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->info, cd->ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* if the RM gave us a NULL proc list, then we are notifying everyone */
|
||||
if (NULL == cd->procs) {
|
||||
notifyall = true;
|
||||
} else {
|
||||
notifyall = false;
|
||||
|
||||
/* we cannot know if everyone who wants this notice has had a chance
|
||||
* to register for it - the notice may be coming too early. So cache
|
||||
* the message until all local procs have received it, or it ages to
|
||||
* the point where it gets pushed out by more recent events */
|
||||
PMIX_RETAIN(cd);
|
||||
rbout = pmix_ring_buffer_push(&pmix_server_globals.notifications, cd);
|
||||
|
||||
/* if an older event was bumped, release it */
|
||||
if (NULL != rbout) {
|
||||
PMIX_RELEASE(rbout);
|
||||
}
|
||||
|
||||
/* cycle across our connected clients and send the message to
|
||||
/* cycle across our registered events and send the message to
|
||||
* any within the specified proc array */
|
||||
for (i=0; i < pmix_server_globals.clients.size; i++) {
|
||||
if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
|
||||
continue;
|
||||
}
|
||||
if (!notifyall) {
|
||||
/* check to see if this proc matches that of one in the specified array */
|
||||
notify = false;
|
||||
for (j=0; j < cd->nprocs; j++) {
|
||||
if (0 != strncmp(peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (PMIX_RANK_WILDCARD == cd->procs[j].rank ||
|
||||
cd->procs[j].rank == peer->info->rank) {
|
||||
notify = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!notify) {
|
||||
/* if we are not notifying everyone, and this proc isn't to
|
||||
* be notified, then just continue the main loop */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* get the client's error registration and check if client
|
||||
* requested notification of this error */
|
||||
reginfoptr = NULL;
|
||||
notify = false;
|
||||
PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) {
|
||||
if (reginfoptr->peer == peer) {
|
||||
/* check if the client has registered for this error
|
||||
* by parsing the info keys */
|
||||
notify = match_error_registration(reginfoptr, cd);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server _notify_error - match error registration returned notify =%d ", notify);
|
||||
}
|
||||
if (notify) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (notify) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: _notify_error - notifying process rank %d error %d",
|
||||
peer->info->rank, cd->status);
|
||||
PMIX_RETAIN(cd->buf);
|
||||
PMIX_SERVER_QUEUE_REPLY(peer, 0, cd->buf);
|
||||
}
|
||||
PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) {
|
||||
pmix_server_check_notifications(reginfoptr, cd);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
@ -1220,7 +1188,7 @@ static void _notify_error(int sd, short args, void *cbdata)
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(rc, cd->cbdata);
|
||||
}
|
||||
PMIX_RELEASE(cd);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
||||
@ -1234,12 +1202,32 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
||||
|
||||
cd = PMIX_NEW(pmix_notify_caddy_t);
|
||||
cd->status = status;
|
||||
cd->procs = procs;
|
||||
cd->nprocs = nprocs;
|
||||
cd->error_procs = error_procs;
|
||||
cd->error_nprocs = error_nprocs;
|
||||
cd->info = info;
|
||||
cd->ninfo = ninfo;
|
||||
/* have to copy the info here as we may have to cache this
|
||||
* notification until procs have a chance to register for it */
|
||||
if (NULL != procs) {
|
||||
cd->nprocs = nprocs;
|
||||
PMIX_PROC_CREATE(cd->procs, cd->nprocs);
|
||||
for (n=0; n < cd->nprocs; n++) {
|
||||
(void)strncpy(cd->procs[n].nspace, procs[n].nspace, PMIX_MAX_NSLEN);
|
||||
cd->procs[n].rank = procs[n].rank;
|
||||
}
|
||||
}
|
||||
if (NULL != error_procs) {
|
||||
cd->error_nprocs = error_nprocs;
|
||||
PMIX_PROC_CREATE(cd->error_procs, cd->error_nprocs);
|
||||
for (n=0; n < cd->error_nprocs; n++) {
|
||||
(void)strncpy(cd->error_procs[n].nspace, error_procs[n].nspace, PMIX_MAX_NSLEN);
|
||||
cd->error_procs[n].rank = error_procs[n].rank;
|
||||
}
|
||||
}
|
||||
if (NULL != info) {
|
||||
cd->ninfo = ninfo;
|
||||
PMIX_INFO_CREATE(cd->info, cd->ninfo);
|
||||
for (n=0; n < cd->ninfo; n++) {
|
||||
PMIX_INFO_LOAD(&cd->info[n], info[n].key,
|
||||
&info[n].value.data, info[n].value.type);
|
||||
}
|
||||
}
|
||||
cd->cbfunc = cbfunc;
|
||||
cd->cbdata = cbdata;
|
||||
|
||||
@ -1253,33 +1241,75 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
/* Deliver one notification to one registered client, if it applies.
 *
 * reginfo - event registration; reginfo->peer is the candidate recipient
 * cd      - notification caddy carrying the status, the optional list of
 *           target procs, and the message buffer (cd->buf)
 *
 * The client is skipped when cd->procs is non-NULL and no entry matches
 * the peer's nspace and rank (PMIX_RANK_WILDCARD matches any rank).
 * Otherwise, when match_error_registration() accepts the pair, cd->buf
 * is retained and queued to the peer as a reply with tag 0.
 */
void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
pmix_notify_caddy_t *cd)
|
||||
{
|
||||
bool notify;
|
||||
size_t j;
|
||||
|
||||
/* a NULL proc list means everyone is notified; otherwise only the
 * procs named in the list are */
|
||||
if (NULL != cd->procs) {
|
||||
/* check whether this peer matches one of the procs in the specified array */
|
||||
notify = false;
|
||||
for (j=0; j < cd->nprocs; j++) {
|
||||
if (0 != strncmp(reginfo->peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) {
|
||||
continue;
|
||||
}
|
||||
if (PMIX_RANK_WILDCARD == cd->procs[j].rank ||
|
||||
cd->procs[j].rank == reginfo->peer->info->rank) {
|
||||
notify = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!notify) {
|
||||
/* we are not notifying everyone and this proc is not
|
||||
* in the target list, so just return */
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* check if the client has registered for this error
|
||||
* by parsing the info keys */
|
||||
if (match_error_registration(reginfo, cd)) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server: check notifications - notifying process rank %d error %d",
|
||||
reginfo->peer->info->rank, cd->status);
|
||||
/* NOTE(review): presumably the queued reply drops one reference
 * on the buffer once sent, hence the retain - confirm */
PMIX_RETAIN(cd->buf);
|
||||
PMIX_SERVER_QUEUE_REPLY(reginfo->peer, 0, cd->buf);
|
||||
}
|
||||
|
||||
}
|
||||
static void reg_errhandler(int sd, short args, void *cbdata)
|
||||
{
|
||||
int index = 0;
|
||||
pmix_status_t rc;
|
||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||
pmix_notify_caddy_t *rb;
|
||||
|
||||
/* check if this handler is already registered if so return error */
|
||||
if (PMIX_SUCCESS == pmix_lookup_errhandler(cd->err, &index)) {
|
||||
/* complete request with error status and return its original reference */
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) {
|
||||
/* complete request with error status and return its original reference */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_register_errhandler error - hdlr already registered index = %d",
|
||||
index);
|
||||
cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata);
|
||||
} else {
|
||||
rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_server_register_errhandler - success index =%d", index);
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
}
|
||||
cd->active = false;
|
||||
/* cycle across any cached notifications and see if any are
|
||||
* pending for us and match this description */
|
||||
|
||||
/* acknowledge the registration so the caller can release
|
||||
* their data */
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_notification_fn_t errhandler,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
pmix_notification_fn_t errhandler,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
pmix_shift_caddy_t *cd;
|
||||
|
||||
@ -2161,44 +2191,51 @@ static void cnct_cbfunc(pmix_status_t status, void *cbdata)
|
||||
PMIX_THREADSHIFT(scd, _cnct);
|
||||
}
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
void regevents_cbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_regevents_info_t *reginfo, *reginfo_next;
|
||||
pmix_buffer_t *reply;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:regevents_cbfunc called status = %d", status);
|
||||
|
||||
if (PMIX_SUCCESS != status) {
|
||||
/* need to delete the stored event reg info when server
|
||||
nacks reg events request */
|
||||
* nacks reg events request */
|
||||
PMIX_LIST_FOREACH_SAFE(reginfo, reginfo_next, &pmix_server_globals.client_eventregs,
|
||||
pmix_regevents_info_t) {
|
||||
if(reginfo->peer == cd->peer) {
|
||||
pmix_list_remove_item (&pmix_server_globals.client_eventregs,
|
||||
&reginfo->super);
|
||||
if (reginfo->peer == cd->peer) {
|
||||
pmix_list_remove_item(&pmix_server_globals.client_eventregs,
|
||||
&reginfo->super);
|
||||
PMIX_RELEASE(reginfo);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
reply = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
static void deregevents_cbfunc (pmix_status_t status, void *cbdata)
|
||||
static void deregevents_cbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:deregevents_cbfunc called status = %d", status);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
@ -2209,10 +2246,14 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata)
|
||||
pmix_status_t rc;
|
||||
pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata;
|
||||
pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server:notifyerror_cbfunc called status = %d", status);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT)))
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
// send reply
|
||||
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);
|
||||
PMIX_RELEASE(cd);
|
||||
@ -2387,6 +2428,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (PMIX_DEREGEVENTS_CMD == cmd) {
|
||||
PMIX_PEER_CADDY(cd, peer, tag);
|
||||
if (PMIX_SUCCESS != (rc = pmix_server_deregister_events(peer, buf, deregevents_cbfunc, cd))) {
|
||||
@ -2395,6 +2437,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (PMIX_NOTIFY_CMD == cmd) {
|
||||
PMIX_PEER_CADDY(cd, peer, tag);
|
||||
if (PMIX_SUCCESS != (rc = pmix_server_notify_error_client(peer, buf, notifyerror_cbfunc, cd))) {
|
||||
@ -2402,6 +2445,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
return PMIX_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
@ -215,6 +215,20 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* do not force the dmodex logic for non-specific ranks -
|
||||
* return a not-found status instead of doing a fence with
|
||||
* data exchange. The user can decide whether to make such a call
|
||||
* after getting the not-found status
|
||||
*/
|
||||
if (PMIX_RANK_UNDEF == rank || PMIX_RANK_WILDCARD == rank) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"%s:%d not found data for namespace = %s, rank = %d "
|
||||
"(do not request resource manager server for non-specified rank)",
|
||||
pmix_globals.myid.nspace,
|
||||
pmix_globals.myid.rank, nspace, rank);
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* If we get here, then we don't have the data at this time. Check
|
||||
* to see if we already have a pending request for the data - if
|
||||
* we do, then we can just wait for it to arrive */
|
||||
@ -362,31 +376,52 @@ static pmix_status_t _satisfy_request(pmix_hash_table_t *ht, int rank,
|
||||
pmix_value_t *val;
|
||||
char *data;
|
||||
size_t sz;
|
||||
int cur_rank;
|
||||
int found = 0;
|
||||
pmix_buffer_t xfer, pbkt, *xptr;
|
||||
void *last;
|
||||
|
||||
/* check to see if this data already has been
|
||||
* obtained as a result of a prior direct modex request from
|
||||
* a remote peer, or due to data from a local client
|
||||
* having been committed */
|
||||
rc = pmix_hash_fetch(ht, rank, "modex", &val);
|
||||
if (PMIX_SUCCESS == rc && NULL != val) {
|
||||
/* the client is expecting this to arrive as a byte object
|
||||
* containing a buffer, so package it accordingly */
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
PMIX_CONSTRUCT(&xfer, pmix_buffer_t);
|
||||
xptr = &xfer;
|
||||
PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size);
|
||||
pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER);
|
||||
xfer.base_ptr = NULL; // protect the passed data
|
||||
xfer.bytes_used = 0;
|
||||
PMIX_DESTRUCT(&xfer);
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
/* pass it back */
|
||||
cbfunc(rc, data, sz, cbdata, relfn, data);
|
||||
return rc;
|
||||
cur_rank = rank;
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
rc = pmix_hash_fetch_by_key(ht, "modex", &cur_rank, &val, &last);
|
||||
} else {
|
||||
rc = pmix_hash_fetch(ht, cur_rank, "modex", &val);
|
||||
}
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
if (NULL != val) {
|
||||
pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_INT);
|
||||
/* the client is expecting this to arrive as a byte object
|
||||
* containing a buffer, so package it accordingly */
|
||||
PMIX_CONSTRUCT(&xfer, pmix_buffer_t);
|
||||
xptr = &xfer;
|
||||
PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER);
|
||||
xfer.base_ptr = NULL; // protect the passed data
|
||||
xfer.bytes_used = 0;
|
||||
PMIX_DESTRUCT(&xfer);
|
||||
found++;
|
||||
}
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
rc = pmix_hash_fetch_by_key(ht, NULL, &cur_rank, &val, &last);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
|
||||
if (found) {
|
||||
/* pass it back */
|
||||
cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data);
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
@ -132,7 +132,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
pmix_nspace_t *nptr;
|
||||
pmix_rank_info_t *info;
|
||||
pmix_dmdx_remote_t *dcd, *dcdnext;
|
||||
pmix_buffer_t pbkt;
|
||||
pmix_buffer_t *pbkt;
|
||||
pmix_value_t *val;
|
||||
char *data;
|
||||
size_t sz;
|
||||
@ -141,6 +141,12 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
info = peer->info;
|
||||
nptr = info->nptr;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"%s:%d EXECUTE COMMIT FOR %s:%d",
|
||||
pmix_globals.myid.nspace,
|
||||
pmix_globals.myid.rank,
|
||||
nptr->nspace, info->rank);
|
||||
|
||||
/* this buffer will contain one or more buffers, each
|
||||
* representing a different scope. These need to be locally
|
||||
* stored separately so we can provide required data based
|
||||
@ -162,17 +168,39 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
kp->key = strdup("modex");
|
||||
PMIX_VALUE_CREATE(kp->value, 1);
|
||||
kp->value->type = PMIX_BYTE_OBJECT;
|
||||
PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size);
|
||||
PMIX_RELEASE(b2);
|
||||
/* store it in the appropriate hash */
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
/* see if we already have info for this proc */
|
||||
if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) {
|
||||
/* create the new data storage */
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
kp->key = strdup("modex");
|
||||
PMIX_VALUE_CREATE(kp->value, 1);
|
||||
kp->value->type = PMIX_BYTE_OBJECT;
|
||||
/* get space for the new data blob */
|
||||
kp->value->data.bo.bytes = (char*)malloc(b2->bytes_used + val->data.bo.size);
|
||||
memcpy(kp->value->data.bo.bytes, val->data.bo.bytes, val->data.bo.size);
|
||||
memcpy(kp->value->data.bo.bytes+val->data.bo.size, b2->base_ptr, b2->bytes_used);
|
||||
kp->value->data.bo.size = val->data.bo.size + b2->bytes_used;
|
||||
/* release the storage */
|
||||
PMIX_VALUE_FREE(val, 1);
|
||||
/* store it in the appropriate hash */
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg
|
||||
} else {
|
||||
/* create a new kval to hold this data */
|
||||
kp = PMIX_NEW(pmix_kval_t);
|
||||
kp->key = strdup("modex");
|
||||
PMIX_VALUE_CREATE(kp->value, 1);
|
||||
kp->value->type = PMIX_BYTE_OBJECT;
|
||||
PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size);
|
||||
PMIX_RELEASE(b2);
|
||||
/* store it in the appropriate hash */
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg
|
||||
}
|
||||
PMIX_RELEASE(kp); // maintain acctg
|
||||
cnt = 1;
|
||||
}
|
||||
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
@ -191,16 +219,16 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
|
||||
if (dcd->cd->proc.rank == info->rank) {
|
||||
/* we can now fulfill this request - collect the
|
||||
* remote/global data from this proc */
|
||||
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
|
||||
pbkt = PMIX_NEW(pmix_buffer_t);
|
||||
/* get any remote contribution - note that there
|
||||
* may not be a contribution */
|
||||
if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) &&
|
||||
NULL != val) {
|
||||
PMIX_LOAD_BUFFER(&pbkt, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_LOAD_BUFFER(pbkt, val->data.bo.bytes, val->data.bo.size);
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
}
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
PMIX_UNLOAD_BUFFER(pbkt, data, sz);
|
||||
PMIX_RELEASE(pbkt);
|
||||
/* execute the callback */
|
||||
dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata);
|
||||
if (NULL != data) {
|
||||
@ -805,7 +833,7 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the array of apps */
|
||||
/* unpack the array of directives */
|
||||
if (0 < ninfo) {
|
||||
PMIX_INFO_CREATE(info, ninfo);
|
||||
cnt=ninfo;
|
||||
@ -958,6 +986,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer,
|
||||
pmix_info_t *info = NULL;
|
||||
size_t ninfo, n;
|
||||
pmix_regevents_info_t *reginfo;
|
||||
pmix_notify_caddy_t *cd;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"recvd register events");
|
||||
@ -995,13 +1024,20 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer,
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: calling host server reg events");
|
||||
/* call the local server */
|
||||
if(PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info,
|
||||
reginfo->ninfo, cbfunc, cbdata)))
|
||||
{
|
||||
|
||||
if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info,
|
||||
reginfo->ninfo, cbfunc, cbdata))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: host server reg events returned rc =%d", rc);
|
||||
}
|
||||
|
||||
/* check if any matching notifications have been cached */
|
||||
for (n=0; n < pmix_server_globals.notifications.size; n++) {
|
||||
if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_server_globals.notifications, n))) {
|
||||
break;
|
||||
}
|
||||
pmix_server_check_notifications(reginfo, cd);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"server register events: ninfo =%lu rc =%d", ninfo, rc);
|
||||
@ -1197,6 +1233,15 @@ static void ncon(pmix_notify_caddy_t *p)
|
||||
}
|
||||
static void ndes(pmix_notify_caddy_t *p)
|
||||
{
|
||||
if (NULL != p->procs) {
|
||||
PMIX_PROC_FREE(p->procs, p->nprocs);
|
||||
}
|
||||
if (NULL != p->error_procs) {
|
||||
PMIX_PROC_FREE(p->error_procs, p->error_nprocs);
|
||||
}
|
||||
if (NULL != p->info) {
|
||||
PMIX_INFO_FREE(p->info, p->ninfo);
|
||||
}
|
||||
if (NULL != p->buf) {
|
||||
PMIX_RELEASE(p->buf);
|
||||
}
|
||||
@ -1261,3 +1306,4 @@ static void regdes(pmix_regevents_info_t *p)
|
||||
PMIX_CLASS_INSTANCE(pmix_regevents_info_t,
|
||||
pmix_list_item_t,
|
||||
regcon, regdes);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
@ -15,6 +15,7 @@
|
||||
#include <private/autogen/config.h>
|
||||
#include <pmix/rename.h>
|
||||
#include <pmix/pmix_common.h>
|
||||
#include <src/class/pmix_ring_buffer.h>
|
||||
#include <pmix_server.h>
|
||||
#include "src/usock/usock.h"
|
||||
#include "src/util/hash.h"
|
||||
@ -157,6 +158,7 @@ typedef struct {
|
||||
int stop_thread[2]; // pipe used to stop listener thread
|
||||
pmix_buffer_t gdata; // cache of data given to me for passing to all clients
|
||||
pmix_list_t client_eventregs; // list of registered events per client.
|
||||
pmix_ring_buffer_t notifications; // ring buffer of pending notifications
|
||||
} pmix_server_globals_t;
|
||||
|
||||
#define PMIX_PEER_CADDY(c, p, t) \
|
||||
@ -272,6 +274,8 @@ pmix_status_t pmix_server_notify_error_client(pmix_peer_t *peer,
|
||||
pmix_buffer_t *buf,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata);
|
||||
void pmix_server_check_notifications(pmix_regevents_info_t *reginfo,
|
||||
pmix_notify_caddy_t *cd);
|
||||
|
||||
void regevents_cbfunc (pmix_status_t status, void *cbdata);
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -137,10 +137,16 @@ const char* PMIx_Error_string(pmix_status_t errnum)
|
||||
return "PROC-ABORT-REQUESTED";
|
||||
case PMIX_ERR_PROC_ABORTED:
|
||||
return "PROC-ABORTED";
|
||||
case PMIX_ERR_DEBUGGER_RELEASE:
|
||||
return "DEBUGGER-RELEASE";
|
||||
case PMIX_ERR_SILENT:
|
||||
return "SILENT_ERROR";
|
||||
case PMIX_ERROR:
|
||||
return "ERROR";
|
||||
case PMIX_ERR_GRP_FOUND:
|
||||
return "GROUP-FOUND";
|
||||
case PMIX_ERR_DFLT_FOUND:
|
||||
return "DEFAULT-FOUND";
|
||||
case PMIX_SUCCESS:
|
||||
return "SUCCESS";
|
||||
|
||||
@ -162,15 +168,26 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
pmix_error_reg_info_t *errreg, *errdflt=NULL;
|
||||
pmix_info_t *iptr;
|
||||
|
||||
/* we will need to provide the errhandler reference id when
|
||||
* we provide the callback. Since the callback function doesn't
|
||||
* provide a param for that purpose, we have to add it to any
|
||||
* info array that came from the RM, so extend the array by 1 */
|
||||
PMIX_INFO_CREATE(iptr, ninfo+1);
|
||||
/* put the reference id in the first location */
|
||||
(void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
|
||||
iptr[0].value.type = PMIX_INT;
|
||||
/* we don't know the reference id yet, but we'll fill that in
|
||||
* later - for now, just copy the incoming info array across */
|
||||
if (NULL != info) {
|
||||
for (j=0; j < ninfo; j++) {
|
||||
PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type);
|
||||
}
|
||||
}
|
||||
|
||||
/* search our array of errhandlers for a match. We take any specific
|
||||
* error status first, then take the group of the incoming status next.
|
||||
* If neither of those have been registered, then use any default
|
||||
* errhandler - otherwise, ignore it */
|
||||
for (i = 0; i < pmix_globals.errregs.size; i++) {
|
||||
if (NULL == (errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item(&pmix_globals.errregs, i))) {
|
||||
continue;
|
||||
@ -194,7 +211,7 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!exact_match) {
|
||||
if (!exact_match && NULL != info) {
|
||||
/* if no exact match was found, then we will fire the errhandler
|
||||
* for any matching info key. This may be too lax and need to be adjusted
|
||||
* later */
|
||||
@ -217,22 +234,102 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
||||
PMIX_INFO_FREE(iptr, ninfo+1);
|
||||
}
|
||||
|
||||
pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err,
|
||||
/* lookup an errhandler during registration */
|
||||
pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
int *index)
|
||||
{
|
||||
int i;
|
||||
pmix_status_t rc = PMIX_ERR_NOT_FOUND;
|
||||
pmix_error_reg_info_t *errreg = NULL;
|
||||
int i, idflt=-1, igrp=-1;
|
||||
pmix_error_reg_info_t *errreg;
|
||||
size_t sz, n;
|
||||
char errgrp[PMIX_MAX_KEYLEN];
|
||||
bool exact_given = false;
|
||||
int given = -1;
|
||||
pmix_status_t status;
|
||||
char *grp;
|
||||
|
||||
for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) {
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
|
||||
if ((NULL != errreg) && (err == errreg->errhandler)) {
|
||||
*index = i;
|
||||
rc = PMIX_SUCCESS;
|
||||
break;
|
||||
/* scan the incoming specification to see if it is a general errhandler,
|
||||
* a group errhandler, or an error handler for a specific status. Only
|
||||
* one of these options can be specified! */
|
||||
if (NULL == info) {
|
||||
/* this is the general error handler */
|
||||
given = 0;
|
||||
} else {
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strncmp(info[n].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
/* this is a specific errhandler */
|
||||
given = 1;
|
||||
status = info[n].value.data.integer;
|
||||
break;
|
||||
} else if (0 == strcmp(info[n].key, "pmix.errgroup")) {
|
||||
/* this is a group errhandler */
|
||||
given = 2;
|
||||
grp = info[n].value.data.string;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
|
||||
/* search our array of errhandlers for a match */
|
||||
for (i = 0; i < pmix_globals.errregs.size ; i++) {
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
|
||||
if (NULL == errreg) {
|
||||
continue;
|
||||
}
|
||||
if (NULL == errreg->info) {
|
||||
/* this is the general errhandler - if they gave us
|
||||
* another general errhandler, then we should
|
||||
* replace it */
|
||||
if (0 == given) {
|
||||
*index = i;
|
||||
return PMIX_ERR_DFLT_FOUND;
|
||||
}
|
||||
/* save this spot as we will default to it if nothing else is found */
|
||||
idflt = i;
|
||||
continue;
|
||||
}
|
||||
if (0 == given) {
|
||||
/* they are looking for the general errhandler */
|
||||
continue;
|
||||
}
|
||||
/* if this registration is for a single specific errhandler, then
|
||||
* see if the incoming one matches */
|
||||
if (1 == given && errreg->sglhdlr) {
|
||||
for (sz=0; sz < errreg->ninfo; sz++) {
|
||||
if (0 == strncmp(errreg->info[sz].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
if (status == errreg->info[sz].value.data.integer) {
|
||||
/* we have an exact match - return this errhandler and
|
||||
* let the caller know it was an exact match */
|
||||
*index = i;
|
||||
return PMIX_EXISTS;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (2 == given && !errreg->sglhdlr) {
|
||||
/* this registration is for a group, so check that case */
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* if we get here, then no match was found. If they
|
||||
* gave us a specific error, then we have to return not_found */
|
||||
if (exact_given) {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* If we have a group match, then that takes precedence */
|
||||
if (0 <= igrp) {
|
||||
*index = igrp;
|
||||
return PMIX_ERR_GRP_FOUND;
|
||||
}
|
||||
|
||||
/* if we found a default errhandler, then use it */
|
||||
if (0 <= idflt) {
|
||||
*index = idflt;
|
||||
return PMIX_ERR_DFLT_FOUND;
|
||||
}
|
||||
|
||||
/* otherwise, it wasn't found */
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
@ -242,25 +339,45 @@ pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
int i;
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_error_reg_info_t *errreg;
|
||||
bool sglhdlr = false;
|
||||
|
||||
errreg = PMIX_NEW(pmix_error_reg_info_t);
|
||||
errreg->errhandler = err;
|
||||
errreg->ninfo = ninfo;
|
||||
if (0 != *index) {
|
||||
/* overwrite an existing entry */
|
||||
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, *index);
|
||||
if (NULL == errreg) {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
errreg->errhandler = err;
|
||||
PMIX_INFO_FREE(errreg->info, errreg->ninfo);
|
||||
errreg->ninfo = ninfo;
|
||||
} else {
|
||||
errreg = PMIX_NEW(pmix_error_reg_info_t);
|
||||
errreg->errhandler = err;
|
||||
errreg->ninfo = ninfo;
|
||||
*index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_add_errhandler index =%d", *index);
|
||||
if (*index < 0) {
|
||||
PMIX_RELEASE(errreg);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
}
|
||||
/* sadly, we have to copy the info objects as we cannot
|
||||
* rely on them to remain in-memory */
|
||||
if (NULL != info && 0 < ninfo) {
|
||||
PMIX_INFO_CREATE(errreg->info, ninfo);
|
||||
for (i=0; i < ninfo; i++) {
|
||||
/* if this is a specific, single errhandler, then
|
||||
* mark it accordingly */
|
||||
if (0 == strncmp(info[i].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) {
|
||||
errreg->sglhdlr = true;
|
||||
}
|
||||
(void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
|
||||
pmix_value_xfer(&errreg->info[i].value, &info[i].value);
|
||||
}
|
||||
}
|
||||
*index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_add_errhandler index =%d", *index);
|
||||
if (*index < 0) {
|
||||
PMIX_RELEASE(errreg);
|
||||
rc = PMIX_ERROR;
|
||||
}
|
||||
return rc;
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
pmix_status_t pmix_remove_errhandler(int errhandler_ref)
|
||||
|
@ -42,11 +42,19 @@ BEGIN_C_DECLS
|
||||
pmix_errhandler_invoke(e, NULL, 0, NULL, 0); \
|
||||
} while(0);
|
||||
|
||||
/* invoke the error handler that is registered against the given
|
||||
* status, passing it the provided info on the procs that were
|
||||
* affected, plus any additional info provided by the server */
|
||||
PMIX_DECLSPEC void pmix_errhandler_invoke(pmix_status_t status,
|
||||
pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_info_t info[], size_t ninfo);
|
||||
|
||||
PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err,
|
||||
/* lookup the errhandler registered against the given status. If there
|
||||
* is none, but an errhandler has been registered against the group
|
||||
* that this status belongs to, then return that errhandler. If neither
|
||||
* of those is true, but a general errhandler has been registered, then
|
||||
* return that errhandler. Otherwise, return NOT_FOUND */
|
||||
PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
int *index);
|
||||
|
||||
PMIX_DECLSPEC pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
||||
|
@ -67,33 +67,28 @@ int pmix_hash_store(pmix_hash_table_t *table,
|
||||
{
|
||||
pmix_proc_data_t *proc_data;
|
||||
uint64_t id;
|
||||
pmix_kval_t *kv;
|
||||
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:STORE rank %d key %s",
|
||||
rank, kin->key);
|
||||
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
id = UINT64_MAX;
|
||||
} else {
|
||||
id = (uint64_t)rank;
|
||||
}
|
||||
|
||||
/* lookup the proc data object for this proc - create
|
||||
* it if we don't */
|
||||
* it if we don't already have it */
|
||||
if (NULL == (proc_data = lookup_proc(table, id, true))) {
|
||||
return PMIX_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* see if we already have this key in the data - means we are updating
|
||||
* a pre-existing value
|
||||
*/
|
||||
kv = lookup_keyval(&proc_data->data, kin->key);
|
||||
if (NULL != kv) {
|
||||
pmix_list_remove_item(&proc_data->data, &kv->super);
|
||||
PMIX_RELEASE(kv);
|
||||
}
|
||||
/* store the new value */
|
||||
/* add the new value - note that if the user is updating
|
||||
* a value, the ordering of the stored blobs will cause
|
||||
* an update to eventually occur. In other words, the
|
||||
* receiving process will first unpack the "old" data,
|
||||
* and then unpack the update and overwrite it */
|
||||
PMIX_RETAIN(kin);
|
||||
pmix_list_append(&proc_data->data, &kin->super);
|
||||
|
||||
@ -103,46 +98,131 @@ int pmix_hash_store(pmix_hash_table_t *table,
|
||||
pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
const char *key, pmix_value_t **kvs)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_proc_data_t *proc_data;
|
||||
pmix_kval_t *hv;
|
||||
uint64_t id;
|
||||
pmix_status_t rc;
|
||||
char *node;
|
||||
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH rank %d key %s",
|
||||
rank, (NULL == key) ? "NULL" : key);
|
||||
|
||||
/* NULL keys are not supported */
|
||||
if (NULL == key) {
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
id = UINT64_MAX;
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
/* PMIX_RANK_UNDEF should return following statuses
|
||||
* PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_SUCCESS
|
||||
* special logic on the client and the server is based on these statuses */
|
||||
rc = pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data, (void**)&node);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH proc data for rank %d not found",
|
||||
rank);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* specified rank can return following statuses
|
||||
* PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_ERR_NOT_FOUND | PMIX_SUCCESS
|
||||
* special logic on the client and the server is based on these statuses */
|
||||
id = (uint64_t)rank;
|
||||
}
|
||||
|
||||
/* lookup the proc data object for this proc */
|
||||
if (NULL == (proc_data = lookup_proc(table, id, false))) {
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
proc_data = lookup_proc(table, id, false);
|
||||
if (NULL == proc_data) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH proc data for rank %d not found",
|
||||
rank);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* if the key is NULL, then the user wants -all- data
|
||||
* put by the specified rank */
|
||||
if (NULL == key) {
|
||||
/* we will return the data as an array of pmix_info_t
|
||||
* in the kvs pmix_value_t */
|
||||
|
||||
} else {
|
||||
/* find the value from within this proc_data object */
|
||||
hv = lookup_keyval(&proc_data->data, key);
|
||||
if (hv) {
|
||||
/* create the copy */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
break;
|
||||
} else if (PMIX_RANK_UNDEF != rank) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
rc = pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data, node, (void**)&node);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key,
|
||||
int *rank, pmix_value_t **kvs, void **last)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_proc_data_t *proc_data;
|
||||
pmix_kval_t *hv;
|
||||
uint64_t id;
|
||||
char *node;
|
||||
static const char *key_r = NULL;
|
||||
|
||||
if (key == NULL && (node = *last) == NULL) {
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (key == NULL && key_r == NULL) {
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (key) {
|
||||
rc = pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data, (void**)&node);
|
||||
key_r = key;
|
||||
} else {
|
||||
rc = pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data, node, (void**)&node);
|
||||
}
|
||||
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH BY KEY rank %d key %s",
|
||||
(int)id, key_r);
|
||||
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH proc data for rank %d not found",
|
||||
rank);
|
||||
"HASH:FETCH proc data for key %s not found",
|
||||
key_r);
|
||||
return PMIX_ERR_PROC_ENTRY_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* find the value from within this proc_data object */
|
||||
if (NULL == (hv = lookup_keyval(&proc_data->data, key))) {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"HASH:FETCH data for key %s not found", key);
|
||||
hv = lookup_keyval(&proc_data->data, key_r);
|
||||
if (hv) {
|
||||
/* create the copy */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* create the copy */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
*rank = (int)id;
|
||||
*last = node;
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
@ -150,6 +230,7 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
int rank, const char *key)
|
||||
{
|
||||
pmix_status_t rc = PMIX_SUCCESS;
|
||||
pmix_proc_data_t *proc_data;
|
||||
pmix_kval_t *kv;
|
||||
uint64_t id;
|
||||
@ -157,11 +238,11 @@ int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
|
||||
/* if the rank is wildcard, we want to apply this to
|
||||
* all rank entries */
|
||||
if (PMIX_RANK_WILDCARD == rank) {
|
||||
if (PMIX_RANK_UNDEF == rank) {
|
||||
id = UINT64_MAX;
|
||||
if (PMIX_SUCCESS == pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data,
|
||||
(void**)&node)) {
|
||||
rc = pmix_hash_table_get_first_key_uint64(table, &id,
|
||||
(void**)&proc_data, (void**)&node);
|
||||
while (PMIX_SUCCESS == rc) {
|
||||
if (NULL != proc_data) {
|
||||
if (NULL == key) {
|
||||
PMIX_RELEASE(proc_data);
|
||||
@ -175,23 +256,8 @@ int pmix_hash_remove_data(pmix_hash_table_t *table,
|
||||
}
|
||||
}
|
||||
}
|
||||
while (PMIX_SUCCESS == pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data,
|
||||
node, (void**)&node)) {
|
||||
if (NULL != proc_data) {
|
||||
if (NULL == key) {
|
||||
PMIX_RELEASE(proc_data);
|
||||
} else {
|
||||
PMIX_LIST_FOREACH(kv, &proc_data->data, pmix_kval_t) {
|
||||
if (0 == strcmp(key, kv->key)) {
|
||||
pmix_list_remove_item(&proc_data->data, &kv->super);
|
||||
PMIX_RELEASE(kv);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rc = pmix_hash_table_get_next_key_uint64(table, &id,
|
||||
(void**)&proc_data, node, (void**)&node);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,11 +27,19 @@ BEGIN_C_DECLS
|
||||
pmix_status_t pmix_hash_store(pmix_hash_table_t *table,
|
||||
int rank, pmix_kval_t *kv);
|
||||
|
||||
/* Fetch the value for a specified key from within
|
||||
/* Fetch the value for a specified key and rank from within
|
||||
* the given hash_table */
|
||||
pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank,
|
||||
const char *key, pmix_value_t **kvs);
|
||||
|
||||
/* Fetch the value for a specified key from within
|
||||
* the given hash_table
|
||||
* It gets the next portion of data from the table whose key matches.
|
||||
* To get the first data from the table, the function is called with the key parameter set to a string.
|
||||
* The remaining data from the table are obtained by calling the function with a null pointer for the key parameter. */
|
||||
pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key,
|
||||
int *rank, pmix_value_t **kvs, void **last);
|
||||
|
||||
/* remove the specified key-value from the given hash_table.
|
||||
* A NULL key will result in removal of all data for the
|
||||
* given rank. A rank of PMIX_RANK_WILDCARD indicates that
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <pmix/pmix_common.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
@ -468,6 +469,56 @@ void pmix_output_set_output_file_info(const char *dir,
|
||||
}
|
||||
}
|
||||
|
||||
void pmix_output_hexdump(int verbose_level, int output_id,
|
||||
void *ptr, int buflen)
|
||||
{
|
||||
unsigned char *buf = (unsigned char *) ptr;
|
||||
char out_buf[120];
|
||||
int ret = 0;
|
||||
int out_pos = 0;
|
||||
int i, j;
|
||||
|
||||
if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS &&
|
||||
info[output_id].ldi_verbose_level >= verbose_level) {
|
||||
pmix_output_verbose(verbose_level, output_id, "dump data at %p %d bytes\n", ptr, buflen);
|
||||
for (i = 0; i < buflen; i += 16) {
|
||||
out_pos = 0;
|
||||
ret = sprintf(out_buf + out_pos, "%06x: ", i);
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
for (j = 0; j < 16; j++) {
|
||||
if (i + j < buflen)
|
||||
ret = sprintf(out_buf + out_pos, "%02x ",
|
||||
buf[i + j]);
|
||||
else
|
||||
ret = sprintf(out_buf + out_pos, " ");
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
}
|
||||
ret = sprintf(out_buf + out_pos, " ");
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
for (j = 0; j < 16; j++)
|
||||
if (i + j < buflen) {
|
||||
ret = sprintf(out_buf + out_pos, "%c",
|
||||
isprint(buf[i+j]) ?
|
||||
buf[i + j] :
|
||||
'.');
|
||||
if (ret < 0)
|
||||
return;
|
||||
out_pos += ret;
|
||||
}
|
||||
ret = sprintf(out_buf + out_pos, "\n");
|
||||
if (ret < 0)
|
||||
return;
|
||||
pmix_output_verbose(verbose_level, output_id, "%s", out_buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Shut down the output stream system
|
||||
|
@ -507,6 +507,12 @@ struct pmix_output_stream_t {
|
||||
char **olddir,
|
||||
char **oldprefix);
|
||||
|
||||
/**
|
||||
* Same as pmix_output_verbose(), but takes a pointer to a buffer and its size, and prints the buffer contents as a hex dump.
|
||||
*/
|
||||
PMIX_DECLSPEC void pmix_output_hexdump(int verbose_level, int output_id,
|
||||
void *ptr, int buflen);
|
||||
|
||||
#if PMIX_ENABLE_DEBUG
|
||||
/**
|
||||
* Main macro for use in sending debugging output to output streams;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
|
@ -211,6 +211,11 @@ static void reg_thread(int sd, short args, void *cbdata)
|
||||
int rc;
|
||||
opal_pmix120_etracker_t *trk;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s register complete with status %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
||||
cd->status);
|
||||
|
||||
/* convert the status */
|
||||
rc = pmix120_convert_rc(cd->status);
|
||||
|
||||
@ -251,6 +256,11 @@ static void pmix120_register_errhandler(opal_list_t *info,
|
||||
size_t n;
|
||||
opal_value_t *ival;
|
||||
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s REGISTER ERRHDNLR INFO %s",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
||||
(NULL == info) ? "NULL" : "NOT-NULL");
|
||||
|
||||
/* setup a caddy for the operation so we can free
|
||||
* the array when done */
|
||||
cd = OBJ_NEW(pmix120_opcaddy_t);
|
||||
@ -266,7 +276,8 @@ static void pmix120_register_errhandler(opal_list_t *info,
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
|
||||
(void)strncpy(cd->info[n].key, ival->key, PMIX_MAX_KEYLEN);
|
||||
pmix120_value_load(&cd->info[n].value, ival);
|
||||
cd->info[n].value.type = PMIX_INT;
|
||||
cd->info[n].value.data.status = pmix120_convert_opalrc(ival->data.integer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -248,8 +248,14 @@ opal_err2str(int errnum, const char **errmsg)
|
||||
case OPAL_ERR_SERVER_NOT_AVAIL:
|
||||
retval = "Server not available";
|
||||
break;
|
||||
case OPAL_ERR_IN_PROCESS:
|
||||
retval = "Operation in process";
|
||||
break;
|
||||
case OPAL_ERR_DEBUGGER_RELEASE:
|
||||
retval = "Release debugger";
|
||||
break;
|
||||
default:
|
||||
retval = NULL;
|
||||
retval = "UNRECOGNIZED";
|
||||
}
|
||||
|
||||
*errmsg = retval;
|
||||
|
@ -540,6 +540,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata)
|
||||
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
||||
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
|
||||
sig->sz = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buffer))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buffer);
|
||||
@ -693,9 +694,6 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
cleanup:
|
||||
/* need to init_after_spawn for debuggers */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(caddy);
|
||||
}
|
||||
|
@ -110,8 +110,8 @@ BEGIN_C_DECLS
|
||||
/* show help */
|
||||
#define ORTE_RML_TAG_SHOW_HELP 36
|
||||
|
||||
/* debugger release */
|
||||
#define ORTE_RML_TAG_DEBUGGER_RELEASE 37
|
||||
/* error notifications */
|
||||
#define ORTE_RML_TAG_NOTIFICATION 37
|
||||
|
||||
/* bootstrap */
|
||||
#define ORTE_RML_TAG_BOOTSTRAP 38
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
@ -197,6 +197,7 @@ int pmix_server_init(void)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OBJ_CONSTRUCT(&orte_pmix_server_globals.notifications, opal_list_t);
|
||||
|
||||
/* setup recv for direct modex requests */
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX,
|
||||
@ -214,6 +215,10 @@ int pmix_server_init(void)
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT,
|
||||
ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL);
|
||||
|
||||
/* setup recv for notifications */
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION,
|
||||
ORTE_RML_PERSISTENT, pmix_server_notify, NULL);
|
||||
|
||||
/* ensure the PMIx server uses the proper rendezvous directory */
|
||||
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
|
||||
|
||||
@ -348,12 +353,16 @@ void pmix_server_finalize(void)
|
||||
/* stop receives */
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX_RESP);
|
||||
|
||||
/* cleanup collectives */
|
||||
OBJ_DESTRUCT(&orte_pmix_server_globals.reqs);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT);
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION);
|
||||
|
||||
/* shutdown the local server */
|
||||
opal_pmix.server_finalize();
|
||||
|
||||
/* cleanup collectives */
|
||||
OBJ_DESTRUCT(&orte_pmix_server_globals.reqs);
|
||||
OPAL_LIST_DESTRUCT(&orte_pmix_server_globals.notifications);
|
||||
}
|
||||
|
||||
static void send_error(int status, opal_process_name_t *idreq,
|
||||
@ -634,6 +643,7 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
|
||||
/* Initializer for an op caddy: put every pointer member into a known
 * empty state.  cbfunc is cleared as well because the event handlers
 * test it against NULL before invoking it, so it must never hold an
 * indeterminate value. */
static void opcon(orte_pmix_server_op_caddy_t *p)
{
    p->procs = NULL;
    p->eprocs = NULL;
    p->info = NULL;
    p->cbfunc = NULL;
    p->cbdata = NULL;
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -111,24 +111,204 @@ int pmix_server_abort_fn(opal_process_name_t *proc, void *server_object,
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void _register_events(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
opal_value_t *info;
|
||||
|
||||
/* the OPAL layer "owns" the list, but let's deconstruct it
|
||||
* here so we don't have to duplicate the data */
|
||||
while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) {
|
||||
/* don't worry about duplication as the underlying host
|
||||
* server is already protecting us from it */
|
||||
opal_list_append(&orte_pmix_server_globals.notifications, &info->super);
|
||||
}
|
||||
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(ORTE_SUCCESS, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
|
||||
/* hook for the local PMIX server to pass event registrations
|
||||
* up to us - we will assume the responsibility for providing
|
||||
* notifications for registered events */
|
||||
int pmix_server_register_events_fn(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
/* for now, just execute the cbfunc */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, cbdata);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
/* need to thread-shift this request as we are going
|
||||
* to access our global list of registered events */
|
||||
ORTE_PMIX_OPERATION(NULL, info, _register_events, cbfunc, cbdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void _deregister_events(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
opal_value_t *info, *iptr, *nptr;
|
||||
|
||||
/* the OPAL layer "owns" the list, but let's deconstruct it
|
||||
* here for consistency */
|
||||
while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) {
|
||||
/* search for matching requests */
|
||||
OPAL_LIST_FOREACH_SAFE(iptr, nptr, &orte_pmix_server_globals.notifications, opal_value_t) {
|
||||
if (OPAL_EQUAL == opal_dss.compare(iptr, info, OPAL_VALUE)) {
|
||||
opal_list_remove_item(&orte_pmix_server_globals.notifications, &iptr->super);
|
||||
OBJ_RELEASE(iptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(info);
|
||||
}
|
||||
|
||||
if (NULL != cd->cbfunc) {
|
||||
cd->cbfunc(ORTE_SUCCESS, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
/* hook for the local PMIX server to pass event deregistrations
|
||||
* up to us */
|
||||
int pmix_server_deregister_events_fn(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
/* for now, just execute the cbfunc */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, cbdata);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
/* need to thread-shift this request as we are going
|
||||
* to access our global list of registered events */
|
||||
ORTE_PMIX_OPERATION(NULL, info, _deregister_events, cbfunc, cbdata);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void _notify_release(int status, void *cbdata)
|
||||
{
|
||||
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
|
||||
|
||||
if (NULL != cd->procs) {
|
||||
OPAL_LIST_RELEASE(cd->procs);
|
||||
}
|
||||
if (NULL != cd->eprocs) {
|
||||
OPAL_LIST_RELEASE(cd->eprocs);
|
||||
}
|
||||
if (NULL != cd->info) {
|
||||
OPAL_LIST_RELEASE(cd->info);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
void pmix_server_notify(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata)
|
||||
{
|
||||
opal_list_t *procs = NULL, *eprocs = NULL, *info = NULL;
|
||||
int cnt, rc, ret, nprocs, n;
|
||||
opal_namelist_t *nm;
|
||||
opal_value_t *val;
|
||||
orte_pmix_server_op_caddy_t *cd;
|
||||
|
||||
opal_output_verbose(2, orte_pmix_server_globals.output,
|
||||
"%s Notification received",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* unpack the status */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
/* unpack the target procs that are to be notified */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
procs = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
opal_list_append(procs, &nm->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the procs that were impacted by the error */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
eprocs = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
opal_list_append(eprocs, &nm->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the infos that were provided */
|
||||
cnt = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any were provided, add them to the list */
|
||||
if (0 < nprocs) {
|
||||
info = OBJ_NEW(opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
opal_list_append(info, &val->super);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &val, &cnt, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
if (NULL != eprocs) {
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
}
|
||||
OPAL_LIST_RELEASE(info);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cd = OBJ_NEW(orte_pmix_server_op_caddy_t);
|
||||
cd->procs = procs;
|
||||
cd->eprocs = eprocs;
|
||||
cd->info = info;
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_notify_error(ret, procs, eprocs, info, _notify_release, cd))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != procs) {
|
||||
OPAL_LIST_RELEASE(procs);
|
||||
}
|
||||
if (NULL != eprocs) {
|
||||
OPAL_LIST_RELEASE(eprocs);
|
||||
}
|
||||
if (NULL != info) {
|
||||
OPAL_LIST_RELEASE(info);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
}
|
||||
|
@ -75,6 +75,7 @@ typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
opal_list_t *procs;
|
||||
opal_list_t *eprocs;
|
||||
opal_list_t *info;
|
||||
opal_pmix_op_cbfunc_t cbfunc;
|
||||
void *cbdata;
|
||||
@ -175,6 +176,10 @@ extern void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata);
|
||||
|
||||
extern void pmix_server_notify(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tg, void *cbdata);
|
||||
|
||||
/* exposed shared variables */
|
||||
typedef struct {
|
||||
bool initialized;
|
||||
@ -186,6 +191,7 @@ typedef struct {
|
||||
char *server_uri;
|
||||
bool wait_for_server;
|
||||
orte_process_name_t server;
|
||||
opal_list_t notifications;
|
||||
} pmix_server_globals_t;
|
||||
|
||||
extern pmix_server_globals_t orte_pmix_server_globals;
|
||||
|
@ -2243,7 +2243,7 @@ static void run_debugger(char *basename, opal_cmd_line_t *cmd_line,
|
||||
* - fills in the table MPIR_proctable, and sets MPIR_proctable_size
|
||||
* - sets MPIR_debug_state to MPIR_DEBUG_SPAWNED ( = 1)
|
||||
* - calls MPIR_Breakpoint() which the debugger will have a
|
||||
* breakpoint on.
|
||||
* breakpoint on.
|
||||
*
|
||||
* b) Applications start and then spin until MPIR_debug_gate is set
|
||||
* non-zero by the debugger.
|
||||
@ -2382,8 +2382,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
|
||||
return;
|
||||
}
|
||||
strncpy(MPIR_attach_fifo, attach_fifo, MPIR_MAX_PATH_LENGTH - 1);
|
||||
free(attach_fifo);
|
||||
open_fifo();
|
||||
free(attach_fifo);
|
||||
open_fifo();
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -2511,6 +2511,58 @@ static void setup_debugger_job(void)
|
||||
|
||||
static bool mpir_breakpoint_fired = false;
|
||||
|
||||
static void _send_notification(void)
|
||||
{
|
||||
opal_buffer_t buf;
|
||||
int status = OPAL_ERR_DEBUGGER_RELEASE;
|
||||
orte_grpcomm_signature_t sig;
|
||||
int rc;
|
||||
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
|
||||
/* pack the debugger_attached status */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
status = 0;
|
||||
|
||||
/* notify all procs */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* all procs are impacted */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* no further info to provide */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* xcast it to everyone */
|
||||
OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
|
||||
sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
||||
sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sig.signature[0].vpid = ORTE_VPID_WILDCARD;
|
||||
sig.sz = 1;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(&sig, ORTE_RML_TAG_NOTIFICATION, &buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
OBJ_DESTRUCT(&sig);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialization of data structures for running under a debugger
|
||||
* using the MPICH/TotalView parallel debugger interface. This stage
|
||||
@ -2527,8 +2579,6 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
orte_proc_t *proc;
|
||||
orte_app_context_t *appctx;
|
||||
orte_vpid_t i, j;
|
||||
opal_buffer_t *buf;
|
||||
int rc, k;
|
||||
char **aliases, *aptr;
|
||||
|
||||
/* if we couldn't get thru the mapper stage, we might
|
||||
@ -2548,31 +2598,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
/* trigger the debugger */
|
||||
MPIR_Breakpoint();
|
||||
|
||||
/* send a message to rank=0 of any app jobs to release it */
|
||||
for (k=1; k < orte_job_data->size; k++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
|
||||
/* ignore debugger jobs */
|
||||
continue;
|
||||
}
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) ||
|
||||
ORTE_PROC_STATE_UNTERMINATED < proc->state ||
|
||||
NULL == proc->rml_uri) {
|
||||
/* proc is already dead or never registered with us (so we don't have
|
||||
* contact info for him)
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -2665,35 +2692,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
|
||||
/* trigger the debugger */
|
||||
MPIR_Breakpoint();
|
||||
|
||||
/* send a message to rank=0 of any app jobs to release it */
|
||||
for (k=1; k < orte_job_data->size; k++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
|
||||
/* ignore debugger jobs */
|
||||
continue;
|
||||
}
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) ||
|
||||
ORTE_PROC_STATE_UNTERMINATED < proc->state ||
|
||||
NULL == proc->rml_uri) {
|
||||
/* proc is already dead or never registered with us (so we don't have
|
||||
* contact info for him)
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(2, orte_debug_output,
|
||||
"%s sending debugger release to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name));
|
||||
buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf,
|
||||
ORTE_RML_TAG_DEBUGGER_RELEASE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
||||
/* notify all procs that the debugger is ready */
|
||||
_send_notification();
|
||||
} else {
|
||||
/* if I am launching debugger daemons, then I need to do so now
|
||||
* that the job has been started and I know which nodes have
|
||||
@ -2727,14 +2727,14 @@ static void orte_debugger_detached(int fd, short event, void *cbdata)
|
||||
static void open_fifo (void)
|
||||
{
|
||||
if (attach_fd > 0) {
|
||||
close(attach_fd);
|
||||
close(attach_fd);
|
||||
}
|
||||
|
||||
attach_fd = open(MPIR_attach_fifo, O_RDONLY | O_NONBLOCK, 0);
|
||||
if (attach_fd < 0) {
|
||||
opal_output(0, "%s unable to open debugger attach fifo",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
opal_output(0, "%s unable to open debugger attach fifo",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set this fd to be close-on-exec so that children don't see it */
|
||||
@ -2747,9 +2747,9 @@ static void open_fifo (void)
|
||||
}
|
||||
|
||||
opal_output_verbose(2, orte_debug_output,
|
||||
"%s Monitoring debugger attach fifo %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
MPIR_attach_fifo);
|
||||
"%s Monitoring debugger attach fifo %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
MPIR_attach_fifo);
|
||||
attach = (opal_event_t*)malloc(sizeof(opal_event_t));
|
||||
opal_event_set(orte_event_base, attach, attach_fd, OPAL_EV_READ, attach_debugger, attach);
|
||||
|
||||
@ -2766,16 +2766,16 @@ static void attach_debugger(int fd, short event, void *arg)
|
||||
|
||||
if (fifo_active) {
|
||||
attach = (opal_event_t*)arg;
|
||||
fifo_active = false;
|
||||
fifo_active = false;
|
||||
|
||||
rc = read(attach_fd, &fifo_cmd, sizeof(fifo_cmd));
|
||||
if (!rc) {
|
||||
if (!rc) {
|
||||
/* release the current event */
|
||||
opal_event_free(attach);
|
||||
/* reopen device to clear hangup */
|
||||
open_fifo();
|
||||
return;
|
||||
}
|
||||
/* reopen device to clear hangup */
|
||||
open_fifo();
|
||||
return;
|
||||
}
|
||||
if (1 != fifo_cmd) {
|
||||
/* ignore the cmd */
|
||||
fifo_active = true;
|
||||
@ -2805,7 +2805,7 @@ static void attach_debugger(int fd, short event, void *arg)
|
||||
* data is already available, so we only need to
|
||||
* check to see if we should spawn any daemons
|
||||
*/
|
||||
if ('\0' != MPIR_executable_path[0] || NULL != orte_debugger_test_daemon) {
|
||||
if ('\0' != MPIR_executable_path[0] || NULL != orte_debugger_test_daemon) {
|
||||
opal_output_verbose(2, orte_debug_output,
|
||||
"%s Spawning debugger daemons %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user