diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index b71a6e8323..9e5c8b3ea3 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -91,7 +91,7 @@ OMPI_DECLSPEC void __opal_attribute_noreturn__ #define OMPI_ERROR_LOG ORTE_ERROR_LOG /* Init and finalize objects and operations */ -#define ompi_rte_init(a, b) orte_init(a, b, ORTE_PROC_MPI) +OMPI_DECLSPEC int ompi_rte_init(int *pargc, char ***pargv); #define ompi_rte_finalize() orte_finalize() OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void); diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index c82b25ce40..1a678380f0 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -52,6 +52,79 @@ extern ompi_rte_orte_component_t mca_rte_orte_component; +typedef struct { + volatile bool active; + int status; + int errhandler; +} errhandler_t; + +static void register_cbfunc(int status, int errhndler, void *cbdata) +{ + errhandler_t *cd = (errhandler_t*)cbdata; + cd->status = status; + cd->errhandler = errhndler; + cd->active = false; +} + +static volatile bool wait_for_release = true; +static int errhandler = -1; + +static void notify_cbfunc(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(cbdata); + } + wait_for_release = false; +} + + +int ompi_rte_init(int *pargc, char ***pargv) +{ + int rc; + opal_list_t info; + opal_value_t val; + errhandler_t cd; + + if (ORTE_SUCCESS != (rc = orte_init(pargc, pargv, ORTE_PROC_MPI))) { + return rc; + } + + if (!orte_standalone_operation) { + /* register to receive any debugger release */ + OBJ_CONSTRUCT(&info, opal_list_t); + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = strdup(OPAL_PMIX_ERROR_NAME); + val.type = OPAL_INT; + val.data.integer = OPAL_ERR_DEBUGGER_RELEASE; + opal_list_append(&info, &val.super); + cd.status = ORTE_ERROR; + cd.errhandler = -1; + cd.active = 
true; + + opal_pmix.register_errhandler(&info, notify_cbfunc, register_cbfunc, &cd); + + /* let the MPI progress engine run while we wait for + * registration to complete */ + OMPI_WAIT_FOR_COMPLETION(cd.active); + /* safely deconstruct the list */ + opal_list_remove_first(&info); + OBJ_DESTRUCT(&val); + OBJ_DESTRUCT(&info); + if (OPAL_SUCCESS != cd.status) { + /* ouch - we are doomed */ + ORTE_ERROR_LOG(cd.status); + return OMPI_ERROR; + } + errhandler = cd.errhandler; + } + + return OMPI_SUCCESS; +} + void ompi_rte_abort(int error_code, char *fmt, ...) { va_list arglist; @@ -100,10 +173,10 @@ void ompi_rte_abort(int error_code, char *fmt, ...) * attaching debuggers -- see big comment in * orte/tools/orterun/debuggers.c explaining the two scenarios. */ + void ompi_rte_wait_for_debugger(void) { int debugger; - orte_rml_recv_cb_t xfer; /* See lengthy comment in orte/tools/orterun/debuggers.c about orte_in_parallel_debugger */ @@ -117,12 +190,12 @@ void ompi_rte_wait_for_debugger(void) /* if not, just return */ return; } - /* if we are being debugged, then we need to find * the correct plug-ins */ ompi_debugger_setup_dlls(); + /* wait for the debugger to attach */ if (orte_standalone_operation) { /* spin until debugger attaches and releases us */ while (MPIR_debug_gate == 0) { @@ -133,23 +206,9 @@ void ompi_rte_wait_for_debugger(void) #endif } } else { - /* only the rank=0 proc waits for either a message from the - * HNP or for the debugger to attach - everyone else will just - * spin in * the grpcomm barrier in ompi_mpi_init until rank=0 - * joins them. 
- */ - if (0 != ORTE_PROC_MY_NAME->vpid) { - return; - } - - /* VPID 0 waits for a message from the HNP */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD, - ORTE_RML_TAG_DEBUGGER_RELEASE, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* let the MPI progress engine run while we wait */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); + /* now wait for the notification to occur */ + OMPI_WAIT_FOR_COMPLETION(wait_for_release); + /* deregister the errhandler */ + opal_pmix.deregister_errhandler(errhandler, NULL, NULL); } } diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c index 2b7e27bcfe..20ae1f0fe7 100644 --- a/opal/dss/dss_compare.c +++ b/opal/dss/dss_compare.c @@ -10,9 +10,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -449,3 +449,12 @@ int opal_dss_compare_jobid(opal_jobid_t *value1, return OPAL_EQUAL; } +int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type) +{ + if (*value1 > *value2) return OPAL_VALUE1_GREATER; + + if (*value2 > *value1) return OPAL_VALUE2_GREATER; + + return OPAL_EQUAL; +} + diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c index f0d1544ba5..839ddc648b 100644 --- a/opal/dss/dss_copy.c +++ b/opal/dss/dss_copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -61,6 +61,7 @@ int opal_dss_std_copy(void **dest, void *src, opal_data_type_t type) case OPAL_INT: case OPAL_UINT: + case OPAL_STATUS: datasize = sizeof(int); break; diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h index b00a37ffd0..2c1e3af73b 100644 --- a/opal/dss/dss_internal.h +++ b/opal/dss/dss_internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -332,6 +332,9 @@ int opal_dss_pack_jobid(opal_buffer_t *buffer, const void *src, int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type); +int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); + /* * Internal unpack functions */ @@ -401,6 +404,8 @@ int opal_dss_unpack_jobid(opal_buffer_t *buffer, void *dest, int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest, int32_t *num_vals, opal_data_type_t type); +int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); /* * Internal copy functions @@ -497,6 +502,8 @@ int opal_dss_compare_jobid(opal_jobid_t *value1, opal_jobid_t *value2, opal_data_type_t type); +int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type); + /* * Internal print functions */ @@ -536,6 +543,7 @@ int opal_dss_print_time(char **output, char *prefix, time_t *src, opal_data_type int opal_dss_print_name(char **output, char *prefix, opal_process_name_t 
*name, opal_data_type_t type); int opal_dss_print_jobid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type); int opal_dss_print_vpid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type); +int opal_dss_print_status(char **output, char *prefix, int *src, opal_data_type_t type); /* diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c index 628806b570..366cf2586a 100644 --- a/opal/dss/dss_open_close.c +++ b/opal/dss/dss_open_close.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -611,6 +611,17 @@ int opal_dss_open(void) return rc; } + + tmp = OPAL_STATUS; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_status, + opal_dss_unpack_status, + (opal_dss_copy_fn_t)opal_dss_std_copy, + (opal_dss_compare_fn_t)opal_dss_compare_status, + (opal_dss_print_fn_t)opal_dss_print_status, + OPAL_DSS_UNSTRUCTURED, + "OPAL_STATUS", &tmp))) { + return rc; + } /* All done */ opal_dss_initialized = true; diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c index a68ad12930..396c351d25 100644 --- a/opal/dss/dss_pack.c +++ b/opal/dss/dss_pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -1240,3 +1240,20 @@ int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src, return ret; } +/* + * STATUS + */ +int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + + /* Turn around and pack the real type */ + ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_INT); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + } + + return ret; +} + diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c index ece4572eec..f8e413efea 100644 --- a/opal/dss/dss_print.c +++ b/opal/dss/dss_print.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -25,6 +25,7 @@ #include "opal_stdint.h" #include +#include "opal/util/error.h" #include "opal/dss/dss_internal.h" int opal_dss_print(char **output, char *prefix, void *src, opal_data_type_t type) @@ -1060,3 +1061,29 @@ int opal_dss_print_vpid(char **output, char *prefix, return OPAL_SUCCESS; } + +int opal_dss_print_status(char **output, char *prefix, + int *src, opal_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_STATUS\tValue: NULL pointer", prefx); + if (prefx != prefix) { + free(prefx); + } + return OPAL_SUCCESS; + } + + asprintf(output, "%sData type: OPAL_STATUS\tValue: %s", prefx, opal_strerror(*src)); + if (prefx != prefix) { + free(prefx); + } + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h index c2612231f3..8424d2b908 100644 --- a/opal/dss/dss_types.h +++ b/opal/dss/dss_types.h @@ -13,9 +13,9 @@ * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -120,6 +120,8 @@ typedef struct { #define OPAL_NAME (opal_data_type_t) 50 #define OPAL_JOBID (opal_data_type_t) 51 #define OPAL_VPID (opal_data_type_t) 52 +#define OPAL_STATUS (opal_data_type_t) 53 + /* OPAL Dynamic */ #define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 100 @@ -245,6 +247,7 @@ typedef struct { float fval; double dval; struct timeval tv; + int status; opal_process_name_t name; opal_bool_array_t flag_array; opal_uint8_array_t byte_array; diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c index 99e62d097f..4f66e5aacb 100644 --- a/opal/dss/dss_unpack.c +++ b/opal/dss/dss_unpack.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -1519,3 +1519,20 @@ int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest, return ret; } + +/* + * STATUS + */ +int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + int ret; + + /* Turn around and unpack the real type */ + ret = opal_dss_unpack_buffer(buffer, dest, num_vals, OPAL_INT); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + } + + return ret; +} diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 82c046946f..bb0a575d50 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,7 +80,8 @@ enum { OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50), OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51), OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52), - OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53) + OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53), + OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/pmix/pmix112/Makefile.am b/opal/mca/pmix/pmix112/Makefile.am index 7c669d400c..e4dcaa34a1 100644 --- a/opal/mca/pmix/pmix112/Makefile.am +++ b/opal/mca/pmix/pmix112/Makefile.am @@ -42,6 +42,7 @@ mca_pmix_pmix112_la_CPPFLAGS = \ -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) mca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) mca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +mca_pmix_pmix112_la_DEPENDENCIES = $(mca_pmix_pmix112_la_LIBADD) noinst_LTLIBRARIES = $(component_noinst) libmca_pmix_pmix112_la_SOURCES =$(sources) @@ -49,3 +50,4 @@ libmca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS) libmca_pmix_pmix112_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) libmca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) libmca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +libmca_pmix_pmix112_la_DEPENDENCIES = $(libmca_pmix_pmix112_la_LIBADD) diff --git a/opal/mca/pmix/pmix120/Makefile.am b/opal/mca/pmix/pmix120/Makefile.am index
c48b0f143c..2d413b2ff5 100644 --- a/opal/mca/pmix/pmix120/Makefile.am +++ b/opal/mca/pmix/pmix120/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. @@ -42,6 +42,7 @@ mca_pmix_pmix120_la_CPPFLAGS = \ -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS) mca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS) mca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS) +mca_pmix_pmix120_la_DEPENDENCIES = $(mca_pmix_pmix120_la_LIBADD) noinst_LTLIBRARIES = $(component_noinst) libmca_pmix_pmix120_la_SOURCES =$(sources) @@ -49,3 +50,4 @@ libmca_pmix_pmix120_la_CFLAGS = $(opal_pmix_pmix120_CFLAGS) libmca_pmix_pmix120_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix120_CPPFLAGS) libmca_pmix_pmix120_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix120_LDFLAGS) libmca_pmix_pmix120_la_LIBADD = $(opal_pmix_pmix120_LIBS) +libmca_pmix_pmix120_la_DEPENDENCIES = $(libmca_pmix_pmix120_la_LIBADD) diff --git a/opal/mca/pmix/pmix120/pmix/include/pmix/pmix_common.h b/opal/mca/pmix/pmix120/pmix/include/pmix/pmix_common.h index 81b5f93dd7..c3c73763f5 100644 --- a/opal/mca/pmix/pmix120/pmix/include/pmix/pmix_common.h +++ b/opal/mca/pmix/pmix120/pmix/include/pmix/pmix_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -63,8 +63,18 @@ BEGIN_C_DECLS #define PMIX_MAX_NSLEN 255 #define PMIX_MAX_KEYLEN 511 -/* define a *wildcard* value for requests involving rank */ -#define PMIX_RANK_WILDCARD -1 +/* define a value for requests for job-level data + * where the info itself isn't associated with any + * specific rank, or when a request involves + * a rank that isn't known - e.g., when someone requests + * info thru one of the legacy interfaces where the rank + * is typically encoded into the key itself since there is + * no rank parameter in the API itself */ +#define PMIX_RANK_UNDEF INT32_MAX +/* define a value to indicate that the user wants the + * data for the given key from every rank that posted + * that key */ +#define PMIX_RANK_WILDCARD (INT32_MAX-1) /* define a set of "standard" PMIx attributes that can * be queried. Implementations (and users) are free to extend as @@ -163,7 +173,7 @@ BEGIN_C_DECLS /* error handler registration and notification info keys */ #define PMIX_ERROR_NAME "pmix.errname" // enum pmix_status_t specific error to be notified -#define PMIX_ERROR_GROUP_COMM "pmix.errgroup.comm" // bool - set true to get comm errors notification +#define PMIX_ERROR_GROUP_COMM "pmix.errgroup.comm" // bool - set true to get comm errors notification #define PMIX_ERROR_GROUP_ABORT "pmix.errgroup.abort" // bool -set true to get abort errors notification #define PMIX_ERROR_GROUP_MIGRATE "pmix.errgroup.migrate" // bool -set true to get migrate errors notification #define PMIX_ERROR_GROUP_RESOURCE "pmix.errgroup.resource" // bool -set true to get resource errors notification @@ -199,7 +209,7 @@ BEGIN_C_DECLS /**** PMIX ERROR CONSTANTS ****/ /* PMIx errors are always negative, with 0 reserved for success */ -#define PMIX_ERROR_MIN -50 // set equal to number of non-zero entries in enum +#define PMIX_ERROR_MIN -52 // set
equal to number of non-zero entries in enum typedef enum { PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER = PMIX_ERROR_MIN, @@ -257,6 +267,8 @@ typedef enum { PMIX_ERR_SILENT, PMIX_ERROR, + PMIX_ERR_GRP_FOUND, + PMIX_ERR_DFLT_FOUND, PMIX_SUCCESS } pmix_status_t; @@ -288,6 +300,9 @@ typedef enum { PMIX_TIMEVAL, PMIX_TIME, + PMIX_STATUS, // needs to be tracked separately from integer for those times + // when we are embedded and it needs to be converted to the + // host error definitions PMIX_HWLOC_TOPO, PMIX_VALUE, PMIX_INFO_ARRAY, @@ -411,6 +426,7 @@ typedef struct { float fval; double dval; struct timeval tv; + pmix_status_t status; pmix_info_array_t array; pmix_byte_object_t bo; } data; @@ -494,6 +510,7 @@ extern void pmix_value_load(pmix_value_t *v, void *data, /**** PMIX INFO STRUCT ****/ typedef struct { char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator + bool required; // defaults to optional (i.e., required=false) pmix_value_t value; } pmix_info_t; @@ -531,6 +548,10 @@ typedef struct { (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ pmix_value_load(&((m)->value), (v), (t)); \ } while(0); +#define PMIX_INFO_REQUIRED(m) \ + (m)->required = true; +#define PMIX_INFO_OPTIONAL(m) \ + (m)->required = false; /**** PMIX LOOKUP RETURN STRUCT ****/ diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/copy.c index ece18b3a13..d7b6734372 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/copy.c +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -136,6 +136,10 @@ int pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type) datasize = sizeof(time_t); break; + case PMIX_STATUS: + datasize = sizeof(pmix_status_t); + break; + default: return PMIX_ERR_UNKNOWN_DATA_TYPE; } @@ -166,7 +170,7 @@ int pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type) return PMIX_SUCCESS; } -/* compare function for pmix_value_t*/ +/* compare function for pmix_value_t */ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) { bool rc = false; @@ -213,6 +217,9 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) case PMIX_STRING: rc = strcmp(p->data.string, p1->data.string); break; + case PMIX_STATUS: + rc = (p->data.status == p1->data.status); + break; default: pmix_output(0, "COMPARE-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type); } @@ -293,6 +300,9 @@ pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) p->data.tv.tv_sec = src->data.tv.tv_sec; p->data.tv.tv_usec = src->data.tv.tv_usec; break; + case PMIX_STATUS: + memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); + break; case PMIX_INFO_ARRAY: p->data.array.size = src->data.array.size; if (0 < src->data.array.size) { @@ -343,6 +353,7 @@ int pmix_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src, { *dest = (pmix_info_t*)malloc(sizeof(pmix_info_t)); (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + (*dest)->required = src->required; return pmix_value_xfer(&(*dest)->value, &src->value); } diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/internal.h b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/internal.h index 1f99363824..ce8d972a32 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/internal.h +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -275,6 +275,8 @@ int pmix_bfrop_pack_timeval(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); +int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); #if PMIX_HAVE_HWLOC int pmix_bfrop_pack_topo(pmix_buffer_t *buffer, const void *src, @@ -337,6 +339,8 @@ int pmix_bfrop_unpack_timeval(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); +int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); #if PMIX_HAVE_HWLOC int pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest, @@ -427,6 +431,7 @@ int pmix_bfrop_print_double(char **output, char *prefix, double *src, pmix_data_ int pmix_bfrop_print_timeval(char **output, char *prefix, struct timeval *src, pmix_data_type_t type); int pmix_bfrop_print_time(char **output, char *prefix, time_t *src, pmix_data_type_t type); +int pmix_bfrop_print_status(char **output, char *prefix, pmix_status_t *src, pmix_data_type_t type); #if PMIX_HAVE_HWLOC int pmix_bfrop_print_topo(char **output, char *prefix, diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/open_close.c index 9809a35eb5..963861a9b2 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/open_close.c +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/open_close.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. 
All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -289,6 +289,12 @@ pmix_status_t pmix_bfrop_open(void) pmix_bfrop_std_copy, pmix_bfrop_print_time); + PMIX_REGISTER_TYPE("PMIX_STATUS", PMIX_STATUS, + pmix_bfrop_pack_status, + pmix_bfrop_unpack_status, + pmix_bfrop_std_copy, + pmix_bfrop_print_status); + #if PMIX_HAVE_HWLOC PMIX_REGISTER_TYPE("PMIX_HWLOC_TOPO", PMIX_HWLOC_TOPO, pmix_bfrop_pack_topo, @@ -395,6 +401,8 @@ pmix_status_t pmix_bfrop_close(void) void pmix_value_load(pmix_value_t *v, void *data, pmix_data_type_t type) { + pmix_byte_object_t *bo; + v->type = type; if (NULL == data) { /* just set the fields to zero */ @@ -457,9 +465,13 @@ void pmix_value_load(pmix_value_t *v, void *data, case PMIX_TIMEVAL: memcpy(&(v->data.tv), data, sizeof(struct timeval)); break; + case PMIX_STATUS: + memcpy(&(v->data.status), data, sizeof(pmix_status_t)); + break; case PMIX_BYTE_OBJECT: - v->data.bo.bytes = data; - memcpy(&(v->data.bo.size), data, sizeof(size_t)); + bo = (pmix_byte_object_t*)data; + v->data.bo.bytes = bo->bytes; + memcpy(&(v->data.bo.size), &bo->size, sizeof(size_t)); break; case PMIX_TIME: case PMIX_HWLOC_TOPO: @@ -569,6 +581,10 @@ pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, memcpy(*data, &(kv->data.tv), sizeof(struct timeval)); *sz = sizeof(struct timeval); break; + case PMIX_STATUS: + memcpy(*data, &(kv->data.status), sizeof(pmix_status_t)); + *sz = sizeof(pmix_status_t); + break; case PMIX_BYTE_OBJECT: if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) { *data = kv->data.bo.bytes; diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/pack.c index be3a894111..162815fe18 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/pack.c +++ 
b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. @@ -406,6 +406,26 @@ int pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, } +/* STATUS */ +int pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i; + pmix_status_t *ssrc = (pmix_status_t *)src; + int32_t status; + + for (i = 0; i < num_vals; ++i) { + status = (int32_t)ssrc[i]; + if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &status, 1, PMIX_INT32))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + + /* PACK FUNCTIONS FOR GENERIC PMIX TYPES */ static int pack_val(pmix_buffer_t *buffer, pmix_value_t *p) @@ -503,6 +523,11 @@ static int pack_val(pmix_buffer_t *buffer, return ret; } break; + case PMIX_STATUS: + if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.status, 1, PMIX_STATUS))) { + return ret; + } + break; case PMIX_INFO_ARRAY: if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.array, 1, PMIX_INFO_ARRAY))) { return ret; @@ -563,6 +588,10 @@ int pmix_bfrop_pack_info(pmix_buffer_t *buffer, const void *src, if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &foo, 1, PMIX_STRING))) { return ret; } + /* pack required flag */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_bool(buffer, &info[i].required, 1, PMIX_BOOL))) { + return ret; + } /* pack the type */ if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &info[i].value.type, 1, PMIX_INT))) { return ret; diff --git 
a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/print.c b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/print.c index e63756376c..f61acaaf1f 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/print.c +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/print.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +27,7 @@ #include #endif +#include "src/util/error.h" #include "src/buffer_ops/internal.h" int pmix_bfrop_print(char **output, char *prefix, void *src, pmix_data_type_t type) @@ -540,6 +541,32 @@ int pmix_bfrop_print_timeval(char **output, char *prefix, return PMIX_SUCCESS; } +int pmix_bfrop_print_status(char **output, char *prefix, + pmix_status_t *src, pmix_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: PMIX_STATUS\tValue: NULL pointer", prefx); + if (prefx != prefix) { + free(prefx); + } + return PMIX_SUCCESS; + } + + asprintf(output, "%sData type: PMIX_STATUS\tValue: %s", prefx, PMIx_Error_string(*src)); + if (prefx != prefix) { + free(prefx); + } + + return PMIX_SUCCESS; +} + /* PRINT FUNCTIONS FOR GENERIC PMIX TYPES */ /* @@ -632,6 +659,10 @@ int pmix_bfrop_print_value(char **output, char *prefix, asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx, (long)src->data.tv.tv_sec, (long)src->data.tv.tv_usec); break; + case PMIX_STATUS: + asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATUS\tValue: %s", prefx, + PMIx_Error_string(src->data.status)); + break; default: asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: 
UNPRINTABLE", prefx); break; @@ -648,8 +679,8 @@ int pmix_bfrop_print_info(char **output, char *prefix, char *tmp; pmix_bfrop_print_value(&tmp, NULL, &src->value, PMIX_VALUE); - asprintf(output, "%sKEY: %s %s", prefix, src->key, - (NULL == tmp) ? "NULL" : tmp); + asprintf(output, "%sKEY: %s REQD: %s %s", prefix, src->key, + src->required ? "Y" : "N", (NULL == tmp) ? "PMIX_VALUE: NULL" : tmp); if (NULL != tmp) { free(tmp); } diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/unpack.c index 2f7f8a4cb2..99a48f0625 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/unpack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. 
@@ -500,6 +500,20 @@ int pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest, } +int pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_status * %d\n", (int)*num_vals); + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*(sizeof(pmix_status_t)))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_INT32); +} + + /* UNPACK FUNCTIONS FOR GENERIC PMIX TYPES */ /* @@ -672,6 +686,11 @@ int pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, } (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); + /* unpack the required flag */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_bool(buffer, &ptr[i].required, &m, PMIX_BOOL))) { + return ret; + } /* unpack value - since the value structure is statically-defined * instead of a pointer in this struct, we directly unpack it to * avoid the malloc */ diff --git a/opal/mca/pmix/pmix120/pmix/src/class/Makefile.am b/opal/mca/pmix/pmix120/pmix/src/class/Makefile.am index a173ff46d9..d13dad2d5d 100644 --- a/opal/mca/pmix/pmix120/pmix/src/class/Makefile.am +++ b/opal/mca/pmix/pmix120/pmix/src/class/Makefile.am @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,10 +26,14 @@ headers += \ src/class/pmix_object.h \ src/class/pmix_list.h \ src/class/pmix_pointer_array.h \ - src/class/pmix_hash_table.h + src/class/pmix_hash_table.h \ + src/class/pmix_hotel.h \ + src/class/pmix_ring_buffer.h sources += \ src/class/pmix_object.c \ src/class/pmix_list.c \ src/class/pmix_pointer_array.c \ - src/class/pmix_hash_table.c + src/class/pmix_hash_table.c \ + src/class/pmix_hotel.c \ + src/class/pmix_ring_buffer.c diff --git a/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c b/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c new file mode 100644 index 0000000000..9b03ecdc8e --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#include + +#include PMIX_EVENT_HEADER +#include "src/class/pmix_hotel.h" + + +static void local_eviction_callback(int fd, short flags, void *arg) +{ + pmix_hotel_room_eviction_callback_arg_t *eargs = + (pmix_hotel_room_eviction_callback_arg_t*) arg; + void *occupant = eargs->hotel->rooms[eargs->room_num].occupant; + + /* Remove the occurpant from the room. + + Do not change this logic without also changing the same logic + in pmix_hotel_checkout() and + pmix_hotel_checkout_and_return_occupant(). 
*/ + pmix_hotel_t *hotel = eargs->hotel; + pmix_hotel_room_t *room = &(hotel->rooms[eargs->room_num]); + room->occupant = NULL; + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = eargs->room_num; + + /* Invoke the user callback to tell them that they were evicted */ + hotel->evict_callback_fn(hotel, + eargs->room_num, + occupant); +} + + +int pmix_hotel_init(pmix_hotel_t *h, int num_rooms, + pmix_event_base_t *evbase, + uint32_t eviction_timeout, + int eviction_event_priority, + pmix_hotel_eviction_callback_fn_t evict_callback_fn) +{ + int i; + + /* Bozo check */ + if (num_rooms <= 0 || + NULL == evict_callback_fn) { + return PMIX_ERR_BAD_PARAM; + } + + h->num_rooms = num_rooms; + h->evbase = evbase; + h->eviction_timeout.tv_usec = eviction_timeout % 1000000; + h->eviction_timeout.tv_sec = eviction_timeout / 1000000; + h->evict_callback_fn = evict_callback_fn; + h->rooms = (pmix_hotel_room_t*)malloc(num_rooms * sizeof(pmix_hotel_room_t)); + if (NULL != evict_callback_fn) { + h->eviction_args = + (pmix_hotel_room_eviction_callback_arg_t*)malloc(num_rooms * sizeof(pmix_hotel_room_eviction_callback_arg_t)); + } + h->unoccupied_rooms = (int*) malloc(num_rooms * sizeof(int)); + h->last_unoccupied_room = num_rooms - 1; + + for (i = 0; i < num_rooms; ++i) { + /* Mark this room as unoccupied */ + h->rooms[i].occupant = NULL; + + /* Setup this room in the unoccupied index array */ + h->unoccupied_rooms[i] = i; + + /* Setup the eviction callback args */ + h->eviction_args[i].hotel = h; + h->eviction_args[i].room_num = i; + + /* Create this room's event (but don't add it) */ + if (NULL != h->evbase) { + event_assign(&(h->rooms[i].eviction_timer_event), + h->evbase, + -1, 0, local_eviction_callback, + &(h->eviction_args[i])); + } + } + + return PMIX_SUCCESS; +} + +static void constructor(pmix_hotel_t *h) +{ + h->num_rooms = 0; + h->evbase = NULL; + h->eviction_timeout.tv_sec = 0; + 
h->eviction_timeout.tv_usec = 0; + h->evict_callback_fn = NULL; + h->rooms = NULL; + h->eviction_args = NULL; + h->unoccupied_rooms = NULL; + h->last_unoccupied_room = -1; +} + +static void destructor(pmix_hotel_t *h) +{ + int i; + + /* Go through all occupied rooms and destroy their events */ + if (NULL != h->evbase) { + for (i = 0; i < h->num_rooms; ++i) { + if (NULL != h->rooms[i].occupant) { + event_del(&(h->rooms[i].eviction_timer_event)); + } + } + } + + if (NULL != h->rooms) { + free(h->rooms); + } + if (NULL != h->eviction_args) { + free(h->eviction_args); + } + if (NULL != h->unoccupied_rooms) { + free(h->unoccupied_rooms); + } +} + +PMIX_CLASS_INSTANCE(pmix_hotel_t, + pmix_object_t, + constructor, + destructor); diff --git a/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h b/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h new file mode 100644 index 0000000000..87ecbc864b --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/src/class/pmix_hotel.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** @file + * + * This file provides a "hotel" class: + * + * - A hotel has a fixed number of rooms (i.e., storage slots) + * - An arbitrary data pointer can check into an empty room at any time + * - The occupant of a room can check out at any time + * - Optionally, the occupant of a room can be forcibly evicted at a + * given time (i.e., when an pmix timer event expires). + * - The hotel has finite occupancy; if you try to checkin a new + * occupant and the hotel is already full, it will gracefully fail + * to checkin. + * + * One use case for this class is for ACK-based network retransmission + * schemes (NACK-based retransmission schemes probably can use + * pmix_ring_buffer). 
+ * + * For ACK-based retransmission schemes, a hotel might be used + * something like this: + * + * - when a message is sent, check it in to a hotel with a timer + * - if an ACK is received, check it out of the hotel (which also cancels + * the timer) + * - if an ACK isn't received in time, the timer will expire and the + * upper layer will get a callback with the message + * - if an ACK is received late (i.e., after its timer has expired), + * then checkout will gracefully fail + * + * Note that this class intentionally provides pretty minimal + * functionality. It is intended to be used in performance-critical + * code paths -- extra functionality would simply add latency. + * + * There is an pmix_hotel_init() function to create a hotel, but no + * corresponding finalize; the destructor will handle all finalization + * issues. Note that when a hotel is destroyed, it will delete all + * pending events from the event base (i.e., all pending eviction + * callbacks); no further eviction callbacks will be invoked. + */ + +#ifndef PMIX_HOTEL_H +#define PMIX_HOTEL_H + +#include +#include "private/types.h" +#include "private/prefetch.h" +#include "pmix/pmix_common.h" +#include "src/class/pmix_object.h" +#include PMIX_EVENT_HEADER +#include +#include "src/util/output.h" + +BEGIN_C_DECLS + +struct pmix_hotel_t; + +/* User-supplied function to be invoked when an occupant is evicted. */ +typedef void (*pmix_hotel_eviction_callback_fn_t)(struct pmix_hotel_t *hotel, + int room_num, + void *occupant); + +/* Note that this is an internal data structure; it is not part of the + public pmix_hotel interface. Public consumers of pmix_hotel + shouldn't need to use this struct at all (we only have it here in + this .h file because some functions are inlined for speed, and need + to get to the internals of this struct). + + The room struct should be as small as possible to be cache + friendly. 
Specifically: it would be great if multiple rooms could + fit in a single cache line because we'll always allocate a + contiguous set of rooms in an array. */ +typedef struct { + void *occupant; + pmix_event_t eviction_timer_event; +} pmix_hotel_room_t; + +/* Note that this is an internal data structure; it is not part of the + public pmix_hotel interface. Public consumers of pmix_hotel + shouldn't need to use this struct at all (we only have it here in + this .h file because some functions are inlined for speed, and need + to get to the internals of this struct). + + Use a unique struct for holding the arguments for eviction + callbacks. We *could* make the to-be-evicted pmix_hotel_room_t + instance as the argument, but we don't, for 2 reasons: + + 1. We want as many pmix_hotel_room_t's to fit in a cache line as + possible (i.e., to be as cache-friendly as possible). The + common/fast code path only needs to access the data in the + pmix_hotel_room_t (and not the callback argument data). + + 2. Evictions will be uncommon, so we don't mind penalizing them a + bit by making the data be in a separate cache line. 
+*/ +typedef struct { + struct pmix_hotel_t *hotel; + int room_num; +} pmix_hotel_room_eviction_callback_arg_t; + +typedef struct pmix_hotel_t { + /* make this an object */ + pmix_object_t super; + + /* Max number of rooms in the hotel */ + int num_rooms; + + /* event base to be used for eviction timeout */ + pmix_event_base_t *evbase; + struct timeval eviction_timeout; + pmix_hotel_eviction_callback_fn_t evict_callback_fn; + + /* All rooms in this hotel */ + pmix_hotel_room_t *rooms; + + /* Separate array for all the eviction callback arguments (see + rationale above for why this is a separate array) */ + pmix_hotel_room_eviction_callback_arg_t *eviction_args; + + /* All currently unoccupied rooms in this hotel (not necessarily + in any particular order) */ + int *unoccupied_rooms; + int last_unoccupied_room; +} pmix_hotel_t; +PMIX_CLASS_DECLARATION(pmix_hotel_t); + +/** + * Initialize the hotel. + * + * @param hotel Pointer to a hotel (IN) + * @param num_rooms The total number of rooms in the hotel (IN) + * @param evbase Pointer to event base used for eviction timeout + * @param eviction_timeout Max length of a stay at the hotel before + * the eviction callback is invoked (in microseconds) + * @param eviction_event_priority Event lib priority for the eviction timeout + * @param evict_callback_fn Callback function invoked if an occupant + * does not check out before the eviction_timeout. + * + * NOTE: If the callback function is NULL, then no eviction timer + * will be set - occupants will remain checked into the hotel until + * explicitly checked out. + * + * Also note: the eviction_callback_fn should absolutely not call any + * of the hotel checkout functions. Specifically: the occupant has + * already been ("forcibly") checked out *before* the + * eviction_callback_fn is invoked. + * + * @return PMIX_SUCCESS if all initializations were succesful. Otherwise, + * the error indicate what went wrong in the function. 
+ */ +PMIX_DECLSPEC int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, + pmix_event_base_t *evbase, + uint32_t eviction_timeout, + int eviction_event_priority, + pmix_hotel_eviction_callback_fn_t evict_callback_fn); + +/** + * Check in an occupant to the hotel. + * + * @param hotel Pointer to hotel (IN) + * @param occupant Occupant to check in (opaque to the hotel) (IN) + * @param room The room number that identifies this occupant in the + * hotel (OUT). + * + * If there is room in the hotel, the occupant is checked in and the + * timer for that occupant is started. The occupant's room is + * returned in the "room" param. + * + * Note that once a room's checkout_expire timer expires, the occupant + * is forcibly checked out, and then the eviction callback is invoked. + * + * @return PMIX_SUCCESS if the occupant is successfully checked in, + * and the room parameter will contain a valid value. + * @return PMIX_ERR_TEMP_OUT_OF_RESOURCE is the hotel is full. Try + * again later. + */ +static inline int pmix_hotel_checkin(pmix_hotel_t *hotel, + void *occupant, + int *room_num) +{ + pmix_hotel_room_t *room; + + /* Do we have any rooms available? */ + if (PMIX_UNLIKELY(hotel->last_unoccupied_room < 0)) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + + /* Put this occupant into the first empty room that we have */ + *room_num = hotel->unoccupied_rooms[hotel->last_unoccupied_room--]; + room = &(hotel->rooms[*room_num]); + room->occupant = occupant; + + /* Assign the event and make it pending */ + if (NULL != hotel->evbase) { + event_add(&(room->eviction_timer_event), + &(hotel->eviction_timeout)); + } + + return PMIX_SUCCESS; +} + +/** + * Same as pmix_hotel_checkin(), but slightly optimized for when the + * caller *knows* that there is a room available. 
+ */ +static inline void pmix_hotel_checkin_with_res(pmix_hotel_t *hotel, + void *occupant, + int *room_num) +{ + pmix_hotel_room_t *room; + + /* Put this occupant into the first empty room that we have */ + *room_num = hotel->unoccupied_rooms[hotel->last_unoccupied_room--]; + room = &(hotel->rooms[*room_num]); + assert(room->occupant == NULL); + room->occupant = occupant; + + /* Assign the event and make it pending */ + if (NULL != hotel->evbase) { + event_add(&(room->eviction_timer_event), + &(hotel->eviction_timeout)); + } +} + +/** + * Check the specified occupant out of the hotel. + * + * @param hotel Pointer to hotel (IN) + * @param room Room number to checkout (IN) + * + * If there is an occupant in the room, their timer is canceled and + * they are checked out. + * + * Nothing is returned (as a minor optimization). + */ +static inline void pmix_hotel_checkout(pmix_hotel_t *hotel, int room_num) +{ + pmix_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + /* If there's an occupant in the room, check them out */ + room = &(hotel->rooms[room_num]); + if (PMIX_LIKELY(NULL != room->occupant)) { + /* Do not change this logic without also changing the same + logic in pmix_hotel_checkout_and_return_occupant() and + pmix_hotel.c:local_eviction_callback(). */ + room->occupant = NULL; + if (NULL != hotel->evbase) { + event_del(&(room->eviction_timer_event)); + } + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; + } + + /* Don't bother returning whether we actually checked someone out + or not (because this is in the critical performance path) -- + assume the upper layer knows what it's doing. */ +} + +/** + * Check the specified occupant out of the hotel and return the occupant. 
+ * + * @param hotel Pointer to hotel (IN) + * @param room Room number to checkout (IN) + * @param void * occupant (OUT) + * If there is an occupant in the room, their timer is canceled and + * they are checked out. + * + * Use this checkout and when caller needs the occupant + */ +static inline void pmix_hotel_checkout_and_return_occupant(pmix_hotel_t *hotel, int room_num, void **occupant) +{ + pmix_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + /* If there's an occupant in the room, check them out */ + room = &(hotel->rooms[room_num]); + if (PMIX_LIKELY(NULL != room->occupant)) { + pmix_output (10, "checking out occupant %p from room num %d", room->occupant, room_num); + /* Do not change this logic without also changing the same + logic in pmix_hotel_checkout() and + pmix_hotel.c:local_eviction_callback(). */ + *occupant = room->occupant; + room->occupant = NULL; + if (NULL != hotel->evbase) { + event_del(&(room->eviction_timer_event)); + } + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; + } + else { + *occupant = NULL; + } +} + +/** + * Returns true if the hotel is empty (no occupant) + * @param hotel Pointer to hotel (IN) + * @return bool true if empty false if there is a occupant(s) + * + */ +static inline bool pmix_hotel_is_empty (pmix_hotel_t *hotel) +{ + if (hotel->last_unoccupied_room == hotel->num_rooms - 1) + return true; + else + return false; +} + +/** + * Access the occupant of a room, but leave them checked into their room. + * + * @param hotel Pointer to hotel (IN) + * @param room Room number to checkout (IN) + * @param void * occupant (OUT) + * + * This accessor function is typically used to cycle across the occupants + * to check for someone already present that matches a description. 
+ */ +static inline void pmix_hotel_knock(pmix_hotel_t *hotel, int room_num, void **occupant) +{ + pmix_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + *occupant = NULL; + + /* If there's an occupant in the room, have them come to the door */ + room = &(hotel->rooms[room_num]); + if (PMIX_LIKELY(NULL != room->occupant)) { + pmix_output (10, "occupant %p in room num %d responded to knock", room->occupant, room_num); + *occupant = room->occupant; + } +} + +END_C_DECLS + +#endif /* PMIX_HOTEL_H */ diff --git a/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c b/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c new file mode 100644 index 0000000000..088bbe430e --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.c @@ -0,0 +1,154 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#include +#include + +#include "pmix/pmix_common.h" +#include "src/class/pmix_ring_buffer.h" +#include "src/util/output.h" + +static void pmix_ring_buffer_construct(pmix_ring_buffer_t *); +static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *); + +PMIX_CLASS_INSTANCE(pmix_ring_buffer_t, pmix_object_t, + pmix_ring_buffer_construct, + pmix_ring_buffer_destruct); + +/* + * pmix_ring_buffer constructor + */ +static void pmix_ring_buffer_construct(pmix_ring_buffer_t *ring) +{ + ring->head = 0; + ring->tail = -1; + ring->size = 0; + ring->addr = NULL; +} + +/* + * pmix_ring_buffer destructor + */ +static void pmix_ring_buffer_destruct(pmix_ring_buffer_t *ring) +{ + if( NULL != ring->addr) { + free(ring->addr); + ring->addr = NULL; + } + + ring->size = 0; +} + +/** + * initialize a ring object + */ +int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size) +{ + /* check for errors */ + if (NULL == ring) { + return PMIX_ERR_BAD_PARAM; + } + + /* Allocate and set the ring to NULL */ + ring->addr = (char **)calloc(size * sizeof(char*), 1); + if (NULL == ring->addr) { /* out of memory */ + return PMIX_ERR_OUT_OF_RESOURCE; + } + ring->size = size; + + return PMIX_SUCCESS; +} + +void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr) +{ + char *p=NULL; + + if (NULL != ring->addr[ring->head]) { + p = (char*)ring->addr[ring->head]; + if (ring->tail == ring->size - 1) { + ring->tail = 0; + } else { + ring->tail = ring->head + 1; + } + } + ring->addr[ring->head] = (char*)ptr; + if (ring->tail < 0) { + ring->tail = ring->head; + } + if (ring->head == ring->size - 1) { + ring->head = 0; + } else { + ring->head++; + } + return (void*)p; +} + +void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring) +{ + char *p=NULL; + + if (-1 == ring->tail) { + /* nothing has been put on the ring yet */ + p = NULL; + } else { + p = (char*)ring->addr[ring->tail]; + 
ring->addr[ring->tail] = NULL; + if (ring->tail == ring->size-1) { + ring->tail = 0; + } else { + ring->tail++; + } + /* see if the ring is empty */ + if (ring->tail == ring->head) { + ring->tail = -1; + } + } + return (void*)p; +} + + void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i) + { + char *p=NULL; + int offset; + + if (ring->size <= i || -1 == ring->tail) { + p = NULL; + } else if (i < 0) { + /* return the value at the head of the ring */ + if (ring->head == 0) { + p = ring->addr[ring->size - 1]; + } else { + p = ring->addr[ring->head - 1]; + } + } else { + /* calculate the offset of the tail in the ring */ + offset = ring->tail + i; + /* correct for wrap-around */ + if (ring->size <= offset) { + offset -= ring->size; + } + p = ring->addr[offset]; + } + return (void*)p; +} diff --git a/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h b/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h new file mode 100644 index 0000000000..618868ca28 --- /dev/null +++ b/opal/mca/pmix/pmix120/pmix/src/class/pmix_ring_buffer.h @@ -0,0 +1,102 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
/**
 * dynamic pointer ring
 */
struct pmix_ring_buffer_t {
    /** base class */
    pmix_object_t super;
    /* head/tail indices: head is the slot the next push writes to,
     * tail is the slot the next pop reads from (-1 while the ring
     * is empty) */
    int head;
    int tail;
    /** size of list, i.e. number of elements in addr */
    int size;
    /** pointer to ring */
    char **addr;
};
/**
 * Convenience typedef
 */
typedef struct pmix_ring_buffer_t pmix_ring_buffer_t;
/**
 * Class declaration
 */
PMIX_DECLSPEC PMIX_CLASS_DECLARATION(pmix_ring_buffer_t);

/**
 * Initialize the ring buffer, defining its size.
 *
 * @param ring   Pointer to a ring buffer (IN/OUT)
 * @param size   The number of elements in the ring (IN)
 *
 * @return PMIX_SUCCESS if all initializations were successful.
 * PMIX_ERR_BAD_PARAM if ring is NULL, PMIX_ERR_OUT_OF_RESOURCE if
 * the storage for the ring cannot be allocated.
 */
PMIX_DECLSPEC int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size);

/**
 * Push an item onto the ring buffer, displacing the oldest
 * item on the ring if the ring is full
 *
 * @param ring   Pointer to ring (IN)
 * @param ptr    Pointer value (IN)
 *
 * @return Pointer to displaced item, NULL if ring
 *         is not yet full
 */
PMIX_DECLSPEC void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr);


/**
 * Pop an item off of the ring. The oldest entry on the ring will be
 * returned. If nothing on the ring, NULL is returned.
 *
 * @param ring          Pointer to ring (IN)
 *
 * @return The pointer that was stored in the oldest entry, or NULL
 *         if the ring is empty.
 */

PMIX_DECLSPEC void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring);

/*
 * Access an element of the ring, without removing it, indexed
 * starting at the tail - a value of -1 will return the element
 * at the head of the ring
 */
PMIX_DECLSPEC void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i);
@@ int PMI_Init(int *spawned) } pmi_init = 1; - return PMI_SUCCESS; + rc = PMIX_SUCCESS; + +error: + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int PMI_Initialized(PMI_BOOL *initialized) @@ -160,8 +178,6 @@ int PMI_KVS_Commit(const char kvsname[]) kvsname); rc = PMIx_Commit(); - /* PMIx permits only one data commit! */ - data_commited = true; return convert_err(rc); } @@ -169,17 +185,14 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - uint32_t i; - static pmix_proc_t proc; - uint32_t procnum; - proc = myproc; + pmix_proc_t proc; PMI_CHECK(); if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) { return PMI_ERR_INVALID_KVS; } - if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN)) { + if ((key == NULL) || (strlen(key) > PMI_MAX_KEY_LEN)) { return PMI_ERR_INVALID_KEY; } if (value == NULL) { @@ -189,60 +202,22 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt pmix_output_verbose(2, pmix_globals.debug_output, "PMI_KVS_Get: KVS=%s, key=%s value=%s", kvsname, key, value); - /* PMI-1 expects resource manager to set - * process mapping in ANL notation. */ - if (!strcmp(key, ANL_MAPPING)) { - /* we are looking in the job-data. If there is nothing there - * we don't want to look in rank's data, thus set rank to widcard */ - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && - (NULL != val) && (PMIX_STRING == val->type)) { - strncpy(value, val->data.string, length); - PMIX_VALUE_FREE(val, 1); - return PMI_SUCCESS; - } else { - /* artpol: - * Some RM's (i.e. SLURM) already have ANL precomputed. The export it - * through PMIX_ANL_MAP variable. - * If we haven't found it we want to have our own packing functionality - * since it's common. 
- * Somebody else has to write it since I've already done that for - * GPL'ed SLURM :) */ - return PMI_FAIL; + /* retrieve the data from PMIx - since we don't have a rank, + * we indicate that by passing the UNDEF value */ + (void)strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_UNDEF; + + rc = PMIx_Get(&proc, key, NULL, 0, &val); + if (PMIX_SUCCESS == rc && NULL != val) { + if (PMIX_STRING != val->type) { + rc = PMIX_ERROR; + } else if (NULL != val->data.string) { + (void)strncpy(value, val->data.string, length); } + PMIX_VALUE_RELEASE(val); } - /* We don't know what process keeps this data. So it looks like we need to - * check each process. - * TODO: Is there any beter way? - * WARNING: this may lead to the VERY long HANG's if we ask for the unknown key - * before we've done Commit on all nodes. We need a workaround for that. - * - * SOLUTION: perhaps rovide "OK if nothing" info flag to tell PMIx that - * the key supposed to already be there and if nothing there - gave up with - * an error and don't try to use direct modex. 
- */ - - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmi1: executing put for KVS %s, key %s value %s", kvsname, key, - value); - return convert_err(rc); - } - procnum = val->data.uint32; - PMIX_VALUE_FREE(val, 1); - - for (i = 0; i < procnum; i++) { - proc.rank = i; - if (PMIX_SUCCESS == PMIx_Get(&proc, key, NULL, 0, &val) && (NULL != val) - && (PMIX_STRING == val->type)) { - strncpy(value, val->data.string, length); - PMIX_VALUE_FREE(val, 1); - return PMI_SUCCESS; - } - PMIX_VALUE_FREE(val, 1); - } - return PMI_FAIL; + return convert_err(rc); } /* Barrier only applies to our own nspace, and we want all @@ -253,28 +228,28 @@ int PMI_Barrier(void) pmix_info_t buf; int ninfo = 0; pmix_info_t *info = NULL; + bool val = 1; PMI_CHECK(); - if (data_commited) { - bool val = 1; - info = &buf; - PMIX_INFO_CONSTRUCT(info); - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); - ninfo = 1; - } + info = &buf; + PMIX_INFO_CONSTRUCT(info); + PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); + ninfo = 1; rc = PMIx_Fence(NULL, 0, info, ninfo); - if (NULL != info) { - PMIX_INFO_DESTRUCT(info); - } - return rc; + PMIX_INFO_DESTRUCT(info); + + return convert_err(rc); } int PMI_Get_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_proc_t proc; + pmix_info_t info[1]; + bool val_optinal = 1; PMI_CHECK(); @@ -282,13 +257,23 @@ int PMI_Get_size(int *size) return PMI_ERR_INVALID_ARG; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val)) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_UNDEF; + + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_JOB_SIZE, info, 1, &val)) { rc = convert_int(size, val); 
PMIX_VALUE_RELEASE(val); - return convert_err(rc); } - return PMI_FAIL; + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int PMI_Get_rank(int *rk) @@ -307,6 +292,9 @@ int PMI_Get_universe_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_proc_t proc; + pmix_info_t info[1]; + bool val_optinal = 1; PMI_CHECK(); @@ -314,29 +302,56 @@ int PMI_Get_universe_size(int *size) return PMI_ERR_INVALID_ARG; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val)) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_UNDEF; + + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); - return convert_err(rc); } - return PMI_FAIL; + + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int PMI_Get_appnum(int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_proc_t proc; + pmix_info_t info[1]; + bool val_optinal = 1; PMI_CHECK(); - if (NULL != appnum && - PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &val)) { - rc = convert_int(appnum, val); - PMIX_VALUE_RELEASE(val); - return convert_err(rc); + if (NULL == appnum) { + return PMI_ERR_INVALID_ARG; } - return PMI_FAIL; + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_UNDEF; + + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) { + rc = convert_int(appnum, val); + PMIX_VALUE_RELEASE(val); + } + + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int 
PMI_Publish_name(const char service_name[], const char port[]) @@ -461,24 +476,34 @@ int PMI_Get_clique_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_info_t info[1]; + bool val_optinal = 1; PMI_CHECK(); if (NULL == size) { - return PMI_ERR_INVALID_ARGS; + return PMI_ERR_INVALID_ARG; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) { + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); - return convert_err(rc); } - return PMI_FAIL; + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int PMI_Get_clique_ranks(int ranks[], int length) { + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; char **rks; int i; @@ -498,9 +523,9 @@ int PMI_Get_clique_ranks(int ranks[], int length) } pmix_argv_free(rks); PMIX_VALUE_RELEASE(val); - return PMI_SUCCESS; } - return PMI_FAIL; + + return convert_err(rc); } int PMI_KVS_Get_my_name(char kvsname[], int length) diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix120/pmix/src/client/pmi2.c index b8394f2037..09c84901c7 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmi2.c @@ -37,6 +37,7 @@ #include "src/util/error.h" #include "src/util/output.h" + #define PMI2_CHECK() \ do { \ if (!pmi2_init) { \ @@ -55,6 +56,8 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; + pmix_info_t info[1]; + bool val_optinal = 1; if (PMIX_SUCCESS != PMIx_Init(&myproc)) { return PMI2_ERR_INIT; @@ -65,14 +68,20 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) /* getting internal key requires special rank value */ memcpy(&proc, 
&myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_WILDCARD; + proc.rank = PMIX_RANK_UNDEF; + + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); if (NULL != size) { /* get the universe size - this will likely pull * down all attributes assigned to the job, thus * making all subsequent "get" operations purely * local */ - if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) { + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { @@ -80,13 +89,14 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) } } else { /* cannot continue without this info */ - return PMI2_ERR_INIT; + rc = PMIX_ERR_INIT; + goto error; } } if (NULL != spawned) { /* get the spawned flag */ - if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, NULL, 0, &val)) { + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, info, 1, &val)) { rc = convert_int(spawned, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { @@ -100,7 +110,7 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (NULL != appnum) { /* get our appnum */ - if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, NULL, 0, &val)) { + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val)) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { @@ -113,9 +123,11 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) } pmi2_init = 1; - return PMI2_SUCCESS; + rc = PMIX_SUCCESS; error: + PMIX_INFO_DESTRUCT(&info[0]); + return convert_err(rc); } @@ -175,20 +187,25 @@ int PMI2_KVS_Fence(void) PMI2_CHECK(); + pmix_output_verbose(3, pmix_globals.debug_output, "PMI2_KVS_Fence"); + if (PMIX_SUCCESS != (rc = PMIx_Commit())) { return convert_err(rc); } /* we want all data to be collected upon completion */ { - 
pmix_info_t info; - int ninfo = 1; - bool val = 1; + pmix_info_t info[1]; + bool val_data = 1; - PMIX_INFO_CONSTRUCT(&info); - PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); - rc = PMIx_Fence(NULL, 0, &info, ninfo); - PMIX_INFO_DESTRUCT(&info); + /* set controlling parameters + * PMIX_COLLECT_DATA - meet legacy PMI2 requirement + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &val_data, PMIX_BOOL); + + rc = PMIx_Fence(NULL, 0, &info[0], 1); + PMIX_INFO_DESTRUCT(&info[0]); } return convert_err(rc); @@ -206,10 +223,12 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; - uint32_t procnum = 0; PMI2_CHECK(); + /* set default */ + *vallen = 0; + if ((NULL == key) || (NULL == value)) { return PMI2_ERR_INVALID_ARG; } @@ -219,37 +238,22 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), PMIX_MAX_NSLEN); if (src_pmi_id == PMI2_ID_NULL) { - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) { - return convert_err(rc); - } - procnum = val->data.uint32; - PMIX_VALUE_RELEASE(val); - proc.rank = 0; + /* the rank is UNDEF */ + proc.rank = PMIX_RANK_UNDEF; } else { proc.rank = src_pmi_id; } - do { - rc = PMIx_Get(&proc, key, NULL, 0, &val); - if (PMIX_SUCCESS == rc && NULL != val) { - if (PMIX_STRING != val->type) { - /* this is an error */ - PMIX_VALUE_RELEASE(val); - return PMI2_FAIL; - } - if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, maxvalue); - *vallen = strlen(val->data.string); - } - PMIX_VALUE_RELEASE(val); - break; - } else if (PMIX_ERR_NOT_FOUND == rc) { - proc.rank++; - } else { - break; + rc = PMIx_Get(&proc, key, NULL, 0, &val); + if (PMIX_SUCCESS == rc && NULL != val) { + if (PMIX_STRING != val->type) { + rc = PMIX_ERROR; + } else if (NULL != val->data.string) { + (void)strncpy(value, 
val->data.string, maxvalue); + *vallen = strlen(val->data.string); } - } while (proc.rank < (int)procnum); + PMIX_VALUE_RELEASE(val); + } return convert_err(rc); } @@ -258,6 +262,8 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_info_t info[1]; + bool val_optinal = 1; PMI2_CHECK(); @@ -265,15 +271,18 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo return PMI2_ERR_INVALID_ARG; } + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + *found = 0; - rc = PMIx_Get(&myproc, name, NULL, 0, &val); + rc = PMIx_Get(&myproc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { - /* this is an error */ - PMIX_VALUE_RELEASE(val); - return PMI2_FAIL; - } - if (NULL != val->data.string) { + rc = PMIX_ERROR; + } else if (NULL != val->data.string) { (void)strncpy(value, val->data.string, valuelen); *found = 1; } @@ -281,6 +290,9 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo } else if (PMIX_ERR_NOT_FOUND == rc) { rc = PMIX_SUCCESS; } + + PMIX_INFO_DESTRUCT(&info[0]); + return convert_err(rc); } @@ -307,6 +319,8 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; + pmix_info_t info[1]; + bool val_optinal = 1; PMI2_CHECK(); @@ -316,17 +330,20 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_WILDCARD; + proc.rank = PMIX_RANK_UNDEF; + + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + 
PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); *found = 0; - rc = PMIx_Get(&proc, name, NULL, 0, &val); + rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { - /* this is an error */ - PMIX_VALUE_RELEASE(val); - return PMI2_FAIL; - } - if (NULL != val->data.string) { + rc = PMIX_ERROR; + } else if (NULL != val->data.string) { (void)strncpy(value, val->data.string, valuelen); *found = 1; } @@ -334,6 +351,9 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou } else if (PMIX_ERR_NOT_FOUND == rc) { rc = PMIX_SUCCESS; } + + PMIX_INFO_DESTRUCT(&info[0]); + return convert_err(rc); } @@ -482,8 +502,10 @@ int PMI2_Job_GetRank(int *rank) int PMI2_Info_GetSize(int *size) { - pmix_status_t rc = PMIX_SUCCESS; + pmix_status_t rc = PMIX_ERROR; pmix_value_t *val; + pmix_info_t info[1]; + bool val_optinal = 1; PMI2_CHECK(); @@ -491,13 +513,20 @@ int PMI2_Info_GetSize(int *size) return PMI2_ERR_INVALID_ARGS; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) { + /* set controlling parameters + * PMIX_OPTIONAL - expect that these keys should be available on startup + */ + PMIX_INFO_CONSTRUCT(&info[0]); + PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); - return convert_err(rc); } - return PMI2_FAIL; + PMIX_INFO_DESTRUCT(&info[0]); + + return convert_err(rc); } int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c index 6a829d24b9..d22e1e132f 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 
/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -273,13 +273,13 @@ int PMIx_Init(pmix_proc_t *proc) /* get our effective id's */ pmix_globals.uid = geteuid(); pmix_globals.gid = getegid(); - /* default to our internal errhandler */ - pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref); /* initialize the output system */ if (!pmix_output_init()) { return PMIX_ERROR; } + /* default to our internal errhandler */ + pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref); /* see if debug is requested */ if (NULL != (evar = getenv("PMIX_DEBUG"))) { debug_level = strtol(evar, NULL, 10); @@ -1270,6 +1270,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; int ret, cnt; + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: regevents callback recvd"); @@ -1286,8 +1287,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, rc = pmix_remove_errhandler(cb->errhandler_ref); /* call the callback with error */ cb->errreg_cbfunc(PMIX_ERR_SERVER_FAILED_REQUEST, -1, cb->cbdata); - } - else { + } else { /* complete err handler registration with success status*/ pmix_output_verbose(2, pmix_globals.debug_output, "client:reg events cbfunc received status %d for errhandler %d", @@ -1303,28 +1303,36 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, void *cbdata) { /* add err handler, process info keys and register for events and call the callback */ - int rc, index = 0; + int index = 0; pmix_buffer_t *msg; pmix_cb_t *cb; + pmix_status_t rc; + pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: register errhandler"); + "pmix: register errhandler with %d infos", (int)ninfo); + /* check if this 
handler is already registered if so return error */ - if (PMIX_SUCCESS == pmix_lookup_errhandler (errhandler, &index)) { - /* complete request with error status and return its original reference */ + if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) { + /* complete request with error status and return its original reference */ pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register errhandler - already registered"); cbfunc(PMIX_EXISTS, index, cbdata); - + } else if (PMIX_ERR_GRP_FOUND == rc) { + /* just acknowledge it */ + cbfunc(PMIX_SUCCESS, index, cbdata); + } else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) { + /* if they are registering a default errhandler, then + * overwrite the existing one with it - the index will + * contain its location */ + pmix_add_errhandler(errhandler, info, ninfo, &index); } else { - if(PMIX_SUCCESS != (rc = pmix_add_errhandler (errhandler, info, ninfo, &index))) { + /* need to add this errhandler */ + if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register errhandler - error status rc=%d", rc); /* complete request with error*/ cbfunc(rc, index, cbdata); - } - else { - /* To do: need to determine if the client needs to process the info keys before passing it to - server */ + } else { pmix_output_verbose(10, pmix_globals.debug_output, "pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo); msg = PMIX_NEW(pmix_buffer_t); @@ -1334,11 +1342,10 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, PMIX_RELEASE(msg); pmix_remove_errhandler(index); cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata); - } - else { + } else { /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when - * the server acks/nacks the register events request*/ + * the server acks/nacks the register events request */ pmix_output_verbose(10, 
pmix_globals.debug_output, "pmix: register errhandler - pack events success status=%d", rc); cb = PMIX_NEW(pmix_cb_t); @@ -1358,6 +1365,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; int ret, cnt =1; + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: deregevents_cbfunc recvd"); @@ -1371,7 +1379,7 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, PMIX_ERROR_LOG(rc); } - /* remove the err handler and call the error handler reg completion callback fn.*/ + /* remove the err handler and call the error handler dereg completion callback fn.*/ pmix_remove_errhandler(cb->errhandler_ref); pmix_output_verbose(2, pmix_globals.debug_output, "client:dereg events cbfunc received status %d for errhandler %d", @@ -1388,17 +1396,18 @@ void pmix_client_deregister_errhandler(int errhandler_ref, pmix_error_reg_info_t *errreg; pmix_buffer_t *msg; pmix_cb_t *cb; + pmix_output_verbose(2, pmix_globals.debug_output, "pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref); - errreg = (pmix_error_reg_info_t *) pmix_pointer_array_get_item (&pmix_globals.errregs, errhandler_ref); + + errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref); if (NULL != errreg ) { msg = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) { PMIX_RELEASE(msg); pmix_remove_errhandler(errhandler_ref); cbfunc(PMIX_ERR_PACK_FAILURE, cbdata); - } - else { + } else { /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the server acks/nacks the register events request*/ @@ -1409,17 +1418,18 @@ void pmix_client_deregister_errhandler(int errhandler_ref, /* push the message into our event base to send to the server */ PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb); } 
- } - else + } else { cbfunc(PMIX_ERR_NOT_FOUND, cbdata); + } } static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) + pmix_buffer_t *buf, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; int ret, cnt; + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: notifyerror_cbfunc recvd"); @@ -1427,14 +1437,15 @@ static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return; } + /* unpack the status code */ if ((PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_INT))) || (PMIX_SUCCESS != ret)) { PMIX_ERROR_LOG(rc); } - /* call the notify error completion callback fn.*/ + /* call the notify error completion callback fn.*/ pmix_output_verbose(2, pmix_globals.debug_output, "client: notified error cbfunc received status %d ", ret); @@ -1449,9 +1460,13 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t rc; - pmix_buffer_t *msg = PMIX_NEW(pmix_buffer_t); + pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_NOTIFY_CMD; pmix_cb_t *cb; + + /* get the message buffer */ + msg = PMIX_NEW(pmix_buffer_t); + /* pack the command */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); @@ -1484,17 +1499,22 @@ pmix_status_t pmix_client_notify_error(pmix_status_t status, goto cleanup; } } + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the server acks/nacks the register events request*/ cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; + /* push the message into our event base to send to the server */ PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, notifyerror_cbfunc, cb); return PMIX_SUCCESS; + cleanup: PMIX_RELEASE(msg); - cbfunc(rc, cbdata); + /* never call a callback function when returning an error as + * the error tells the caller that 
they will never recv a + * callback */ return rc; } diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c index a9b27b0b94..8ff0d84081 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c @@ -56,16 +56,16 @@ #include "pmix_client_ops.h" -static pmix_buffer_t* pack_get(char *nspace, int rank, +static pmix_buffer_t* _pack_get(char *nspace, int rank, const pmix_info_t info[], size_t ninfo, pmix_cmd_t cmd); static void _getnbfn(int sd, short args, void *cbdata); -static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, +static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata); -static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata); +static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata); int PMIx_Get(const pmix_proc_t *proc, const char key[], const pmix_info_t info[], size_t ninfo, @@ -74,16 +74,6 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[], pmix_cb_t *cb; int rc; - if (NULL == proc) { - return PMIX_ERR_BAD_PARAM; - } - - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: %s:%d getting value for proc %s:%d key %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - proc->nspace, proc->rank, - (NULL == key) ? 
"NULL" : key); - if (pmix_globals.init_cntr <= 0) { return PMIX_ERR_INIT; } @@ -93,7 +83,7 @@ int PMIx_Get(const pmix_proc_t *proc, const char key[], * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); cb->active = true; - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, value_cbfunc, cb))) { + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) { PMIX_RELEASE(cb); return rc; } @@ -115,30 +105,60 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, pmix_value_cbfunc_t cbfunc, void *cbdata) { pmix_cb_t *cb; - - if (NULL == proc) { - return PMIX_ERR_BAD_PARAM; - } - - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: get_nb value for proc %s:%d key %s", - proc->nspace, proc->rank, - (NULL == key) ? "NULL" : key); + int rank; + char *nm; if (pmix_globals.init_cntr <= 0) { return PMIX_ERR_INIT; } - /* protect against bozo input */ - if (NULL == key) { + /* if the proc is NULL, then the caller is assuming + * that the key is universally unique within the caller's + * own nspace. This most likely indicates that the code + * was originally written for a legacy version of PMI. + * + * If the key is NULL, then the caller wants all + * data from the specified proc. Again, this likely + * indicates use of a legacy version of PMI. + * + * Either case is supported. 
However, we don't currently + * support the case where -both- values are NULL */ + if (NULL == proc && NULL == key) { return PMIX_ERR_BAD_PARAM; } + /* if the key is NULL, the rank cannot be WILDCARD as + * we cannot return all info from every rank */ + if (NULL != proc && PMIX_RANK_WILDCARD == proc->rank && NULL == key) { + return PMIX_ERR_BAD_PARAM; + } + + /* if the given proc param is NULL, or the nspace is + * empty, then the caller is referencing our own nspace */ + if (NULL == proc || 0 == strlen(proc->nspace)) { + nm = pmix_globals.myid.nspace; + } else { + nm = (char*)proc->nspace; + } + + /* if the proc param is NULL, then we are seeking a key that + * must be globally unique, so communicate this to the hash + * functions with the UNDEF rank */ + if (NULL == proc) { + rank = PMIX_RANK_UNDEF; + } else { + rank = proc->rank; + } + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: get_nb value for proc %s:%d key %s", + nm, rank, (NULL == key) ? "NULL" : key); + /* thread-shift so we can check global objects */ cb = PMIX_NEW(pmix_cb_t); cb->active = true; - (void)strncpy(cb->nspace, proc->nspace, PMIX_MAX_NSLEN); - cb->rank = proc->rank; + (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); + cb->rank = rank; cb->key = (char*)key; cb->info = (pmix_info_t*)info; cb->ninfo = ninfo; @@ -149,7 +169,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, return PMIX_SUCCESS; } -static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata) +static void _value_cbfunc(int status, pmix_value_t *kv, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; @@ -163,7 +183,7 @@ static void value_cbfunc(int status, pmix_value_t *kv, void *cbdata) cb->active = false; } -static pmix_buffer_t* pack_get(char *nspace, int rank, +static pmix_buffer_t* _pack_get(char *nspace, int rank, const pmix_info_t info[], size_t ninfo, pmix_cmd_t cmd) { @@ -209,7 +229,7 @@ static pmix_buffer_t* pack_get(char *nspace, int rank, /* this callback is 
coming from the usock recv, and thus * is occurring inside of our progress thread - hence, no * need to thread shift */ -static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, +static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; @@ -218,18 +238,19 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_value_t *val = NULL; int32_t cnt; pmix_buffer_t *bptr; - pmix_kval_t *kp; pmix_nspace_t *ns, *nptr; int rank; + int cur_rank; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: get_nb callback recvd"); + if (NULL == cb) { /* nothing we can do */ PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return; } - // cache the rank + /* cache the rank */ rank = cb->rank; /* unpack the status */ @@ -262,31 +283,36 @@ static void getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, * unpack and store it in the modex - this could consist * of buffers from multiple scopes */ cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &cur_rank, &cnt, PMIX_INT))) { + pmix_kval_t *cur_kval; + cnt = 1; - kp = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kp, &cnt, PMIX_KVAL))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: unpacked key %s", kp->key); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cb->rank, kp))) { - PMIX_ERROR_LOG(rc); - } - if (NULL != cb->key && 0 == strcmp(cb->key, kp->key)) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: found requested value"); - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, kp->value, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp); - val = NULL; - goto done; - } - } - PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain + if (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { cnt = 1; - kp = 
PMIX_NEW(pmix_kval_t); + cur_kval = PMIX_NEW(pmix_kval_t); + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: unpacked key %s", cur_kval->key); + if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) { + PMIX_ERROR_LOG(rc); + } + if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: found requested value"); + if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cur_kval); + val = NULL; + goto done; + } + } + PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain + cnt = 1; + cur_kval = PMIX_NEW(pmix_kval_t); + } + cnt = 1; + PMIX_RELEASE(cur_kval); } - cnt = 1; - PMIX_RELEASE(kp); PMIX_RELEASE(bptr); // free's the data region if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); @@ -338,28 +364,21 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_cb_t *cbret; pmix_buffer_t *msg; pmix_value_t *val; + pmix_info_t *info, *iptr; + pmix_pointer_array_t results; pmix_status_t rc; - char *nm; pmix_nspace_t *ns, *nptr; - size_t n; + size_t n, nvals; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: getnbfn value for proc %s:%d key %s", cb->nspace, cb->rank, (NULL == cb->key) ? 
"NULL" : cb->key); - /* if the nspace is empty, then the caller is referencing - * our own nspace */ - if (0 == strlen(cb->nspace)) { - nm = pmix_globals.myid.nspace; - } else { - nm = (char*)cb->nspace; - } - /* find the nspace object */ nptr = NULL; PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nm, ns->nspace)) { + if (0 == strcmp(cb->nspace, ns->nspace)) { nptr = ns; break; } @@ -370,13 +389,105 @@ static void _getnbfn(int fd, short flags, void *cbdata) * server has never heard of it, the server will return * an error */ nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, nm, PMIX_MAX_NSLEN); + (void)strncpy(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN); pmix_list_append(&pmix_globals.nspaces, &nptr->super); /* there is no point in looking for data in this nspace * object, so let's just go generate the request */ goto request; } + /* if the key is NULL, then we have to check both the job-data + * and the modex tables. If we don't yet have the modex data, + * then we are going to have to go get it. 
So let's check that + * case first */ + if (NULL == cb->key) { + PMIX_CONSTRUCT(&results, pmix_pointer_array_t); + pmix_pointer_array_init(&results, 2, INT_MAX, 1); + nvals = 0; + /* if the rank is WILDCARD, then they want all the job-level info, + * so no need to check the modex */ + if (PMIX_RANK_WILDCARD != cb->rank) { + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: value retrieved from dstore"); + /* since we didn't provide them with a key, the hash function + * must return the results in the pmix_info_array field of the + * value */ + if (NULL == val || PMIX_INFO_ARRAY != val->type) { + /* this is an error */ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + /* save the results */ + info = (pmix_info_t*)val->data.array.array; + for (n=0; n < val->data.array.size; n++) { + pmix_pointer_array_add(&results, &info[n]); + ++nvals; + } + val->data.array.array = NULL; // protect the data + val->data.array.size = 0; + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + } else { + /* if we didn't find a modex for this rank, then we need + * to go get it. Recall that the NULL==key scenario only + * pertains to cases where legacy PMI methods are being + * employed. Thus, the caller wants -all- information for + * the specified rank, not just the job-level info. 
*/ + goto request; + } + } + /* now get any data from the job-level info */ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, NULL, &val))) { + /* since we didn't provide them with a key, the hash function + * must return the results in the pmix_info_array field of the + * value */ + if (NULL == val || PMIX_INFO_ARRAY != val->type) { + /* this is an error */ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + cb->value_cbfunc(PMIX_ERR_BAD_PARAM, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + /* save the results */ + info = (pmix_info_t*)val->data.array.array; + for (n=0; n < val->data.array.size; n++) { + pmix_pointer_array_add(&results, &info[n]); + ++nvals; + } + val->data.array.array = NULL; // protect the data + val->data.array.size = 0; + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + } + /* now let's package up the results */ + PMIX_VALUE_CREATE(val, 1); + val->type = PMIX_INFO_ARRAY; + val->data.array.size = nvals; + PMIX_INFO_CREATE(iptr, nvals); + val->data.array.array = (struct pmix_info_t*)iptr; + for (n=0; n < (size_t)results.size && n < nvals; n++) { + if (NULL != (info = (pmix_info_t*)pmix_pointer_array_get_item(&results, n))) { + (void)strncpy(iptr[n].key, info->key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&iptr[n].value, &info->value); + PMIX_INFO_FREE(info, 1); + } + } + /* done with results array */ + PMIX_DESTRUCT(&results); + /* return the result to the caller */ + cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata); + PMIX_VALUE_FREE(val, 1); + PMIX_RELEASE(cb); + return; + } + /* the requested data could be in the job-data table, so let's * just check there first. 
*/ if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) { @@ -433,7 +544,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) * the error */ pmix_output_verbose(2, pmix_globals.debug_output, "Error requesting key=%s for rank = %d, namespace = %s", - cb->key, cb->rank, nm); + cb->key, cb->rank, cb->nspace); cb->value_cbfunc(rc, NULL, cb->cbdata); /* protect the data */ cb->procs = NULL; @@ -461,7 +572,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* they don't want us to try and retrieve it */ pmix_output_verbose(2, pmix_globals.debug_output, "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", - cb->key, cb->rank, nm); + cb->key, cb->rank, cb->nspace); cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); PMIX_RELEASE(cb); return; @@ -472,7 +583,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) * this nspace:rank. If we do, then no need to ask again as the * request will return _all_ data from that proc */ PMIX_LIST_FOREACH(cbret, &pmix_client_globals.pending_requests, pmix_cb_t) { - if (0 == strncmp(cbret->nspace, nm, PMIX_MAX_NSLEN) && + if (0 == strncmp(cbret->nspace, cb->nspace, PMIX_MAX_NSLEN) && cbret->rank == cb->rank) { /* we do have a pending request, but we still need to track this * outstanding request so we can satisfy it once the data is returned */ @@ -483,7 +594,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* we don't have a pending request, so let's create one - don't worry * about packing the key as we return everything from that proc */ - msg = pack_get(nm, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD); + msg = _pack_get(cb->nspace, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD); if (NULL == msg) { cb->value_cbfunc(PMIX_ERROR, NULL, cb->cbdata); PMIX_RELEASE(cb); @@ -496,5 +607,5 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); /* push the 
message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, getnb_cbfunc, cb); + PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, _getnb_cbfunc, cb); } diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_spawn.c index afad908228..261177a4e5 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_spawn.c @@ -150,10 +150,12 @@ pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo, PMIX_RELEASE(msg); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - return rc; + if (0 < napps) { + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } } /* create a callback object as we need to pass it to the diff --git a/opal/mca/pmix/pmix120/pmix/src/common/pmix_common.c b/opal/mca/pmix/pmix120/pmix/src/common/pmix_common.c index c2dc6f795c..ddc07ea635 100644 --- a/opal/mca/pmix/pmix120/pmix/src/common/pmix_common.c +++ b/opal/mca/pmix/pmix120/pmix/src/common/pmix_common.c @@ -43,10 +43,10 @@ void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, * call pmix_server_register_for_events, and call cbfunc with * reference to the errhandler */ pmix_output_verbose(2, pmix_globals.debug_output, - "registering client err handler"); + "registering client err handler with %d info", (int)ninfo); pmix_client_register_errhandler(info, ninfo, - errhandler, - cbfunc, cbdata); + errhandler, + cbfunc, cbdata); } } @@ -80,17 +80,17 @@ pmix_status_t PMIx_Notify_error(pmix_status_t status, int rc; if (pmix_globals.server) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_server_notify_error error =%d, rc=%d", status, rc); rc = pmix_server_notify_error(status, procs, nprocs, error_procs, error_nprocs, 
info, ninfo, - cbfunc, cbdata); - pmix_output_verbose(0, pmix_globals.debug_output, - "pmix_server_notify_error error =%d, rc=%d", status, rc); + cbfunc, cbdata); } else { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_client_notify_error error =%d, rc=%d", status, rc); rc = pmix_client_notify_error(status, procs, nprocs, error_procs, error_nprocs, info, ninfo, cbfunc, cbdata); - pmix_output_verbose(0, pmix_globals.debug_output, - "pmix_client_notify_error error =%d, rc=%d", status, rc); } return rc; } diff --git a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c index 3ffe95a4ba..da332c752a 100644 --- a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . 
@@ -56,7 +56,8 @@ void pmix_globals_init(void) { memset(&pmix_globals.myid, 0, sizeof(pmix_proc_t)); PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); - pmix_pointer_array_init(&pmix_globals.errregs, 1, PMIX_MAX_ERROR_REGISTRATIONS, 1); + PMIX_CONSTRUCT(&pmix_globals.errregs, pmix_pointer_array_t); + pmix_pointer_array_init(&pmix_globals.errregs, 16, PMIX_MAX_ERROR_REGISTRATIONS, 16); } void pmix_globals_finalize(void) @@ -68,6 +69,7 @@ void pmix_globals_finalize(void) if (NULL != pmix_globals.cache_remote) { PMIX_RELEASE(pmix_globals.cache_remote); } + PMIX_DESTRUCT(&pmix_globals.errregs); } @@ -158,6 +160,7 @@ PMIX_CLASS_INSTANCE(pmix_rank_info_t, static void errcon(pmix_error_reg_info_t *p) { + p->sglhdlr = false; p->errhandler = NULL; p->info = NULL; p->ninfo = 0; @@ -165,7 +168,9 @@ static void errcon(pmix_error_reg_info_t *p) static void errdes(pmix_error_reg_info_t *p) { p->errhandler = NULL; - // PMIX_INFO_FREE(p->info, p->ninfo); + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->ninfo); + } } PMIX_CLASS_INSTANCE(pmix_error_reg_info_t, pmix_object_t, diff --git a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h index 664cbe2d12..3f7b83fd0d 100644 --- a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +44,7 @@ BEGIN_C_DECLS /* define a structure for tracking error registrations */ typedef struct { pmix_object_t super; + bool sglhdlr; // registers a specific error status handler pmix_notification_fn_t errhandler; /* registered err handler callback fn */ pmix_info_t *info; /* error info keys registered with the handler */ size_t ninfo; /* size of info */ diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c index 81fcfb4f5e..706451d317 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . 
@@ -241,6 +241,8 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.client_eventregs, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t); + PMIX_CONSTRUCT(&pmix_server_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_server_globals.notifications, 256); /* see if debug is requested */ if (NULL != (evar = getenv("PMIX_DEBUG"))) { @@ -1111,6 +1113,7 @@ static bool match_error_registration(pmix_regevents_info_t *reginfoptr, pmix_not static void _notify_error(int sd, short args, void *cbdata) { pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata; + pmix_notify_caddy_t *rbout; pmix_status_t rc; pmix_cmd_t cmd = PMIX_NOTIFY_CMD; int i; @@ -1119,25 +1122,27 @@ static void _notify_error(int sd, short args, void *cbdata) pmix_regevents_info_t *reginfoptr; bool notify, notifyall; - pmix_output_verbose(0, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server: _notify_error notifying client of error %d", cd->status); - /* pack the command */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); goto cleanup; } + /* pack the status */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->status, 1, PMIX_INT))) { PMIX_ERROR_LOG(rc); goto cleanup; } + /* pack the error procs */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->error_nprocs, 1, PMIX_SIZE))) { PMIX_ERROR_LOG(rc); goto cleanup; } + if (0 < cd->error_nprocs) { if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->error_procs, cd->error_nprocs, PMIX_PROC))) { PMIX_ERROR_LOG(rc); @@ -1145,74 +1150,37 @@ static void _notify_error(int sd, short args, void *cbdata) } } + /* pack the info */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cd->ninfo, 1, PMIX_SIZE))) { PMIX_ERROR_LOG(rc); goto cleanup; } - if (0 < cd->ninfo) { + + if (0 < cd->ninfo) { if 
(PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, cd->info, cd->ninfo, PMIX_INFO))) { PMIX_ERROR_LOG(rc); goto cleanup; } } - /* if the RM gave us a NULL proc list, then we are notifying everyone */ - if (NULL == cd->procs) { - notifyall = true; - } else { - notifyall = false; + + /* we cannot know if everyone who wants this notice has had a chance + * to register for it - the notice may be coming too early. So cache + * the message until all local procs have received it, or it ages to + * the point where it gets pushed out by more recent events */ + PMIX_RETAIN(cd); + rbout = pmix_ring_buffer_push(&pmix_server_globals.notifications, cd); + + /* if an older event was bumped, release it */ + if (NULL != rbout) { + PMIX_RELEASE(rbout); } - /* cycle across our connected clients and send the message to + /* cycle across our registered events and send the message to * any within the specified proc array */ - for (i=0; i < pmix_server_globals.clients.size; i++) { - if (NULL == (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { - continue; - } - if (!notifyall) { - /* check to see if this proc matches that of one in the specified array */ - notify = false; - for (j=0; j < cd->nprocs; j++) { - if (0 != strncmp(peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == cd->procs[j].rank || - cd->procs[j].rank == peer->info->rank) { - notify = true; - break; - } - } - if (!notify) { - /* if we are not notifying everyone, and this proc isn't to - * be notified, then just continue the main loop */ - continue; - } - } - - /* get the client's error registration and check if client - * requested notification of this error */ - reginfoptr = NULL; - notify = false; - PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) { - if (reginfoptr->peer == peer) { - /* check if the client has registered for this error - * by parsing the info keys */ - notify = 
match_error_registration(reginfoptr, cd); - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_server _notify_error - match error registration returned notify =%d ", notify); - } - if (notify) { - break; - } - } - if (notify) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_server: _notify_error - notifying process rank %d error %d", - peer->info->rank, cd->status); - PMIX_RETAIN(cd->buf); - PMIX_SERVER_QUEUE_REPLY(peer, 0, cd->buf); - } + PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) { + pmix_server_check_notifications(reginfoptr, cd); } cleanup: @@ -1220,7 +1188,7 @@ static void _notify_error(int sd, short args, void *cbdata) if (NULL != cd->cbfunc) { cd->cbfunc(rc, cd->cbdata); } - PMIX_RELEASE(cd); + PMIX_RELEASE(cd); } pmix_status_t pmix_server_notify_error(pmix_status_t status, @@ -1234,12 +1202,32 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status, cd = PMIX_NEW(pmix_notify_caddy_t); cd->status = status; - cd->procs = procs; - cd->nprocs = nprocs; - cd->error_procs = error_procs; - cd->error_nprocs = error_nprocs; - cd->info = info; - cd->ninfo = ninfo; + /* have to copy the info here as we may have to cache this + * notification until procs have a chance to register for it */ + if (NULL != procs) { + cd->nprocs = nprocs; + PMIX_PROC_CREATE(cd->procs, cd->nprocs); + for (n=0; n < cd->nprocs; n++) { + (void)strncpy(cd->procs[n].nspace, procs[n].nspace, PMIX_MAX_NSLEN); + cd->procs[n].rank = procs[n].rank; + } + } + if (NULL != error_procs) { + cd->error_nprocs = error_nprocs; + PMIX_PROC_CREATE(cd->error_procs, cd->error_nprocs); + for (n=0; n < cd->error_nprocs; n++) { + (void)strncpy(cd->error_procs[n].nspace, error_procs[n].nspace, PMIX_MAX_NSLEN); + cd->error_procs[n].rank = error_procs[n].rank; + } + } + if (NULL != info) { + cd->ninfo = ninfo; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + for (n=0; n < cd->ninfo; n++) { + PMIX_INFO_LOAD(&cd->info[n], info[n].key, + 
&info[n].value.data, info[n].value.type); + } + } cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -1253,33 +1241,75 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status, return PMIX_SUCCESS; } +void pmix_server_check_notifications(pmix_regevents_info_t *reginfo, + pmix_notify_caddy_t *cd) +{ + bool notify; + size_t j; + + /* if the RM gave us a NULL proc list, then we are notifying everyone */ + if (NULL != cd->procs) { + /* check to see if this proc matches that of one in the specified array */ + notify = false; + for (j=0; j < cd->nprocs; j++) { + if (0 != strncmp(reginfo->peer->info->nptr->nspace, cd->procs[j].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == cd->procs[j].rank || + cd->procs[j].rank == reginfo->peer->info->rank) { + notify = true; + break; + } + } + if (!notify) { + /* if we are not notifying everyone, and this proc isn't to + * be notified, so just return */ + return; + } + } + /* check if the client has registered for this error + * by parsing the info keys */ + if (match_error_registration(reginfo, cd)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_server: check notifications - notifying process rank %d error %d", + reginfo->peer->info->rank, cd->status); + PMIX_RETAIN(cd->buf); + PMIX_SERVER_QUEUE_REPLY(reginfo->peer, 0, cd->buf); + } + +} static void reg_errhandler(int sd, short args, void *cbdata) { int index = 0; pmix_status_t rc; pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; + pmix_notify_caddy_t *rb; /* check if this handler is already registered if so return error */ - if (PMIX_SUCCESS == pmix_lookup_errhandler(cd->err, &index)) { - /* complete request with error status and return its original reference */ + if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) { + /* complete request with error status and return its original reference */ pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server_register_errhandler error - hdlr already registered index = 
%d", index); - cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata); } else { rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index); pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server_register_errhandler - success index =%d", index); - cd->cbfunc.errregcbfn(rc, index, cd->cbdata); } - cd->active = false; + /* cycle across any cached notifications and see if any are + * pending for us and match this description */ + + /* acknowledge the registration so the caller can release + * their data */ + cd->cbfunc.errregcbfn(rc, index, cd->cbdata); + PMIX_RELEASE(cd); } void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo, - pmix_notification_fn_t errhandler, - pmix_errhandler_reg_cbfunc_t cbfunc, - void *cbdata) + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) { pmix_shift_caddy_t *cd; @@ -2161,44 +2191,51 @@ static void cnct_cbfunc(pmix_status_t status, void *cbdata) PMIX_THREADSHIFT(scd, _cnct); } -void regevents_cbfunc (pmix_status_t status, void *cbdata) +void regevents_cbfunc(pmix_status_t status, void *cbdata) { pmix_status_t rc; pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata; pmix_regevents_info_t *reginfo, *reginfo_next; pmix_buffer_t *reply; + pmix_output_verbose(2, pmix_globals.debug_output, "server:regevents_cbfunc called status = %d", status); + if (PMIX_SUCCESS != status) { /* need to delete the stored event reg info when server - nacks reg events request */ + * nacks reg events request */ PMIX_LIST_FOREACH_SAFE(reginfo, reginfo_next, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) { - if(reginfo->peer == cd->peer) { - pmix_list_remove_item (&pmix_server_globals.client_eventregs, - ®info->super); + if (reginfo->peer == cd->peer) { + pmix_list_remove_item(&pmix_server_globals.client_eventregs, + ®info->super); PMIX_RELEASE(reginfo); break; } } } reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) + if 
(PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) { PMIX_ERROR_LOG(rc); + } // send reply PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); PMIX_RELEASE(cd); } -static void deregevents_cbfunc (pmix_status_t status, void *cbdata) +static void deregevents_cbfunc(pmix_status_t status, void *cbdata) { pmix_status_t rc; pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata; pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t); + pmix_output_verbose(2, pmix_globals.debug_output, "server:deregevents_cbfunc called status = %d", status); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) + + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) { PMIX_ERROR_LOG(rc); + } + // send reply PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); PMIX_RELEASE(cd); @@ -2209,10 +2246,14 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata) pmix_status_t rc; pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata; pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t); + pmix_output_verbose(2, pmix_globals.debug_output, "server:notifyerror_cbfunc called status = %d", status); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) + + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_INT))) { PMIX_ERROR_LOG(rc); + } + // send reply PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); PMIX_RELEASE(cd); @@ -2387,6 +2428,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, } return rc; } + if (PMIX_DEREGEVENTS_CMD == cmd) { PMIX_PEER_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_deregister_events(peer, buf, deregevents_cbfunc, cd))) { @@ -2395,6 +2437,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, } return rc; } + if (PMIX_NOTIFY_CMD == cmd) { PMIX_PEER_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_notify_error_client(peer, buf, notifyerror_cbfunc, cd))) { @@ -2402,6 +2445,7 @@ static pmix_status_t 
server_switchyard(pmix_peer_t *peer, uint32_t tag, } return rc; } + return PMIX_ERR_NOT_SUPPORTED; } diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_get.c index 9d48269723..a0989dc055 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_get.c @@ -215,6 +215,20 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, return rc; } + /* do not force dmodex logic for non-specific ranks + * let return not found status instead of doing fence with + * data exchange. User can make a decision to do such call getting + * not found status + */ + if (PMIX_RANK_UNDEF == rank || PMIX_RANK_WILDCARD == rank) { + pmix_output_verbose(2, pmix_globals.debug_output, + "%s:%d not found data for namespace = %s, rank = %d " + "(do not request resource manager server for non-specified rank)", + pmix_globals.myid.nspace, + pmix_globals.myid.rank, nspace, rank); + return PMIX_ERR_NOT_FOUND; + } + /* If we get here, then we don't have the data at this time. 
Check * to see if we already have a pending request for the data - if * we do, then we can just wait for it to arrive */ @@ -362,31 +376,52 @@ static pmix_status_t _satisfy_request(pmix_hash_table_t *ht, int rank, pmix_value_t *val; char *data; size_t sz; + int cur_rank; + int found = 0; pmix_buffer_t xfer, pbkt, *xptr; + void *last; /* check to see if this data already has been * obtained as a result of a prior direct modex request from * a remote peer, or due to data from a local client * having been committed */ - rc = pmix_hash_fetch(ht, rank, "modex", &val); - if (PMIX_SUCCESS == rc && NULL != val) { - /* the client is expecting this to arrive as a byte object - * containing a buffer, so package it accordingly */ - PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - PMIX_CONSTRUCT(&xfer, pmix_buffer_t); - xptr = &xfer; - PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); - pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER); - xfer.base_ptr = NULL; // protect the passed data - xfer.bytes_used = 0; - PMIX_DESTRUCT(&xfer); - PMIX_UNLOAD_BUFFER(&pbkt, data, sz); - PMIX_DESTRUCT(&pbkt); - PMIX_VALUE_RELEASE(val); - /* pass it back */ - cbfunc(rc, data, sz, cbdata, relfn, data); - return rc; + cur_rank = rank; + if (PMIX_RANK_UNDEF == rank) { + rc = pmix_hash_fetch_by_key(ht, "modex", &cur_rank, &val, &last); + } else { + rc = pmix_hash_fetch(ht, cur_rank, "modex", &val); } + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + while (PMIX_SUCCESS == rc) { + if (NULL != val) { + pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_INT); + /* the client is expecting this to arrive as a byte object + * containing a buffer, so package it accordingly */ + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + xptr = &xfer; + PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); + PMIX_VALUE_RELEASE(val); + pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER); + xfer.base_ptr = NULL; // protect the passed data + xfer.bytes_used = 0; + PMIX_DESTRUCT(&xfer); + found++; + } + if (PMIX_RANK_UNDEF == rank) { + rc = 
pmix_hash_fetch_by_key(ht, NULL, &cur_rank, &val, &last); + } else { + break; + } + } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); + + if (found) { + /* pass it back */ + cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); + return PMIX_SUCCESS; + } + return PMIX_ERR_NOT_FOUND; } diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.c index 25434ad0e2..e216cd4a4f 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . @@ -132,7 +132,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) pmix_nspace_t *nptr; pmix_rank_info_t *info; pmix_dmdx_remote_t *dcd, *dcdnext; - pmix_buffer_t pbkt; + pmix_buffer_t *pbkt; pmix_value_t *val; char *data; size_t sz; @@ -141,6 +141,12 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) info = peer->info; nptr = info->nptr; + pmix_output_verbose(2, pmix_globals.debug_output, + "%s:%d EXECUTE COMMIT FOR %s:%d", + pmix_globals.myid.nspace, + pmix_globals.myid.rank, + nptr->nspace, info->rank); + /* this buffer will contain one or more buffers, each * representing a different scope. 
These need to be locally * stored separately so we can provide required data based @@ -162,17 +168,39 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) PMIX_ERROR_LOG(rc); return rc; } - kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; - PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); - PMIX_RELEASE(b2); - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { - PMIX_ERROR_LOG(rc); + /* see if we already have info for this proc */ + if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) { + /* create the new data storage */ + kp = PMIX_NEW(pmix_kval_t); + kp->key = strdup("modex"); + PMIX_VALUE_CREATE(kp->value, 1); + kp->value->type = PMIX_BYTE_OBJECT; + /* get space for the new new data blob */ + kp->value->data.bo.bytes = (char*)malloc(b2->bytes_used + val->data.bo.size); + memcpy(kp->value->data.bo.bytes, val->data.bo.bytes, val->data.bo.size); + memcpy(kp->value->data.bo.bytes+val->data.bo.size, b2->base_ptr, b2->bytes_used); + kp->value->data.bo.size = val->data.bo.size + b2->bytes_used; + /* release the storage */ + PMIX_VALUE_FREE(val, 1); + /* store it in the appropriate hash */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kp); // maintain acctg + } else { + /* create a new kval to hold this data */ + kp = PMIX_NEW(pmix_kval_t); + kp->key = strdup("modex"); + PMIX_VALUE_CREATE(kp->value, 1); + kp->value->type = PMIX_BYTE_OBJECT; + PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); + PMIX_RELEASE(b2); + /* store it in the appropriate hash */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kp); // maintain acctg } - PMIX_RELEASE(kp); // maintain acctg cnt = 1; } if 
(PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { @@ -191,16 +219,16 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) if (dcd->cd->proc.rank == info->rank) { /* we can now fulfill this request - collect the * remote/global data from this proc */ - PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + pbkt = PMIX_NEW(pmix_buffer_t); /* get any remote contribution - note that there * may not be a contribution */ if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) && NULL != val) { - PMIX_LOAD_BUFFER(&pbkt, val->data.bo.bytes, val->data.bo.size); + PMIX_LOAD_BUFFER(pbkt, val->data.bo.bytes, val->data.bo.size); PMIX_VALUE_RELEASE(val); } - PMIX_UNLOAD_BUFFER(&pbkt, data, sz); - PMIX_DESTRUCT(&pbkt); + PMIX_UNLOAD_BUFFER(pbkt, data, sz); + PMIX_RELEASE(pbkt); /* execute the callback */ dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata); if (NULL != data) { @@ -805,7 +833,7 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - /* unpack the array of apps */ + /* unpack the array of directives */ if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); cnt=ninfo; @@ -958,6 +986,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_info_t *info = NULL; size_t ninfo, n; pmix_regevents_info_t *reginfo; + pmix_notify_caddy_t *cd; pmix_output_verbose(2, pmix_globals.debug_output, "recvd register events"); @@ -995,13 +1024,20 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_output_verbose(2, pmix_globals.debug_output, "server register events: calling host server reg events"); /* call the local server */ - if(PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info, - reginfo->ninfo, cbfunc, cbdata))) - { - + if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(reginfo->info, + reginfo->ninfo, cbfunc, cbdata))) { pmix_output_verbose(2, pmix_globals.debug_output, "server register events: host server reg events returned rc =%d", rc); } + + /* check 
if any matching notifications have been cached */ + for (n=0; n < pmix_server_globals.notifications.size; n++) { + if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_server_globals.notifications, n))) { + break; + } + pmix_server_check_notifications(reginfo, cd); + } + cleanup: pmix_output_verbose(2, pmix_globals.debug_output, "server register events: ninfo =%lu rc =%d", ninfo, rc); @@ -1197,6 +1233,15 @@ static void ncon(pmix_notify_caddy_t *p) } static void ndes(pmix_notify_caddy_t *p) { + if (NULL != p->procs) { + PMIX_PROC_FREE(p->procs, p->nprocs); + } + if (NULL != p->error_procs) { + PMIX_PROC_FREE(p->error_procs, p->error_nprocs); + } + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->ninfo); + } if (NULL != p->buf) { PMIX_RELEASE(p->buf); } @@ -1261,3 +1306,4 @@ static void regdes(pmix_regevents_info_t *p) PMIX_CLASS_INSTANCE(pmix_regevents_info_t, pmix_list_item_t, regcon, regdes); + diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h index ec1595a413..fe0d866269 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "src/usock/usock.h" #include "src/util/hash.h" @@ -157,6 +158,7 @@ typedef struct { int stop_thread[2]; // pipe used to stop listener thread pmix_buffer_t gdata; // cache of data given to me for passing to all clients pmix_list_t client_eventregs; // list of registered events per client. 
+ pmix_ring_buffer_t notifications; // ring buffer of pending notifications } pmix_server_globals_t; #define PMIX_PEER_CADDY(c, p, t) \ @@ -272,6 +274,8 @@ pmix_status_t pmix_server_notify_error_client(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, void *cbdata); +void pmix_server_check_notifications(pmix_regevents_info_t *reginfo, + pmix_notify_caddy_t *cd); void regevents_cbfunc (pmix_status_t status, void *cbdata); diff --git a/opal/mca/pmix/pmix120/pmix/src/util/error.c b/opal/mca/pmix/pmix120/pmix/src/util/error.c index b837fa6414..30d44fda7c 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix120/pmix/src/util/error.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -137,10 +137,16 @@ const char* PMIx_Error_string(pmix_status_t errnum) return "PROC-ABORT-REQUESTED"; case PMIX_ERR_PROC_ABORTED: return "PROC-ABORTED"; + case PMIX_ERR_DEBUGGER_RELEASE: + return "DEBUGGER-RELEASE"; case PMIX_ERR_SILENT: return "SILENT_ERROR"; case PMIX_ERROR: return "ERROR"; + case PMIX_ERR_GRP_FOUND: + return "GROUP-FOUND"; + case PMIX_ERR_DFLT_FOUND: + return "DEFAULT-FOUND"; case PMIX_SUCCESS: return "SUCCESS"; @@ -162,15 +168,26 @@ void pmix_errhandler_invoke(pmix_status_t status, pmix_error_reg_info_t *errreg, *errdflt=NULL; pmix_info_t *iptr; + /* we will need to provide the errhandler reference id when + * we provide the callback. 
Since the callback function doesn't + * provide a param for that purpose, we have to add it to any + * info array that came from the RM, so extend the array by 1 */ PMIX_INFO_CREATE(iptr, ninfo+1); + /* put the reference id in the first location */ (void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN); iptr[0].value.type = PMIX_INT; + /* we don't know the reference id yet, but we'll fill that in + * later - for now, just copy the incoming info array across */ if (NULL != info) { for (j=0; j < ninfo; j++) { PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type); } } + /* search our array of errhandlers for a match. We take any specific + * error status first, then take the group of the incoming status next. + * If neither of those have been registered, then use any default + * errhandler - otherwise, ignore it */ for (i = 0; i < pmix_globals.errregs.size; i++) { if (NULL == (errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item(&pmix_globals.errregs, i))) { continue; @@ -194,7 +211,7 @@ void pmix_errhandler_invoke(pmix_status_t status, break; } } - if (!exact_match) { + if (!exact_match && NULL != info) { /* if no exact match was found, then we will fire the errhandler * for any matching info key. 
This may be too lax and need to be adjusted * later */ @@ -217,22 +234,102 @@ void pmix_errhandler_invoke(pmix_status_t status, PMIX_INFO_FREE(iptr, ninfo+1); } -pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err, +/* lookup an errhandler during registration */ +pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo, int *index) { - int i; - pmix_status_t rc = PMIX_ERR_NOT_FOUND; - pmix_error_reg_info_t *errreg = NULL; + int i, idflt=-1, igrp=-1; + pmix_error_reg_info_t *errreg; + size_t sz, n; + char errgrp[PMIX_MAX_KEYLEN]; + bool exact_given = false; + int given = -1; + pmix_status_t status; + char *grp; - for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) { - errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i); - if ((NULL != errreg) && (err == errreg->errhandler)) { - *index = i; - rc = PMIX_SUCCESS; - break; + /* scan the incoming specification to see if it is a general errhandler, + * a group errhandler, or an error handler for a specific status. Only + * one of these options can be specified! 
*/ + if (NULL == info) { + /* this is the general error handler */ + given = 0; + } else { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) { + /* this is a specific errhandler */ + given = 1; + status = info[n].value.data.integer; + break; + } else if (0 == strcmp(info[n].key, "pmix.errgroup")) { + /* this is a group errhandler */ + given = 2; + grp = info[n].value.data.string; + break; + } } } - return rc; + + /* search our array of errhandlers for a match */ + for (i = 0; i < pmix_globals.errregs.size ; i++) { + errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i); + if (NULL == errreg) { + continue; + } + if (NULL == errreg->info) { + /* this is the general errhandler - if they gave us + * another general errhandler, then we should + * replace it */ + if (0 == given) { + *index = i; + return PMIX_ERR_DFLT_FOUND; + } + /* save this spot as we will default to it if nothing else is found */ + idflt = i; + continue; + } + if (0 == given) { + /* they are looking for the general errhandler */ + continue; + } + /* if this registration is for a single specific errhandler, then + * see if the incoming one matches */ + if (1 == given && errreg->sglhdlr) { + for (sz=0; sz < errreg->ninfo; sz++) { + if (0 == strncmp(errreg->info[sz].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) { + if (status == errreg->info[sz].value.data.integer) { + /* we have an exact match - return this errhandler and + * let the caller know it was an exact match */ + *index = i; + return PMIX_EXISTS; + } + } + } + } else if (2 == given && !errreg->sglhdlr) { + /* this registration is for a group, so check that case */ + + } + } + + /* if we get here, then no match was found. 
If they + * gave us a specific error, then we have to return not_found */ + if (exact_given) { + return PMIX_ERR_NOT_FOUND; + } + + /* If we have a group match, then that takes precedence */ + if (0 <= igrp) { + *index = igrp; + return PMIX_ERR_GRP_FOUND; + } + + /* if we found a default errhandler, then use it */ + if (0 <= idflt) { + *index = idflt; + return PMIX_ERR_DFLT_FOUND; + } + + /* otherwise, it wasn't found */ + return PMIX_ERR_NOT_FOUND; } pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err, @@ -242,25 +339,45 @@ pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err, int i; pmix_status_t rc = PMIX_SUCCESS; pmix_error_reg_info_t *errreg; + bool sglhdlr = false; - errreg = PMIX_NEW(pmix_error_reg_info_t); - errreg->errhandler = err; - errreg->ninfo = ninfo; + if (0 != *index) { + /* overwrite an existing entry */ + errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, *index); + if (NULL == errreg) { + return PMIX_ERR_NOT_FOUND; + } + errreg->errhandler = err; + PMIX_INFO_FREE(errreg->info, errreg->ninfo); + errreg->ninfo = ninfo; + } else { + errreg = PMIX_NEW(pmix_error_reg_info_t); + errreg->errhandler = err; + errreg->ninfo = ninfo; + *index = pmix_pointer_array_add(&pmix_globals.errregs, errreg); + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_add_errhandler index =%d", *index); + if (*index < 0) { + PMIX_RELEASE(errreg); + return PMIX_ERROR; + } + } + /* sadly, we have to copy the info objects as we cannot + * rely on them to remain in-memory */ if (NULL != info && 0 < ninfo) { PMIX_INFO_CREATE(errreg->info, ninfo); for (i=0; i < ninfo; i++) { + /* if this is a specific, single errhandler, then + * mark it accordingly */ + if (0 == strncmp(info[i].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) { + errreg->sglhdlr = true; + } (void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN); pmix_value_xfer(&errreg->info[i].value, &info[i].value); } } - *index = 
pmix_pointer_array_add(&pmix_globals.errregs, errreg); - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_add_errhandler index =%d", *index); - if (*index < 0) { - PMIX_RELEASE(errreg); - rc = PMIX_ERROR; - } - return rc; + + return PMIX_SUCCESS; } pmix_status_t pmix_remove_errhandler(int errhandler_ref) diff --git a/opal/mca/pmix/pmix120/pmix/src/util/error.h b/opal/mca/pmix/pmix120/pmix/src/util/error.h index 55f0e1ee8d..1bffc2f87d 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/error.h +++ b/opal/mca/pmix/pmix120/pmix/src/util/error.h @@ -42,11 +42,19 @@ BEGIN_C_DECLS pmix_errhandler_invoke(e, NULL, 0, NULL, 0); \ } while(0); +/* invoke the error handler that is registered against the given + * status, passing it the provided info on the procs that were + * affected, plus any additional info provided by the server */ PMIX_DECLSPEC void pmix_errhandler_invoke(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo); -PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err, +/* lookup the errhandler registered against the given status. If there + * is none, but an errhandler has been registered against the group + * that this status belongs to, then return that errhandler. If neither + * of those is true, but a general errhandler has been registered, then + * return that errhandler. 
Otherwise, return NOT_FOUND */ +PMIX_DECLSPEC pmix_status_t pmix_lookup_errhandler(pmix_info_t info[], size_t ninfo, int *index); PMIX_DECLSPEC pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err, diff --git a/opal/mca/pmix/pmix120/pmix/src/util/hash.c b/opal/mca/pmix/pmix120/pmix/src/util/hash.c index 84fc6cda75..b809736853 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix120/pmix/src/util/hash.c @@ -67,33 +67,28 @@ int pmix_hash_store(pmix_hash_table_t *table, { pmix_proc_data_t *proc_data; uint64_t id; - pmix_kval_t *kv; pmix_output_verbose(10, pmix_globals.debug_output, "HASH:STORE rank %d key %s", rank, kin->key); - if (PMIX_RANK_WILDCARD == rank) { + if (PMIX_RANK_UNDEF == rank) { id = UINT64_MAX; } else { id = (uint64_t)rank; } /* lookup the proc data object for this proc - create - * it if we don't */ + * it if we don't already have it */ if (NULL == (proc_data = lookup_proc(table, id, true))) { return PMIX_ERR_OUT_OF_RESOURCE; } - /* see if we already have this key in the data - means we are updating - * a pre-existing value - */ - kv = lookup_keyval(&proc_data->data, kin->key); - if (NULL != kv) { - pmix_list_remove_item(&proc_data->data, &kv->super); - PMIX_RELEASE(kv); - } - /* store the new value */ + /* add the new value - note that if the user is updating + * a value, the ordering of the stored blobs will cause + * an update to eventually occur. 
In other words, the + * receiving process will first unpack the "old" data, + * and then unpack the update and overwrite it */ PMIX_RETAIN(kin); pmix_list_append(&proc_data->data, &kin->super); @@ -103,46 +98,131 @@ int pmix_hash_store(pmix_hash_table_t *table, pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank, const char *key, pmix_value_t **kvs) { + pmix_status_t rc = PMIX_SUCCESS; pmix_proc_data_t *proc_data; pmix_kval_t *hv; uint64_t id; - pmix_status_t rc; + char *node; pmix_output_verbose(10, pmix_globals.debug_output, "HASH:FETCH rank %d key %s", rank, (NULL == key) ? "NULL" : key); - /* NULL keys are not supported */ - if (NULL == key) { - return PMIX_ERR_BAD_PARAM; - } - - if (PMIX_RANK_WILDCARD == rank) { - id = UINT64_MAX; + if (PMIX_RANK_UNDEF == rank) { + /* PMIX_RANK_UNDEF should return following statuses + * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_SUCCESS + * special logic is basing on these statuses on a client and a server */ + rc = pmix_hash_table_get_first_key_uint64(table, &id, + (void**)&proc_data, (void**)&node); + if (PMIX_SUCCESS != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "HASH:FETCH proc data for rank %d not found", + rank); + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + } } else { + /* specified rank can return following statuses + * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_ERR_NOT_FOUND | PMIX_SUCCESS + * special logic is basing on these statuses on a client and a server */ id = (uint64_t)rank; } - /* lookup the proc data object for this proc */ - if (NULL == (proc_data = lookup_proc(table, id, false))) { + while (PMIX_SUCCESS == rc) { + proc_data = lookup_proc(table, id, false); + if (NULL == proc_data) { + pmix_output_verbose(10, pmix_globals.debug_output, + "HASH:FETCH proc data for rank %d not found", + rank); + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + } + + /* if the key is NULL, then the user wants -all- data + * put by the specified rank */ + if (NULL == key) { + /* we will return the data as an array of 
pmix_info_t + * in the kvs pmix_value_t */ + + } else { + /* find the value from within this proc_data object */ + hv = lookup_keyval(&proc_data->data, key); + if (hv) { + /* create the copy */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + break; + } else if (PMIX_RANK_UNDEF != rank) { + pmix_output_verbose(10, pmix_globals.debug_output, + "HASH:FETCH data for key %s not found", key); + return PMIX_ERR_NOT_FOUND; + } + } + + rc = pmix_hash_table_get_next_key_uint64(table, &id, + (void**)&proc_data, node, (void**)&node); + if (PMIX_SUCCESS != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "HASH:FETCH data for key %s not found", key); + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + } + } + + return rc; +} + +pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key, + int *rank, pmix_value_t **kvs, void **last) +{ + pmix_status_t rc = PMIX_SUCCESS; + pmix_proc_data_t *proc_data; + pmix_kval_t *hv; + uint64_t id; + char *node; + static const char *key_r = NULL; + + if (key == NULL && (node = *last) == NULL) { + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + } + + if (key == NULL && key_r == NULL) { + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + } + + if (key) { + rc = pmix_hash_table_get_first_key_uint64(table, &id, + (void**)&proc_data, (void**)&node); + key_r = key; + } else { + rc = pmix_hash_table_get_next_key_uint64(table, &id, + (void**)&proc_data, node, (void**)&node); + } + + pmix_output_verbose(10, pmix_globals.debug_output, + "HASH:FETCH BY KEY rank %d key %s", + (int)id, key_r); + + if (PMIX_SUCCESS != rc) { pmix_output_verbose(10, pmix_globals.debug_output, - "HASH:FETCH proc data for rank %d not found", - rank); + "HASH:FETCH proc data for key %s not found", + key_r); return PMIX_ERR_PROC_ENTRY_NOT_FOUND; } /* find the value from within this proc_data object */ - if (NULL == (hv = lookup_keyval(&proc_data->data, key))) { - pmix_output_verbose(10, 
pmix_globals.debug_output, - "HASH:FETCH data for key %s not found", key); + hv = lookup_keyval(&proc_data->data, key_r); + if (hv) { + /* create the copy */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else { return PMIX_ERR_NOT_FOUND; } - /* create the copy */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - return rc; - } + *rank = (int)id; + *last = node; return PMIX_SUCCESS; } @@ -150,6 +230,7 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank, int pmix_hash_remove_data(pmix_hash_table_t *table, int rank, const char *key) { + pmix_status_t rc = PMIX_SUCCESS; pmix_proc_data_t *proc_data; pmix_kval_t *kv; uint64_t id; @@ -157,11 +238,11 @@ int pmix_hash_remove_data(pmix_hash_table_t *table, /* if the rank is wildcard, we want to apply this to * all rank entries */ - if (PMIX_RANK_WILDCARD == rank) { + if (PMIX_RANK_UNDEF == rank) { id = UINT64_MAX; - if (PMIX_SUCCESS == pmix_hash_table_get_first_key_uint64(table, &id, - (void**)&proc_data, - (void**)&node)) { + rc = pmix_hash_table_get_first_key_uint64(table, &id, + (void**)&proc_data, (void**)&node); + while (PMIX_SUCCESS == rc) { if (NULL != proc_data) { if (NULL == key) { PMIX_RELEASE(proc_data); @@ -175,23 +256,8 @@ int pmix_hash_remove_data(pmix_hash_table_t *table, } } } - while (PMIX_SUCCESS == pmix_hash_table_get_next_key_uint64(table, &id, - (void**)&proc_data, - node, (void**)&node)) { - if (NULL != proc_data) { - if (NULL == key) { - PMIX_RELEASE(proc_data); - } else { - PMIX_LIST_FOREACH(kv, &proc_data->data, pmix_kval_t) { - if (0 == strcmp(key, kv->key)) { - pmix_list_remove_item(&proc_data->data, &kv->super); - PMIX_RELEASE(kv); - break; - } - } - } - } - } + rc = pmix_hash_table_get_next_key_uint64(table, &id, + (void**)&proc_data, node, (void**)&node); } } diff --git a/opal/mca/pmix/pmix120/pmix/src/util/hash.h 
b/opal/mca/pmix/pmix120/pmix/src/util/hash.h index d40728e983..6b225d9095 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/hash.h +++ b/opal/mca/pmix/pmix120/pmix/src/util/hash.h @@ -27,11 +27,19 @@ BEGIN_C_DECLS pmix_status_t pmix_hash_store(pmix_hash_table_t *table, int rank, pmix_kval_t *kv); -/* Fetch the value for a specified key from within +/* Fetch the value for a specified key and rank from within * the given hash_table */ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, int rank, const char *key, pmix_value_t **kvs); +/* Fetch the value for a specified key from within + * the given hash_table. + * Each call returns the next portion of data in the table for the matching key. + * To get the first entry, call the function with the key parameter set to the key string. + * The remaining entries are obtained by calling the function with a NULL pointer for the key parameter. */ +pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key, + int *rank, pmix_value_t **kvs, void **last); + /* remove the specified key-value from the given hash_table. * A NULL key will result in removal of all data for the * given rank.
A rank of PMIX_RANK_WILDCARD indicates that diff --git a/opal/mca/pmix/pmix120/pmix/src/util/output.c b/opal/mca/pmix/pmix120/pmix/src/util/output.c index d9aeab18df..732068762f 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix120/pmix/src/util/output.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #ifdef HAVE_SYSLOG_H @@ -468,6 +469,56 @@ void pmix_output_set_output_file_info(const char *dir, } } +void pmix_output_hexdump(int verbose_level, int output_id, + void *ptr, int buflen) +{ + unsigned char *buf = (unsigned char *) ptr; + char out_buf[120]; + int ret = 0; + int out_pos = 0; + int i, j; + + if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && + info[output_id].ldi_verbose_level >= verbose_level) { + pmix_output_verbose(verbose_level, output_id, "dump data at %p %d bytes\n", ptr, buflen); + for (i = 0; i < buflen; i += 16) { + out_pos = 0; + ret = sprintf(out_buf + out_pos, "%06x: ", i); + if (ret < 0) + return; + out_pos += ret; + for (j = 0; j < 16; j++) { + if (i + j < buflen) + ret = sprintf(out_buf + out_pos, "%02x ", + buf[i + j]); + else + ret = sprintf(out_buf + out_pos, " "); + if (ret < 0) + return; + out_pos += ret; + } + ret = sprintf(out_buf + out_pos, " "); + if (ret < 0) + return; + out_pos += ret; + for (j = 0; j < 16; j++) + if (i + j < buflen) { + ret = sprintf(out_buf + out_pos, "%c", + isprint(buf[i+j]) ? 
+ buf[i + j] : + '.'); + if (ret < 0) + return; + out_pos += ret; + } + ret = sprintf(out_buf + out_pos, "\n"); + if (ret < 0) + return; + pmix_output_verbose(verbose_level, output_id, "%s", out_buf); + } + } +} + /* * Shut down the output stream system diff --git a/opal/mca/pmix/pmix120/pmix/src/util/output.h b/opal/mca/pmix/pmix120/pmix/src/util/output.h index 5adfa64d44..610e244a72 100644 --- a/opal/mca/pmix/pmix120/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix120/pmix/src/util/output.h @@ -507,6 +507,12 @@ struct pmix_output_stream_t { char **olddir, char **oldprefix); + /** + * Like pmix_output_verbose(), but takes a pointer to a buffer and its size in bytes and prints the buffer contents as a hex dump. + */ + PMIX_DECLSPEC void pmix_output_hexdump(int verbose_level, int output_id, + void *ptr, int buflen); + #if PMIX_ENABLE_DEBUG /** * Main macro for use in sending debugging output to output streams; diff --git a/opal/mca/pmix/pmix120/pmix120_server_north.c b/opal/mca/pmix/pmix120/pmix120_server_north.c index b484ab7da4..b13e51265c 100644 --- a/opal/mca/pmix/pmix120/pmix120_server_north.c +++ b/opal/mca/pmix/pmix120/pmix120_server_north.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc.
diff --git a/opal/mca/pmix/pmix120/pmix_pmix120.c b/opal/mca/pmix/pmix120/pmix_pmix120.c index 5bee6ff683..aad5125a26 100644 --- a/opal/mca/pmix/pmix120/pmix_pmix120.c +++ b/opal/mca/pmix/pmix120/pmix_pmix120.c @@ -211,6 +211,11 @@ static void reg_thread(int sd, short args, void *cbdata) int rc; opal_pmix120_etracker_t *trk; + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s register complete with status %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + cd->status); + /* convert the status */ rc = pmix120_convert_rc(cd->status); @@ -251,6 +256,11 @@ static void pmix120_register_errhandler(opal_list_t *info, size_t n; opal_value_t *ival; + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s REGISTER ERRHDNLR INFO %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == info) ? "NULL" : "NOT-NULL"); + /* setup a caddy for the operation so we can free * the array when done */ cd = OBJ_NEW(pmix120_opcaddy_t); @@ -266,7 +276,8 @@ static void pmix120_register_errhandler(opal_list_t *info, n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(cd->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix120_value_load(&cd->info[n].value, ival); + cd->info[n].value.type = PMIX_INT; + cd->info[n].value.data.status = pmix120_convert_opalrc(ival->data.integer); } } } diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 659b7bbb53..2eade6204b 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -248,8 +248,14 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_SERVER_NOT_AVAIL: retval = "Server not available"; break; + case OPAL_ERR_IN_PROCESS: + retval = "Operation in process"; + break; + case OPAL_ERR_DEBUGGER_RELEASE: + retval = "Release debugger"; + break; default: - retval = NULL; + retval = "UNRECOGNIZED"; } *errmsg = retval; diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 2da80fce65..578f229603 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -540,6 +540,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; sig->signature[0].vpid = ORTE_VPID_WILDCARD; + sig->sz = 1; if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buffer))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); @@ -693,9 +694,6 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) } cleanup: - /* need to init_after_spawn for debuggers */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS); - /* cleanup */ OBJ_RELEASE(caddy); } diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 6b4ea0923b..10ff50d8c5 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -110,8 +110,8 @@ BEGIN_C_DECLS /* show help */ #define ORTE_RML_TAG_SHOW_HELP 36 -/* debugger release */ -#define ORTE_RML_TAG_DEBUGGER_RELEASE 37 +/* error notifications */ +#define ORTE_RML_TAG_NOTIFICATION 37 /* bootstrap */ #define ORTE_RML_TAG_BOOTSTRAP 38 diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 780286e3c0..ecd800d4fe 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science @@ -197,6 +197,7 @@ int pmix_server_init(void) ORTE_ERROR_LOG(rc); return rc; } + OBJ_CONSTRUCT(&orte_pmix_server_globals.notifications, opal_list_t); /* setup recv for direct modex requests */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX, @@ -214,6 +215,10 @@ int pmix_server_init(void) orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT, ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL); + /* setup recv for notifications */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION, + ORTE_RML_PERSISTENT, pmix_server_notify, NULL); + /* ensure the PMIx server uses the proper rendezvous directory */ opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ); @@ -348,12 +353,16 @@ void pmix_server_finalize(void) /* stop receives */ orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX); orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX_RESP); - - /* cleanup collectives */ - OBJ_DESTRUCT(&orte_pmix_server_globals.reqs); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION); /* shutdown the local server */ opal_pmix.server_finalize(); + + /* cleanup collectives */ + OBJ_DESTRUCT(&orte_pmix_server_globals.reqs); + OPAL_LIST_DESTRUCT(&orte_pmix_server_globals.notifications); } static void send_error(int status, opal_process_name_t *idreq, @@ -634,6 +643,7 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender, static void opcon(orte_pmix_server_op_caddy_t 
*p) { p->procs = NULL; + p->eprocs = NULL; p->info = NULL; p->cbdata = NULL; } diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 1a63094fe5..65edab73ab 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -111,24 +111,204 @@ int pmix_server_abort_fn(opal_process_name_t *proc, void *server_object, return OPAL_SUCCESS; } +static void _register_events(int sd, short args, void *cbdata) +{ + orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + opal_value_t *info; + + /* the OPAL layer "owns" the list, but let's deconstruct it + * here so we don't have to duplicate the data */ + while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { + /* don't worry about duplication as the underlying host + * server is already protecting us from it */ + opal_list_append(&orte_pmix_server_globals.notifications, &info->super); + } + + if (NULL != cd->cbfunc) { + cd->cbfunc(ORTE_SUCCESS, cd->cbdata); + } + OBJ_RELEASE(cd); +} + +/* hook for the local PMIX server to pass event registrations + * up to us - we will assume the responsibility for providing + * notifications for registered events */ int pmix_server_register_events_fn(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - /* for now, just execute the cbfunc */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, cbdata); - } - return OPAL_SUCCESS; + /* need to thread-shift this request as we are going + * to access our global list of registered events */ + ORTE_PMIX_OPERATION(NULL, 
info, _register_events, cbfunc, cbdata); + return ORTE_SUCCESS; } +static void _deregister_events(int sd, short args, void *cbdata) +{ + orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + opal_value_t *info, *iptr, *nptr; + + /* the OPAL layer "owns" the list, but let's deconstruct it + * here for consistency */ + while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { + /* search for matching requests */ + OPAL_LIST_FOREACH_SAFE(iptr, nptr, &orte_pmix_server_globals.notifications, opal_value_t) { + if (OPAL_EQUAL == opal_dss.compare(iptr, info, OPAL_VALUE)) { + opal_list_remove_item(&orte_pmix_server_globals.notifications, &iptr->super); + OBJ_RELEASE(iptr); + break; + } + } + OBJ_RELEASE(info); + } + + if (NULL != cd->cbfunc) { + cd->cbfunc(ORTE_SUCCESS, cd->cbdata); + } + OBJ_RELEASE(cd); +} +/* hook for the local PMIX server to pass event deregistrations + * up to us */ int pmix_server_deregister_events_fn(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - /* for now, just execute the cbfunc */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, cbdata); - } - return OPAL_SUCCESS; + /* need to thread-shift this request as we are going + * to access our global list of registered events */ + ORTE_PMIX_OPERATION(NULL, info, _deregister_events, cbfunc, cbdata); + return ORTE_SUCCESS; +} + +static void _notify_release(int status, void *cbdata) +{ + orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + + if (NULL != cd->procs) { + OPAL_LIST_RELEASE(cd->procs); + } + if (NULL != cd->eprocs) { + OPAL_LIST_RELEASE(cd->eprocs); + } + if (NULL != cd->info) { + OPAL_LIST_RELEASE(cd->info); + } + OBJ_RELEASE(cd); +} +void pmix_server_notify(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tg, void *cbdata) +{ + opal_list_t *procs = NULL, *eprocs = NULL, *info = NULL; + int cnt, rc, ret, nprocs, n; + opal_namelist_t *nm; + opal_value_t *val; + 
orte_pmix_server_op_caddy_t *cd; + + opal_output_verbose(2, orte_pmix_server_globals.output, + "%s Notification received", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + + /* unpack the status */ + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* unpack the target procs that are to be notified */ + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* if any were provided, add them to the list */ + if (0 < nprocs) { + procs = OBJ_NEW(opal_list_t); + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(procs, &nm->super); + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) { + ORTE_ERROR_LOG(rc); + OPAL_LIST_RELEASE(procs); + return; + } + } + } + + /* unpack the procs that were impacted by the error */ + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + if (NULL != procs) { + OPAL_LIST_RELEASE(procs); + } + return; + } + + /* if any were provided, add them to the list */ + if (0 < nprocs) { + eprocs = OBJ_NEW(opal_list_t); + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(eprocs, &nm->super); + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nm->name, &cnt, OPAL_NAME))) { + ORTE_ERROR_LOG(rc); + if (NULL != procs) { + OPAL_LIST_RELEASE(procs); + } + OPAL_LIST_RELEASE(eprocs); + return; + } + } + } + + /* unpack the infos that were provided */ + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nprocs, &cnt, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + if (NULL != procs) { + OPAL_LIST_RELEASE(procs); + } + return; + } + + /* if any were provided, add them to the list */ + if (0 < nprocs) { + info = OBJ_NEW(opal_list_t); + for (n=0; n < nprocs; n++) { + val = OBJ_NEW(opal_value_t); + opal_list_append(info, &val->super); + if (ORTE_SUCCESS != (rc = 
opal_dss.unpack(buffer, &val, &cnt, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + if (NULL != procs) { + OPAL_LIST_RELEASE(procs); + } + if (NULL != eprocs) { + OPAL_LIST_RELEASE(eprocs); + } + OPAL_LIST_RELEASE(info); + return; + } + } + } + + cd = OBJ_NEW(orte_pmix_server_op_caddy_t); + cd->procs = procs; + cd->eprocs = eprocs; + cd->info = info; + + if (OPAL_SUCCESS != (rc = opal_pmix.server_notify_error(ret, procs, eprocs, info, _notify_release, cd))) { + ORTE_ERROR_LOG(rc); + if (NULL != procs) { + OPAL_LIST_RELEASE(procs); + } + if (NULL != eprocs) { + OPAL_LIST_RELEASE(eprocs); + } + if (NULL != info) { + OPAL_LIST_RELEASE(info); + } + OBJ_RELEASE(cd); + } } diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 62d45dc7e3..c93d3624f4 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -75,6 +75,7 @@ typedef struct { opal_object_t super; opal_event_t ev; opal_list_t *procs; + opal_list_t *eprocs; opal_list_t *info; opal_pmix_op_cbfunc_t cbfunc; void *cbdata; @@ -175,6 +176,10 @@ extern void pmix_server_keyval_client(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tg, void *cbdata); +extern void pmix_server_notify(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tg, void *cbdata); + /* exposed shared variables */ typedef struct { bool initialized; @@ -186,6 +191,7 @@ typedef struct { char *server_uri; bool wait_for_server; orte_process_name_t server; + opal_list_t notifications; } pmix_server_globals_t; extern pmix_server_globals_t orte_pmix_server_globals; diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index d8c97d399b..2ca2292306 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -2243,7 +2243,7 @@ static void run_debugger(char *basename, opal_cmd_line_t *cmd_line, * - fills in the table MPIR_proctable, and sets MPIR_proctable_size * - sets 
MPIR_debug_state to MPIR_DEBUG_SPAWNED ( = 1) * - calls MPIR_Breakpoint() which the debugger will have a - * breakpoint on. + * breakpoint on. * * b) Applications start and then spin until MPIR_debug_gate is set * non-zero by the debugger. @@ -2382,8 +2382,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata) return; } strncpy(MPIR_attach_fifo, attach_fifo, MPIR_MAX_PATH_LENGTH - 1); - free(attach_fifo); - open_fifo(); + free(attach_fifo); + open_fifo(); } return; } @@ -2511,6 +2511,58 @@ static void setup_debugger_job(void) static bool mpir_breakpoint_fired = false; +static void _send_notification(void) +{ + opal_buffer_t buf; + int status = OPAL_ERR_DEBUGGER_RELEASE; + orte_grpcomm_signature_t sig; + int rc; + + OBJ_CONSTRUCT(&buf, opal_buffer_t); + + /* pack the debugger_attached status */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + return; + } + status = 0; + + /* notify all procs */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + return; + } + + /* all procs are impacted */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + return; + } + + /* no further info to provide */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + return; + } + + /* xcast it to everyone */ + OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t); + sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); + sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid; + sig.signature[0].vpid = ORTE_VPID_WILDCARD; + sig.sz = 1; + + if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(&sig, ORTE_RML_TAG_NOTIFICATION, &buf))) { + ORTE_ERROR_LOG(rc); + } + OBJ_DESTRUCT(&sig); + OBJ_DESTRUCT(&buf); +} + /* * Initialization of data structures for running under a debugger * using the MPICH/TotalView parallel 
debugger interface. This stage @@ -2527,8 +2579,6 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) orte_proc_t *proc; orte_app_context_t *appctx; orte_vpid_t i, j; - opal_buffer_t *buf; - int rc, k; char **aliases, *aptr; /* if we couldn't get thru the mapper stage, we might @@ -2548,31 +2598,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) /* trigger the debugger */ MPIR_Breakpoint(); - /* send a message to rank=0 of any app jobs to release it */ - for (k=1; k < orte_job_data->size; k++) { - if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) { - continue; - } - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* ignore debugger jobs */ - continue; - } - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) || - ORTE_PROC_STATE_UNTERMINATED < proc->state || - NULL == proc->rml_uri) { - /* proc is already dead or never registered with us (so we don't have - * contact info for him) - */ - continue; - } - buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */ - if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf, - ORTE_RML_TAG_DEBUGGER_RELEASE, - orte_rml_send_callback, NULL))) { - opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc)); - OBJ_RELEASE(buf); - } - } + /* notify all procs that the debugger is ready */ + _send_notification(); } return; } @@ -2665,35 +2692,8 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) /* trigger the debugger */ MPIR_Breakpoint(); - /* send a message to rank=0 of any app jobs to release it */ - for (k=1; k < orte_job_data->size; k++) { - if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, k))) { - continue; - } - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* ignore debugger jobs */ - continue; - } - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 0)) || - 
ORTE_PROC_STATE_UNTERMINATED < proc->state || - NULL == proc->rml_uri) { - /* proc is already dead or never registered with us (so we don't have - * contact info for him) - */ - continue; - } - opal_output_verbose(2, orte_debug_output, - "%s sending debugger release to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name)); - buf = OBJ_NEW(opal_buffer_t); /* don't need anything in this */ - if (0 > (rc = orte_rml.send_buffer_nb(&proc->name, buf, - ORTE_RML_TAG_DEBUGGER_RELEASE, - orte_rml_send_callback, NULL))) { - opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc)); - OBJ_RELEASE(buf); - } - } + /* notify all procs that the debugger is ready */ + _send_notification(); } else { /* if I am launching debugger daemons, then I need to do so now * that the job has been started and I know which nodes have @@ -2727,14 +2727,14 @@ static void orte_debugger_detached(int fd, short event, void *cbdata) static void open_fifo (void) { if (attach_fd > 0) { - close(attach_fd); + close(attach_fd); } attach_fd = open(MPIR_attach_fifo, O_RDONLY | O_NONBLOCK, 0); if (attach_fd < 0) { - opal_output(0, "%s unable to open debugger attach fifo", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return; + opal_output(0, "%s unable to open debugger attach fifo", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + return; } /* Set this fd to be close-on-exec so that children don't see it */ @@ -2747,9 +2747,9 @@ static void open_fifo (void) } opal_output_verbose(2, orte_debug_output, - "%s Monitoring debugger attach fifo %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - MPIR_attach_fifo); + "%s Monitoring debugger attach fifo %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + MPIR_attach_fifo); attach = (opal_event_t*)malloc(sizeof(opal_event_t)); opal_event_set(orte_event_base, attach, attach_fd, OPAL_EV_READ, attach_debugger, attach); @@ -2766,16 +2766,16 @@ static void attach_debugger(int fd, short event, void *arg) if (fifo_active) { attach = 
(opal_event_t*)arg; - fifo_active = false; + fifo_active = false; rc = read(attach_fd, &fifo_cmd, sizeof(fifo_cmd)); - if (!rc) { + if (!rc) { /* release the current event */ opal_event_free(attach); - /* reopen device to clear hangup */ - open_fifo(); - return; - } + /* reopen device to clear hangup */ + open_fifo(); + return; + } if (1 != fifo_cmd) { /* ignore the cmd */ fifo_active = true; @@ -2805,7 +2805,7 @@ static void attach_debugger(int fd, short event, void *arg) * data is already available, so we only need to * check to see if we should spawn any daemons */ - if ('\0' != MPIR_executable_path[0] || NULL != orte_debugger_test_daemon) { + if ('\0' != MPIR_executable_path[0] || NULL != orte_debugger_test_daemon) { opal_output_verbose(2, orte_debug_output, "%s Spawning debugger daemons %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),