From 0434b615b524b3e0139df13aa8c3cd7f258b2038 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 23 Feb 2018 09:57:19 -0800 Subject: [PATCH 1/4] Update ORTE to support PMIx v3 This is a point-in-time update that includes support for several new PMIx features, mostly focused on debuggers and "instant on": * initial prototype support for PMIx-based debuggers. For the moment, this is restricted to using the DVM. Supports direct launch of apps under debugger control, and indirect launch using prun as the intermediate launcher. Includes ability for debuggers to control the environment of both the launcher and the spawned app procs. Work continues on completing support for indirect launch * IO forwarding for tools. Output of apps launched under tool control is directed to the tool and output there - includes support for XML formatting and output to files. Stdin can be forwarded from the tool to apps, but this hasn't been implemented in ORTE yet. * Fabric integration for "instant on". Enable collection of network "blobs" to be delivered to network libraries on compute nodes prior to local proc spawn. Infrastructure is in place - implementation will come later. * Harvesting and forwarding of envars. Enable network plugins to harvest envars and include them in the launch msg for setting the environment prior to local proc spawn. Currently, only OmniPath is supported. PMIx MCA params control which envars are included, and also allow envars to be excluded. 
Signed-off-by: Ralph Castain --- opal/dss/dss_compare.c | 48 +- opal/dss/dss_copy.c | 34 +- opal/dss/dss_internal.h | 12 +- opal/dss/dss_load_unload.c | 2 +- opal/dss/dss_open_close.c | 33 +- opal/dss/dss_pack.c | 28 +- opal/dss/dss_print.c | 31 +- opal/dss/dss_types.h | 15 +- opal/dss/dss_unpack.c | 39 +- opal/include/opal/constants.h | 5 +- opal/mca/pmix/pmix.h | 31 + opal/mca/pmix/pmix3x/pmix/VERSION | 4 +- opal/mca/pmix/pmix3x/pmix/config/pmix.m4 | 2 +- .../pmix3x/pmix/contrib/pmix-valgrind.supp | 2 +- .../mca/pmix/pmix3x/pmix/examples/debuggerd.c | 10 +- opal/mca/pmix/pmix3x/pmix/include/pmix.h | 113 ++- .../pmix/pmix3x/pmix/include/pmix_common.h.in | 188 +++- .../pmix/pmix3x/pmix/include/pmix_server.h | 106 ++- opal/mca/pmix/pmix3x/pmix/include/pmix_tool.h | 1 + .../pmix/pmix3x/pmix/src/class/pmix_hotel.c | 9 +- .../pmix/pmix3x/pmix/src/class/pmix_hotel.h | 20 +- .../pmix/pmix3x/pmix/src/client/pmix_client.c | 59 +- .../pmix3x/pmix/src/client/pmix_client_ops.h | 9 +- .../pmix3x/pmix/src/common/Makefile.include | 8 +- .../pmix/pmix3x/pmix/src/common/pmix_iof.c | 863 ++++++++++++++++++ .../pmix/pmix3x/pmix/src/common/pmix_iof.h | 197 ++++ .../pmix3x/pmix/src/common/pmix_strings.c | 35 + .../pmix/src/event/pmix_event_registration.c | 4 +- .../pmix3x/pmix/src/include/pmix_globals.c | 21 + .../pmix3x/pmix/src/include/pmix_globals.h | 28 +- opal/mca/pmix/pmix3x/pmix/src/include/types.h | 3 + .../pmix3x/pmix/src/mca/bfrops/base/base.h | 17 + .../src/mca/bfrops/base/bfrop_base_copy.c | 58 +- .../pmix/src/mca/bfrops/base/bfrop_base_fns.c | 490 +++------- .../src/mca/bfrops/base/bfrop_base_pack.c | 44 +- .../src/mca/bfrops/base/bfrop_base_print.c | 67 ++ .../src/mca/bfrops/base/bfrop_base_unpack.c | 58 +- .../pmix/pmix3x/pmix/src/mca/pnet/base/base.h | 4 +- .../pmix/src/mca/pnet/base/pnet_base_fns.c | 86 +- .../pmix/src/mca/pnet/base/pnet_base_frame.c | 2 +- .../pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c | 217 +++-- .../pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h | 13 +- 
.../src/mca/pnet/opa/pnet_opa_component.c | 81 +- opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h | 53 +- .../pmix/src/mca/psec/native/psec_native.c | 4 +- .../pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h | 1 + .../pmix3x/pmix/src/runtime/pmix_finalize.c | 3 +- .../pmix/pmix3x/pmix/src/runtime/pmix_init.c | 10 +- .../pmix3x/pmix/src/runtime/pmix_params.c | 181 ++-- .../pmix/pmix3x/pmix/src/server/pmix_server.c | 286 +++++- .../pmix3x/pmix/src/server/pmix_server_ops.c | 409 ++++++++- .../pmix3x/pmix/src/server/pmix_server_ops.h | 24 +- .../mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c | 156 +++- opal/mca/pmix/pmix3x/pmix/src/util/fd.c | 36 +- opal/mca/pmix/pmix3x/pmix/src/util/fd.h | 33 +- opal/mca/pmix/pmix3x/pmix3x.c | 33 +- opal/mca/pmix/pmix3x/pmix3x.h | 10 +- opal/mca/pmix/pmix3x/pmix3x_server_north.c | 38 +- opal/mca/pmix/pmix3x/pmix3x_server_south.c | 157 +++- opal/mca/pmix/pmix_server.h | 15 +- opal/mca/pmix/pmix_types.h | 55 +- opal/runtime/opal_init.c | 6 +- opal/runtime/opal_progress.c | 3 +- orte/include/orte/types.h | 3 +- orte/mca/iof/base/iof_base_frame.c | 2 +- orte/mca/iof/hnp/iof_hnp_read.c | 11 +- orte/mca/iof/hnp/iof_hnp_receive.c | 9 +- orte/mca/odls/base/odls_base_default_fns.c | 139 ++- orte/mca/plm/base/base.h | 3 +- orte/mca/plm/base/plm_base_launch_support.c | 48 +- orte/mca/plm/base/plm_base_receive.c | 3 +- orte/mca/plm/base/plm_private.h | 2 +- orte/mca/plm/plm_types.h | 27 +- orte/mca/schizo/alps/Makefile.am | 3 +- orte/mca/schizo/ompi/schizo_ompi.c | 130 ++- orte/mca/state/dvm/state_dvm.c | 2 + orte/mca/state/hnp/state_hnp.c | 4 +- orte/mca/state/novm/state_novm.c | 4 +- orte/orted/orted_comm.c | 2 +- orte/orted/pmix/pmix_server_dyn.c | 37 + .../data_type_support/orte_dt_packing_fns.c | 8 +- .../data_type_support/orte_dt_unpacking_fns.c | 7 +- orte/runtime/orte_globals.c | 5 +- orte/runtime/orte_globals.h | 4 +- orte/tools/prun/prun.c | 175 +++- orte/util/attr.c | 139 ++- orte/util/attr.h | 31 +- orte/util/error_strings.c | 4 +- 88 files 
changed, 4653 insertions(+), 759 deletions(-) create mode 100644 opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.c create mode 100644 opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c index 20ae1f0fe7..734306d937 100644 --- a/opal/dss/dss_compare.c +++ b/opal/dss/dss_compare.c @@ -12,7 +12,7 @@ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -359,6 +359,8 @@ int opal_dss_compare_value(opal_value_t *value1, opal_value_t *value2, opal_data return opal_dss_compare_timeval(&value1->data.tv, &value2->data.tv, type); case OPAL_NAME: return opal_dss_compare_name(&value1->data.name, &value2->data.name, type); + case OPAL_ENVAR: + return opal_dss_compare_envar(&value1->data.envar, &value2->data.envar, type); default: opal_output(0, "COMPARE-OPAL-VALUE: UNSUPPORTED TYPE %d", (int)value1->type); return OPAL_EQUAL; @@ -458,3 +460,47 @@ int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type) return OPAL_EQUAL; } +int opal_dss_compare_envar(opal_envar_t *value1, opal_envar_t *value2, opal_data_type_t type) +{ + int rc; + + if (NULL != value1->envar) { + if (NULL == value2->envar) { + return OPAL_VALUE1_GREATER; + } + rc = strcmp(value1->envar, value2->envar); + if (rc < 0) { + return OPAL_VALUE2_GREATER; + } else if (0 < rc) { + return OPAL_VALUE1_GREATER; + } + } else if (NULL != value2->envar) { + /* we know value1->envar had to be NULL */ + return OPAL_VALUE2_GREATER; + } + + /* if both are NULL or are equal, then check value */ + if (NULL != value1->value) { + if (NULL == value2->value) { + return OPAL_VALUE1_GREATER; + } + rc = strcmp(value1->value, value2->value); + if (rc < 0) { 
+ return OPAL_VALUE2_GREATER; + } else if (0 < rc) { + return OPAL_VALUE1_GREATER; + } + } else if (NULL != value2->value) { + /* we know value1->value had to be NULL */ + return OPAL_VALUE2_GREATER; + } + + /* finally, check separator */ + if (value1->separator < value2->separator) { + return OPAL_VALUE2_GREATER; + } + if (value2->separator < value1->separator) { + return OPAL_VALUE1_GREATER; + } + return OPAL_EQUAL; +} diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c index a39798bd46..184897d77e 100644 --- a/opal/dss/dss_copy.c +++ b/opal/dss/dss_copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -338,6 +338,16 @@ int opal_dss_copy_value(opal_value_t **dest, opal_value_t *src, case OPAL_NAME: memcpy(&p->data.name, &src->data.name, sizeof(opal_process_name_t)); break; + case OPAL_ENVAR: + OBJ_CONSTRUCT(&p->data.envar, opal_envar_t); + if (NULL != src->data.envar.envar) { + p->data.envar.envar = strdup(src->data.envar.envar); + } + if (NULL != src->data.envar.value) { + p->data.envar.value = strdup(src->data.envar.value); + } + p->data.envar.separator = src->data.envar.separator; + break; default: opal_output(0, "COPY-OPAL-VALUE: UNSUPPORTED TYPE %d", (int)src->type); return OPAL_ERROR; @@ -409,3 +419,25 @@ int opal_dss_copy_vpid(opal_vpid_t **dest, opal_vpid_t *src, opal_data_type_t ty return OPAL_SUCCESS; } + +int opal_dss_copy_envar(opal_envar_t **dest, opal_envar_t *src, opal_data_type_t type) +{ + opal_envar_t *val; + + val = OBJ_NEW(opal_envar_t); + if (NULL == val) { + OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + if (NULL != src->envar) { + 
val->envar = strdup(src->envar); + } + if (NULL != src->value) { + val->value = strdup(src->value); + } + val->separator = src->separator; + *dest = val; + + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h index a2514379ce..e4360b23f3 100644 --- a/opal/dss/dss_internal.h +++ b/opal/dss/dss_internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -334,6 +334,8 @@ int opal_dss_pack_vpid(opal_buffer_t *buffer, const void *src, int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type); +int opal_dss_pack_envar(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type); /* * Internal unpack functions @@ -407,6 +409,9 @@ int opal_dss_unpack_vpid(opal_buffer_t *buffer, void *dest, int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, int32_t *num_vals, opal_data_type_t type); +int opal_dss_unpack_envar(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type); + /* * Internal copy functions */ @@ -438,6 +443,8 @@ int opal_dss_copy_jobid(opal_jobid_t **dest, opal_jobid_t *src, opal_data_type_t int opal_dss_copy_vpid(opal_vpid_t **dest, opal_vpid_t *src, opal_data_type_t type); +int opal_dss_copy_envar(opal_envar_t **dest, opal_envar_t *src, opal_data_type_t type); + /* * Internal compare functions @@ -503,6 +510,7 @@ int opal_dss_compare_jobid(opal_jobid_t *value1, opal_data_type_t type); int opal_dss_compare_status(int *value1, int *value2, opal_data_type_t type); +int 
opal_dss_compare_envar(opal_envar_t *value1, opal_envar_t *value2, opal_data_type_t type); /* * Internal print functions @@ -544,6 +552,8 @@ int opal_dss_print_name(char **output, char *prefix, opal_process_name_t *name, int opal_dss_print_jobid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type); int opal_dss_print_vpid(char **output, char *prefix, opal_process_name_t *src, opal_data_type_t type); int opal_dss_print_status(char **output, char *prefix, int *src, opal_data_type_t type); +int opal_dss_print_envar(char **output, char *prefix, + opal_envar_t *src, opal_data_type_t type); /* diff --git a/opal/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c index e84bfc4ccb..0fa02d01c2 100644 --- a/opal/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c index 63a036851c..1b7085f8bd 100644 --- a/opal/dss/dss_open_close.c +++ b/opal/dss/dss_open_close.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -232,6 +232,26 @@ OBJ_CLASS_INSTANCE(opal_node_stats_t, opal_object_t, opal_node_stats_destruct); +static void opal_envar_construct(opal_envar_t *obj) +{ + obj->envar = NULL; + obj->value = NULL; + obj->separator = '\0'; +} +static void opal_envar_destruct(opal_envar_t *obj) +{ + if (NULL != obj->envar) { + free(obj->envar); + } + if (NULL != obj->value) { + free(obj->value); + } +} +OBJ_CLASS_INSTANCE(opal_envar_t, + opal_list_item_t, + opal_envar_construct, + opal_envar_destruct); + int opal_dss_register_vars (void) { mca_base_var_enum_t *new_enum; @@ -624,6 +644,17 @@ int opal_dss_open(void) "OPAL_STATUS", &tmp))) { return rc; } + + tmp = OPAL_ENVAR; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_envar, + opal_dss_unpack_envar, + (opal_dss_copy_fn_t)opal_dss_copy_envar, + (opal_dss_compare_fn_t)opal_dss_compare_envar, + (opal_dss_print_fn_t)opal_dss_print_envar, + OPAL_DSS_UNSTRUCTURED, + "OPAL_ENVAR", &tmp))) { + return rc; + } /* All done */ opal_dss_initialized = true; diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c index 87a7573a03..703886856f 100644 --- a/opal/dss/dss_pack.c +++ b/opal/dss/dss_pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -836,6 +836,11 @@ int opal_dss_pack_value(opal_buffer_t *buffer, const void *src, return ret; } break; + case OPAL_ENVAR: + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &ptr[i]->data.envar, 1, OPAL_ENVAR))) { + return ret; + } + break; default: opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key); return OPAL_ERROR; @@ -981,3 +986,24 @@ int opal_dss_pack_status(opal_buffer_t *buffer, const void *src, return ret; } + +int opal_dss_pack_envar(opal_buffer_t *buffer, const void *src, + int32_t num_vals, opal_data_type_t type) +{ + int ret; + int32_t n; + opal_envar_t *ptr = (opal_envar_t*)src; + + for (n=0; n < num_vals; n++) { + if (OPAL_SUCCESS != (ret = opal_dss_pack_string(buffer, &ptr[n].envar, 1, OPAL_STRING))) { + return ret; + } + if (OPAL_SUCCESS != (ret = opal_dss_pack_string(buffer, &ptr[n].value, 1, OPAL_STRING))) { + return ret; + } + if (OPAL_SUCCESS != (ret = opal_dss_pack_byte(buffer, &ptr[n].separator, 1, OPAL_BYTE))) { + return ret; + } + } + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c index 1d1d912b0e..8009c3f2c1 100644 --- a/opal/dss/dss_print.c +++ b/opal/dss/dss_print.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -806,6 +806,13 @@ int opal_dss_print_value(char **output, char *prefix, opal_value_t *src, opal_da case OPAL_PTR: asprintf(output, "%sOPAL_VALUE: Data type: OPAL_PTR\tKey: %s", prefx, src->key); break; + case OPAL_ENVAR: + asprintf(output, "%sOPAL_VALUE: Data type: OPAL_ENVAR\tKey: %s\tName: %s\tValue: %s\tSeparator: %c", + prefx, src->key, + (NULL == src->data.envar.envar) ? "NULL" : src->data.envar.envar, + (NULL == src->data.envar.value) ? "NULL" : src->data.envar.value, + ('\0' == src->data.envar.separator) ? ' ' : src->data.envar.separator); + break; default: asprintf(output, "%sOPAL_VALUE: Data type: UNKNOWN\tKey: %s\tValue: UNPRINTABLE", prefx, src->key); @@ -895,3 +902,25 @@ int opal_dss_print_status(char **output, char *prefix, asprintf(output, "%sData type: OPAL_STATUS\tValue: %s", prefx, opal_strerror(*src)); return OPAL_SUCCESS; } + + +int opal_dss_print_envar(char **output, char *prefix, + opal_envar_t *src, opal_data_type_t type) +{ + char *prefx = " "; + + /* deal with NULL prefix */ + if (NULL != prefix) prefx = prefix; + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: OPAL_ENVAR\tValue: NULL pointer", prefx); + return OPAL_SUCCESS; + } + + asprintf(output, "%sOPAL_VALUE: Data type: OPAL_ENVAR\tName: %s\tValue: %s\tSeparator: %c", + prefx, (NULL == src->envar) ? "NULL" : src->envar, + (NULL == src->value) ? "NULL" : src->value, + ('\0' == src->separator) ? ' ' : src->separator); + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h index 23d2f08dca..47da99da6c 100644 --- a/opal/dss/dss_types.h +++ b/opal/dss/dss_types.h @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
$COPYRIGHT$ * * Additional copyrights may follow @@ -107,6 +107,7 @@ typedef struct { #define OPAL_INFO_DIRECTIVES (opal_data_type_t) 36 /**< corresponds to PMIx info directives type (uint32_t) */ #define OPAL_PROC_STATE (opal_data_type_t) 37 /**< corresponds to PMIx proc state type (uint8_t) */ #define OPAL_PROC_INFO (opal_data_type_t) 38 /**< corresponds to PMIx proc_info type */ +#define OPAL_ENVAR (opal_data_type_t) 39 /**< corresponds to PMIx envar type */ /* OPAL Dynamic */ #define OPAL_DSS_ID_DYNAMIC (opal_data_type_t) 100 @@ -131,7 +132,16 @@ typedef struct { opal_status_t exit_code; opal_proc_state_t state; } opal_proc_info_t; -OBJ_CLASS_DECLARATION(opal_proc_info_t); +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_proc_info_t); + +/* define a struct for envar directives */ +typedef struct { + opal_list_item_t super; + char *envar; + char *value; + char separator; +} opal_envar_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_envar_t); /* Data value object */ typedef struct { @@ -163,6 +173,7 @@ typedef struct { opal_process_name_t name; opal_proc_info_t pinfo; void *ptr; // never packed or passed anywhere + opal_envar_t envar; } data; } opal_value_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_value_t); diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c index 212851bb94..bb28673d2f 100644 --- a/opal/dss/dss_unpack.c +++ b/opal/dss/dss_unpack.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -1099,6 +1099,11 @@ int opal_dss_unpack_value(opal_buffer_t *buffer, void *dest, return ret; } break; + case OPAL_ENVAR: + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.envar, &m, OPAL_ENVAR))) { + return ret; + } + break; default: opal_output(0, "UNPACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key); return OPAL_ERROR; @@ -1261,3 +1266,35 @@ int opal_dss_unpack_status(opal_buffer_t *buffer, void *dest, return ret; } + + +int opal_dss_unpack_envar(opal_buffer_t *buffer, void *dest, + int32_t *num_vals, opal_data_type_t type) +{ + opal_envar_t *ptr; + int32_t i, n, m; + int ret; + + ptr = (opal_envar_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + m=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_string(buffer, &ptr[i].envar, &m, OPAL_STRING))) { + OPAL_ERROR_LOG(ret); + return ret; + } + m=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_string(buffer, &ptr[i].value, &m, OPAL_STRING))) { + OPAL_ERROR_LOG(ret); + return ret; + } + m=1; + if (OPAL_SUCCESS != (ret = opal_dss_unpack_byte(buffer, &ptr[i].separator, &m, OPAL_BYTE))) { + OPAL_ERROR_LOG(ret); + return ret; + } + } + + return OPAL_SUCCESS; +} diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index e3e1cd2528..246e964da0 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -97,7 +97,8 @@ enum { OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66), OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68), - OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69) + OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69), + OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index c9c7c9bb90..3f9fc11dc7 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -738,6 +738,35 @@ typedef int (*opal_pmix_base_module_server_push_io_fn_t)(const opal_process_name opal_pmix_iof_channel_t channel, unsigned char *data, size_t nbytes); +/* define a callback function for the setup_application API. The returned info + * array is owned by the PMIx server library and will be free'd when the + * provided cbfunc is called. */ +typedef void (*opal_pmix_setup_application_cbfunc_t)(int status, + opal_list_t *info, + void *provided_cbdata, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Provide a function by which we can request + * any application-specific environmental variables prior to + * launch of an application. For example, network libraries may + * opt to provide security credentials for the application. This + * is defined as a non-blocking operation in case network + * libraries need to perform some action before responding. The + * returned env will be distributed along with the application */ +typedef int (*opal_pmix_server_setup_application_fn_t)(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_setup_application_cbfunc_t cbfunc, void *cbdata); + +/* Provide a function by which the local PMIx server can perform + * any application-specific operations prior to spawning local + * clients of a given application. For example, a network library + * might need to setup the local driver for "instant on" addressing. 
+ */ +typedef int (*opal_pmix_server_setup_local_support_fn_t)(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + /************************************************************ * TOOL APIs * ************************************************************/ @@ -925,6 +954,8 @@ typedef struct { opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; opal_pmix_base_module_server_notify_event_fn_t server_notify_event; opal_pmix_base_module_server_push_io_fn_t server_iof_push; + opal_pmix_server_setup_application_fn_t server_setup_application; + opal_pmix_server_setup_local_support_fn_t server_setup_local_support; /* tool APIs */ opal_pmix_base_module_tool_init_fn_t tool_init; opal_pmix_base_module_tool_fini_fn_t tool_finalize; diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index d166cfb2f6..9832d74033 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitf47bda5 +repo_rev=git36e7e24 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Feb 19, 2018" +date="Feb 09, 2018" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 index 2245e3e396..47f5d94939 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 @@ -626,7 +626,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. On others, it's in the standard library, but stubbed with diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix-valgrind.supp b/opal/mca/pmix/pmix3x/pmix/contrib/pmix-valgrind.supp index ba8a28d5c6..5c2fb6ef6e 100644 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix-valgrind.supp +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix-valgrind.supp @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix3x/pmix/examples/debuggerd.c b/opal/mca/pmix/pmix3x/pmix/examples/debuggerd.c index 5924dca717..2d21f241e9 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/debuggerd.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/debuggerd.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -41,6 +41,7 @@ typedef struct { } myquery_data_t; +static volatile bool waiting_for_debugger = true; static pmix_proc_t myproc; /* this is a callback function for the PMIx_Query @@ -133,6 +134,11 @@ int main(int argc, char **argv) size_t nq, n; myquery_data_t myquery_data; +fprintf(stderr, "I AM HERE\n"); +fflush(stderr); + sleep(10); + exit(0); + /* init us - since we were launched by the RM, our connection info * will have been provided at startup. */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { @@ -210,7 +216,7 @@ int main(int argc, char **argv) n = 0; fprintf(stderr, "[%s:%u] Hanging around awhile, doing debugger magic\n", myproc.nspace, myproc.rank); while (n < 5) { - usleep(10); + usleep(1000); ++n; } diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix.h b/opal/mca/pmix/pmix3x/pmix/include/pmix.h index cc7ed32f82..98f19bea74 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix.h @@ -567,7 +567,6 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm PMIX_INFO_DESTRUCT(&_in); \ } while(0) - /* Request a credential from the PMIx server/SMS. * Input values include: * @@ -632,6 +631,118 @@ PMIX_EXPORT pmix_status_t PMIx_Validate_credential(const pmix_byte_object_t *cre const pmix_info_t info[], size_t ninfo, pmix_validation_cbfunc_t cbfunc, void *cbdata); +/* Define a callback function for delivering forwarded IO to a process + * This function will be called whenever data becomes available, or a + * specified buffering size and/or time has been met. The function + * will be passed the following values: + * + * iofhdlr - the returned registration number of the handler being invoked. + * This is required when deregistering the handler. + * + * channel - a bitmask identifying the channel the data arrived on + * + * source - the nspace/rank of the process that generated the data + * + * payload - pointer to character array containing the data. 
Note that + * multiple strings may be included, and that the array may + * _not_ be NULL terminated + * + * info - an optional array of info provided by the source containing + * metadata about the payload. This could include PMIX_IOF_COMPLETE + * + * ninfo - number of elements in the optional info array + */ + typedef void (*pmix_iof_cbfunc_t)(size_t iofhdlr, pmix_iof_channel_t channel, + pmix_proc_t *source, char *payload, + pmix_info_t info[], size_t ninfo); + + +/* Register to receive output forwarded from a remote process. + * + * procs - array of identifiers for sources whose IO is being + * requested. Wildcard rank indicates that all procs + * in the specified nspace are included in the request + * + * nprocs - number of identifiers in the procs array + * + * directives - optional array of attributes to control the + * behavior of the request. For example, this + * might include directives on buffering IO + * before delivery, and/or directives to include + * or exclude any backlogged data + * + * ndirs - number of elements in the directives array + * + * channel - bitmask of IO channels included in the request. + * NOTE: STDIN is not supported as it will always + * be delivered to the stdin file descriptor + * + * cbfunc - function to be called when relevant IO is received + * + * regcbfunc - since registration is async, this is the + * function to be called when registration is + * completed. The function itself will return + * a non-success error if the registration cannot + * be submitted - in this case, the regcbfunc + * will _not_ be called. + * + * regcbdata - pointer to object to be returned in regcbfunc + */ +PMIX_EXPORT pmix_status_t PMIx_IOF_pull(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t directives[], size_t ndirs, + pmix_iof_channel_t channel, pmix_iof_cbfunc_t cbfunc, + pmix_hdlr_reg_cbfunc_t regcbfunc, void *regcbdata); + +/* Deregister from output forwarded from a remote process. 
+ * + * iofhdlr - the registration number returned from the + * call to PMIx_IOF_pull + * + * directives - optional array of attributes to control the + * behavior of the request. For example, this + * might include directives regarding what to + * do with any data currently in the IO buffer + * for this process + * + * cbfunc - function to be called when deregistration has + * been completed. Note that any IO to be flushed + * may continue to be received after deregistration + * has completed. + * + * cbdata - pointer to object to be returned in cbfunc + */ +PMIX_EXPORT pmix_status_t PMIx_IOF_deregister(size_t iofhdlr, + const pmix_info_t directives[], size_t ndirs, + pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Push data collected locally (typically from stdin) to + * target recipients. + * + * targets - array of process identifiers to which the data is to be delivered. Note + * that a WILDCARD rank indicates that all procs in the given nspace are + * to receive a copy of the data + * + * ntargets - number of procs in the targets array + * + * directives - optional array of attributes to control the + * behavior of the request. 
For example, this + * might include directives on buffering IO + * before delivery, and/or directives to include + * or exclude any backlogged data + * + * ndirs - number of elements in the directives array + * + * bo - pointer to a byte object containing the stdin data + * + * cbfunc - callback function when the data has been forwarded + * + * cbdata - object to be returned in cbfunc + */ +PMIX_EXPORT pmix_status_t PMIx_IOF_push(const pmix_proc_t targets[], size_t ntargets, + pmix_byte_object_t *bo, + const pmix_info_t directives[], size_t ndirs, + pmix_op_cbfunc_t cbfunc, void *cbdata); + #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index e4ef065bde..8ec8d8db13 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -147,6 +147,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts #define PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does // not want to connect to a PMIx server + // from the specified processes to this tool /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id @@ -220,7 +221,9 @@ typedef uint32_t pmix_rank_t; #define PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace #define PMIX_PROC_URI "pmix.puri" // (char*) URI containing contact info for proc #define PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs -#define PMIX_PARENT_ID "pmix.parent" // (pmix_proc_t) process identifier of my parent process +#define PMIX_PARENT_ID "pmix.parent" // (pmix_proc_t*) identifier of the process that called PMIx_Spawn + // to launch this proc's application + /* size info */ #define PMIX_UNIV_SIZE "pmix.univ.size" // 
(uint32_t) #procs in this nspace @@ -324,7 +327,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_EVENT_WANT_TERMINATION "pmix.evterm" // (bool) indicates that the handler has determined that the application should be terminated -/* attributes used to describe "spawn" attributes */ +/* attributes used to describe "spawn" directives */ #define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use #define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs #define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs @@ -341,10 +344,8 @@ typedef uint32_t pmix_rank_t; #define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries #define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position #define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init -#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin -#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc -#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me -#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me +#define PMIX_STDIN_TGT "pmix.stdin" // (pmix_proc_t) proc that is to receive stdin + // (PMIX_RANK_WILDCARD = all in given nspace) #define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons #define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected // job - i.e., not part of the "comm_world" of the job @@ -364,6 +365,11 @@ typedef uint32_t pmix_rank_t; #define PMIX_JOB_CONTINUOUS "pmix.continuous" // (bool) application is continuous, all failed procs should // be immediately restarted #define PMIX_MAX_RESTARTS "pmix.maxrestarts" // (uint32_t) max number of times to restart a job +#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward the 
stdin from this process to the spawned processes +#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from the spawned processes to this process (typically used by a tool) +#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from the spawned processes to this process (typically used by a tool) +#define PMIX_FWD_STDDIAG "pmix.fwd.stddiag" // (bool) if a diagnostic channel exists, forward any output on it + // from the spawned processes to this process (typically used by a tool) /* connect attributes */ @@ -416,14 +422,24 @@ typedef uint32_t pmix_rank_t; #define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification #define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are #define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define PMIX_DEBUG_JOB_DIRECTIVES "pmix.dbg.jdirs" // (pmix_data_array_t) array of job-level directives +#define PMIX_DEBUG_APP_DIRECTIVES "pmix.dbg.adirs" // (pmix_data_array_t) array of app-level directives /* Resource Manager identification */ #define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager #define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string -/* attributes for setting envars */ -#define PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment -#define PMIX_UNSET_ENVAR "pmix.unset.envar" // (char*) unset envar specified in string +/* environmental variable operation attributes */ +#define PMIX_SET_ENVAR "pmix.envar.set" // (pmix_envar_t*) set the envar to the given value, + // overwriting any pre-existing one +#define PMIX_ADD_ENVAR "pmix.envar.add" // (pmix_envar_t*) add envar, but do not overwrite any existing one +#define PMIX_UNSET_ENVAR "pmix.envar.unset" // (char*) unset the envar, if present +#define PMIX_PREPEND_ENVAR 
"pmix.envar.prepnd" // (pmix_envar_t*) prepend the given value to the + // specified envar using the separator + // character, creating the envar if it doesn't already exist +#define PMIX_APPEND_ENVAR "pmix.envar.appnd" // (pmix_envar_t*) append the given value to the specified + // envar using the separator character, + // creating the envar if it doesn't already exist /* attributes relating to allocations */ #define PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request @@ -495,12 +511,38 @@ typedef uint32_t pmix_rank_t; // generating the event /* security attributes */ -#define PMIX_CRED_TYPE "pmix.sec.ctype" // when passed in PMIx_Get_credential, a prioritized, +#define PMIX_CRED_TYPE "pmix.sec.ctype" // (char*) when passed in PMIx_Get_credential, a prioritized, // comma-delimited list of desired credential types for use // in environments where multiple authentication mechanisms // may be available. When returned in a callback function, a // string identifier of the credential type +/* IO Forwarding Attributes */ +#define PMIX_IOF_CACHE_SIZE "pmix.iof.csize" // (uint32_t) requested size of the server cache in bytes for each specified channel. + // By default, the server is allowed (but not required) to drop + // all bytes received beyond the max size +#define PMIX_IOF_DROP_OLDEST "pmix.iof.old" // (bool) in an overflow situation, drop the oldest bytes to make room in the cache +#define PMIX_IOF_DROP_NEWEST "pmix.iof.new" // (bool) in an overflow situation, drop any new bytes received until room becomes + // available in the cache (default) +#define PMIX_IOF_BUFFERING_SIZE "pmix.iof.bsize" // (uint32_t) basically controls grouping of IO on the specified channel(s) to + // avoid being called every time a bit of IO arrives. The library + // will execute the callback whenever the specified number of bytes + // becomes available. 
Any remaining buffered data will be "flushed" + // upon call to deregister the respective channel +#define PMIX_IOF_BUFFERING_TIME "pmix.iof.btime" // (uint32_t) max time in seconds to buffer IO before delivering it. Used in conjunction + // with buffering size, this prevents IO from being held indefinitely + // while waiting for another payload to arrive +#define PMIX_IOF_COMPLETE "pmix.iof.cmp" // (bool) indicates whether or not the specified IO channel has been closed + // by the source +#define PMIX_IOF_PUSH_STDIN "pmix.iof.stdin" // (bool) Used by a tool to request that the PMIx library collect + // the tool's stdin and forward it to the procs specified in + // the PMIx_IOF_push call + +/* Attributes for controlling contents of application setup data */ +#define PMIX_SETUP_APP_ENVARS "pmix.setup.env" // (bool) harvest and include relevant envars +#define PMIX_SETUP_APP_NONENVARS "pmix.setup.nenv" // (bool) include all non-envar data +#define PMIX_SETUP_APP_ALL "pmix.setup.all" // (bool) include all relevant data + /**** PROCESS STATE DEFINITIONS ****/ typedef uint8_t pmix_proc_state_t; @@ -638,6 +680,7 @@ typedef int pmix_status_t; #define PMIX_GDS_ACTION_COMPLETE (PMIX_ERR_OP_BASE - 18) #define PMIX_PROC_HAS_CONNECTED (PMIX_ERR_OP_BASE - 19) #define PMIX_CONNECT_REQUESTED (PMIX_ERR_OP_BASE - 20) +#define PMIX_LAUNCH_DIRECTIVE (PMIX_ERR_OP_BASE - 21) /* define a starting point for system error constants so * we avoid renumbering when making additions */ @@ -720,6 +763,9 @@ typedef uint16_t pmix_data_type_t; #define PMIX_ALLOC_DIRECTIVE 43 /**** DEPRECATED ****/ #define PMIX_INFO_ARRAY 44 +/**** ****/ +#define PMIX_IOF_CHANNEL 45 +#define PMIX_ENVAR 46 /********************/ /* define a boundary for implementers so they can add their own data types */ @@ -786,11 +832,32 @@ typedef uint8_t pmix_alloc_directive_t; #define PMIX_ALLOC_EXTERNAL 128 +/* define a set of bit-mask flags for specifying IO + * forwarding channels. 
These can be OR'd together + * to reference multiple channels */ +typedef uint16_t pmix_iof_channel_t; +#define PMIX_FWD_NO_CHANNELS 0x0000 +#define PMIX_FWD_STDIN_CHANNEL 0x0001 +#define PMIX_FWD_STDOUT_CHANNEL 0x0002 +#define PMIX_FWD_STDERR_CHANNEL 0x0004 +#define PMIX_FWD_STDDIAG_CHANNEL 0x0008 +#define PMIX_FWD_ALL_CHANNELS 0x00ff + + /**** PMIX BYTE OBJECT ****/ typedef struct pmix_byte_object { char *bytes; size_t size; } pmix_byte_object_t; + +#define PMIX_BYTE_OBJECT_CREATE(m, n) \ + do { \ + (m) = (pmix_byte_object_t*)malloc((n) * sizeof(pmix_byte_object_t)); \ + if (NULL != (m)) { \ + memset((m), 0, (n)*sizeof(pmix_byte_object_t)); \ + } \ + } while(0) + #define PMIX_BYTE_OBJECT_CONSTRUCT(m) \ do { \ (m)->bytes = NULL; \ @@ -824,6 +891,62 @@ typedef struct pmix_byte_object { } while(0) +/**** PMIX ENVAR STRUCT ****/ +/* Provide a structure for specifying environment variable modifications + * Standard environment variables (e.g., PATH, LD_LIBRARY_PATH, and LD_PRELOAD) + * take multiple arguments separated by delimiters. Unfortunately, the delimiters + * depend upon the variable itself - some use semi-colons, some colons, etc. 
Thus, + * the operation requires not only the name of the variable to be modified and + * the value to be inserted, but also the separator to be used when composing + * the aggregate value + */ +typedef struct { + char *envar; + char *value; + char separator; +} pmix_envar_t; + +#define PMIX_ENVAR_CREATE(m, n) \ + do { \ + (m) = (pmix_envar_t*)calloc((n) , sizeof(pmix_envar_t)); \ + } while (0) +#define PMIX_ENVAR_FREE(m, n) \ + do { \ + size_t _k; \ + if (NULL != (m)) { \ + for (_k=0; _k < (n); _k++) { \ + PMIX_ENVAR_DESTRUCT(&(m)[_k]); \ + } \ + free((m)); \ + } \ + } while (0) +#define PMIX_ENVAR_CONSTRUCT(m) \ + do { \ + (m)->envar = NULL; \ + (m)->value = NULL; \ + (m)->separator = '\0'; \ + } while(0) +#define PMIX_ENVAR_DESTRUCT(m) \ + do { \ + if (NULL != (m)->envar) { \ + free((m)->envar); \ + } \ + if (NULL != (m)->value) { \ + free((m)->value); \ + } \ + } while(0) +#define PMIX_ENVAR_LOAD(m, e, v, s) \ + do { \ + if (NULL != (e)) { \ + (m)->envar = strdup(e); \ + } \ + if (NULL != (v)) { \ + (m)->value = strdup(v); \ + } \ + (m)->separator = (s); \ + } while(0) + + /**** PMIX DATA BUFFER ****/ typedef struct pmix_data_buffer { /** Start of my memory */ @@ -940,9 +1063,9 @@ typedef struct pmix_proc_info { (m) = (pmix_proc_info_t*)calloc((n) , sizeof(pmix_proc_info_t)); \ } while (0) -#define PMIX_PROC_INFO_RELEASE(m) \ - do { \ - PMIX_PROC_INFO_FREE((m)); \ +#define PMIX_PROC_INFO_RELEASE(m) \ + do { \ + PMIX_PROC_INFO_FREE((m), 1); \ } while (0) #define PMIX_PROC_INFO_CONSTRUCT(m) \ @@ -980,6 +1103,17 @@ typedef struct pmix_data_array { size_t size; void *array; } pmix_data_array_t; +#define PMIX_DATA_ARRAY_CONSTRUCT(m, n, t) \ + do { \ + (m)->type = (t); \ + (m)->size = (n); \ + } while(0) +#define PMIX_DATA_ARRAY_CREATE(m, n, t) \ + do { \ + (m) = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); \ + if (NULL != (m)) { /* calloc may fail; leave (m) NULL for the caller to check */ PMIX_DATA_ARRAY_CONSTRUCT((m), (n), (t)); } \ + } while(0) + typedef struct pmix_info_array { size_t size; @@ -1026,6 +1160,7 @@ typedef struct
pmix_value { pmix_data_array_t *darray; void *ptr; pmix_alloc_directive_t adir; + pmix_envar_t envar; /**** DEPRECATED ****/ pmix_info_array_t *array; /********************/ @@ -1136,6 +1271,8 @@ typedef struct pmix_value { } \ free(_p); \ /********************/ \ + } else if (PMIX_ENVAR == (m)->type) { \ + PMIX_ENVAR_DESTRUCT(&(m)->data.envar); \ } \ } while (0) @@ -1629,14 +1766,19 @@ typedef void (*pmix_notification_fn_t)(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata); -/* define a callback function for calls to PMIx_Register_evhdlr. The - * status indicates if the request was successful or not, evhdlr_ref is - * an integer reference assigned to the event handler by PMIx, this reference - * must be used to deregister the err handler. A ptr to the original - * cbdata is returned. */ -typedef void (*pmix_evhdlr_reg_cbfunc_t)(pmix_status_t status, - size_t evhdlr_ref, - void *cbdata); +/* define a callback function for calls to register handlers, e.g., event + * notification and IOF requests + * + * status - PMIX_SUCCESS or an appropriate error constant + * + * refid - reference identifier assigned to the handler by PMIx, + * used to deregister the handler + * + * cbdata - object provided to the registration call + */ +typedef void (*pmix_hdlr_reg_cbfunc_t)(pmix_status_t status, + size_t refid, + void *cbdata); /* define a callback function for calls to PMIx_Get_nb. 
The status * indicates if the requested data was found or not - a pointer to the @@ -1761,7 +1903,7 @@ typedef void (*pmix_validation_cbfunc_t)(pmix_status_t status, PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncodes, pmix_info_t info[], size_t ninfo, pmix_notification_fn_t evhdlr, - pmix_evhdlr_reg_cbfunc_t cbfunc, + pmix_hdlr_reg_cbfunc_t cbfunc, void *cbdata); /* Deregister an event handler @@ -1819,6 +1961,7 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, * - pmix_info_directives_t (PMIX_INFO_DIRECTIVES) * - pmix_data_type_t (PMIX_DATA_TYPE) * - pmix_alloc_directive_t (PMIX_ALLOC_DIRECTIVE) + * - pmix_iof_channel_t (PMIX_IOF_CHANNEL) */ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t status); PMIX_EXPORT const char* PMIx_Proc_state_string(pmix_proc_state_t state); @@ -1828,6 +1971,7 @@ PMIX_EXPORT const char* PMIx_Data_range_string(pmix_data_range_t range); PMIX_EXPORT const char* PMIx_Info_directives_string(pmix_info_directives_t directives); PMIX_EXPORT const char* PMIx_Data_type_string(pmix_data_type_t type); PMIX_EXPORT const char* PMIx_Alloc_directive_string(pmix_alloc_directive_t directive); +PMIX_EXPORT const char* PMIx_IOF_channel_string(pmix_iof_channel_t channel); /* Get the PMIx version string. Note that the provided string is * statically defined and must NOT be free'd */ diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h b/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h index 152893ca2a..5cebee6aae 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science @@ -414,6 +414,74 @@ typedef pmix_status_t (*pmix_server_validate_cred_fn_t)(const pmix_proc_t *proc, const pmix_info_t directives[], size_t ndirs, pmix_validation_cbfunc_t cbfunc, void *cbdata); +/* Request the specified IO channels be forwarded from the given array of procs. + * The function shall return PMIX_SUCCESS once the host RM accepts the request for + * processing, or a PMIx error code if the request itself isn't correct or supported. + * The callback function shall be called when the request has been processed, + * returning either PMIX_SUCCESS to indicate that IO shall be forwarded as requested, + * or some appropriate error code if the request has been denied. + * + * NOTE: STDIN is not supported in this call! The forwarding of stdin is a "push" + * process - procs cannot request that it be "pulled" from some other source + * + * procs - array of process identifiers whose IO is being requested. + * + * nprocs - size of the procs array + * + * directives - array of key-value attributes further defining the request. This + * might include directives on buffering and security credentials for + * access to protected channels + * + * ndirs - size of the directives array + * + * channels - bitmask identifying the channels to be forwarded + * + * cbfunc - callback function when the IO forwarding has been setup + * + * cbdata - object to be returned in cbfunc + * + * This call serves as a registration with the host RM for the given IO channels from + * the specified procs - the host RM is expected to ensure that this local PMIx server + * is on the distribution list for the channel/proc combination + */ +typedef pmix_status_t (*pmix_server_iof_fn_t)(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t directives[], size_t ndirs, + pmix_iof_channel_t channels, + pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Passes stdin to the host RM for transmission to specified recipients. 
The host RM is + * responsible for forwarding the data to all PMIx servers that host the specified + * target. + * + * source - pointer to the identifier of the process whose stdin is being provided + * + * targets - array of process identifiers to which the data is to be delivered. Note + * that a WILDCARD rank indicates that all procs in the given nspace are + * to receive a copy of the data + * + * ntargets - number of procs in the targets array + * + * directives - array of key-value attributes further defining the request. This + * might include directives on buffering and security credentials for + * access to protected channels + * + * ndirs - size of the directives array + * + * bo - pointer to a byte object containing the stdin data + * + * cbfunc - callback function when the data has been forwarded + * + * cbdata - object to be returned in cbfunc + * + */ + +typedef pmix_status_t (*pmix_server_stdin_fn_t)(const pmix_proc_t *source, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + const pmix_byte_object_t *bo, + pmix_op_cbfunc_t cbfunc, void *cbdata); + + typedef struct pmix_server_module_2_0_0_t { /* v1x interfaces */ pmix_server_client_connected_fn_t client_connected; @@ -441,9 +509,11 @@ typedef struct pmix_server_module_2_0_0_t { /* v3x interfaces */ pmix_server_get_cred_fn_t get_credential; pmix_server_validate_cred_fn_t validate_credential; + pmix_server_iof_fn_t iof_pull; + pmix_server_stdin_fn_t push_stdin; } pmix_server_module_t; -/**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/ +/**** HOST RM FUNCTIONS FOR INTERFACE TO PMIX SERVER ****/ /* Initialize the server support library, and provide a * pointer to a pmix_server_module_t structure @@ -611,6 +681,38 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_local_support(const char nspace[], pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Provide a function by which the host RM can pass forwarded IO + * to the local PMIx 
server for distribution to its clients. The + * PMIx server is responsible for determining which of its clients + * have actually registered for the provided data + * + * Parameters include: + * + * source - the process that provided the data being forwarded + * + * channel - the IOF channel (stdin, stdout, etc.) + * + * bo - a byte object containing the data + * + * info - an optional array of metadata describing the data, including + * attributes such as PMIX_IOF_COMPLETE to indicate that the + * source channel has been closed + * + * ninfo - number of elements in the info array + * + * cbfunc - a callback function to be executed once the provided data + * is no longer required. The host RM is required to retain + * the byte object until the callback is executed, or a + * non-success status is returned by the function + * + * cbdata - object pointer to be returned in the callback function + */ +PMIX_EXPORT pmix_status_t PMIx_server_IOF_deliver(const pmix_proc_t *source, + pmix_iof_channel_t channel, + const pmix_byte_object_t *bo, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_tool.h b/opal/mca/pmix/pmix3x/pmix/include/pmix_tool.h index f26445a3c8..ef05389f76 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_tool.h +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_tool.h @@ -98,6 +98,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_init(pmix_proc_t *proc, * operation. 
*/ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.c b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.c index af1a55968f..ac159bcbb9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.c +++ b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.c @@ -44,11 +44,10 @@ static void local_eviction_callback(int fd, short flags, void *arg) } -int pmix_hotel_init(pmix_hotel_t *h, int num_rooms, - pmix_event_base_t *evbase, - uint32_t eviction_timeout, - int eviction_event_priority, - pmix_hotel_eviction_callback_fn_t evict_callback_fn) +pmix_status_t pmix_hotel_init(pmix_hotel_t *h, int num_rooms, + pmix_event_base_t *evbase, + uint32_t eviction_timeout, + pmix_hotel_eviction_callback_fn_t evict_callback_fn) { int i; diff --git a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h index 8eb4064610..15b965ce1f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h +++ b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h @@ -142,7 +142,6 @@ PMIX_CLASS_DECLARATION(pmix_hotel_t); * @param evbase Pointer to event base used for eviction timeout * @param eviction_timeout Max length of a stay at the hotel before * the eviction callback is invoked (in microseconds) - * @param eviction_event_priority Event lib priority for the eviction timeout * @param evict_callback_fn Callback function invoked if an occupant * does not check out before the eviction_timeout. * @@ -158,11 +157,10 @@ PMIX_CLASS_DECLARATION(pmix_hotel_t); * @return PMIX_SUCCESS if all initializations were succesful. Otherwise, * the error indicate what went wrong in the function. 
*/ -PMIX_EXPORT int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, - pmix_event_base_t *evbase, - uint32_t eviction_timeout, - int eviction_event_priority, - pmix_hotel_eviction_callback_fn_t evict_callback_fn); +PMIX_EXPORT pmix_status_t pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, + pmix_event_base_t *evbase, + uint32_t eviction_timeout, + pmix_hotel_eviction_callback_fn_t evict_callback_fn); /** * Check in an occupant to the hotel. @@ -184,9 +182,9 @@ PMIX_EXPORT int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, * @return PMIX_ERR_TEMP_OUT_OF_RESOURCE is the hotel is full. Try * again later. */ -static inline int pmix_hotel_checkin(pmix_hotel_t *hotel, - void *occupant, - int *room_num) +static inline pmix_status_t pmix_hotel_checkin(pmix_hotel_t *hotel, + void *occupant, + int *room_num) { pmix_hotel_room_t *room; @@ -214,8 +212,8 @@ static inline int pmix_hotel_checkin(pmix_hotel_t *hotel, * caller *knows* that there is a room available. */ static inline void pmix_hotel_checkin_with_res(pmix_hotel_t *hotel, - void *occupant, - int *room_num) + void *occupant, + int *room_num) { pmix_hotel_room_t *room; diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index abf600ed66..808f52e5c6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -75,6 +75,7 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/mca/preg/preg.h" #include "src/mca/ptl/base/base.h" #include "src/include/pmix_globals.h" +#include "src/common/pmix_iof.h" #include "pmix_client_ops.h" @@ -342,6 +343,53 @@ static void _check_for_notify(pmix_info_t info[], size_t ninfo) } } +static void client_iof_handler(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_peer_t *peer = (pmix_peer_t*)pr; + pmix_proc_t source; + pmix_iof_channel_t channel; + pmix_byte_object_t bo; + int32_t cnt; + 
pmix_status_t rc; + + pmix_output_verbose(2, pmix_client_globals.iof_output, + "recvd IOF"); + + /* if the buffer is empty, they are simply closing the channel */ + if (0 == buf->bytes_used) { + return; + } + + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &source, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &channel, &cnt, PMIX_IOF_CHANNEL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &bo, &cnt, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + if (NULL != bo.bytes && 0 < bo.size) { + if (channel & PMIX_FWD_STDOUT_CHANNEL) { + pmix_iof_write_output(&source, channel, &bo, &pmix_client_globals.iof_stdout.wev); + } else { + pmix_iof_write_output(&source, channel, &bo, &pmix_client_globals.iof_stderr.wev); + } + } + PMIX_BYTE_OBJECT_DESTRUCT(&bo); +} + PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { @@ -358,6 +406,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_lock_t reglock; size_t n; bool found; + pmix_ptl_posted_recv_t *rcv; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -394,6 +443,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* setup the IO Forwarding recv */ + rcv = PMIX_NEW(pmix_ptl_posted_recv_t); + rcv->tag = PMIX_PTL_TAG_IOF; + rcv->cbfunc = client_iof_handler; + /* add it to the end of the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &rcv->super); + /* setup the globals */ PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); @@ -578,7 +634,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* lood for a debugger attach key */ + /* look for a debugger attach key */ (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); wildcard.rank = 
PMIX_RANK_WILDCARD; PMIX_INFO_LOAD(&ginfo, PMIX_OPTIONAL, NULL, PMIX_BOOL); @@ -587,6 +643,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* if the value was found, then we need to wait for debugger attach here */ /* register for the debugger release notification */ PMIX_CONSTRUCT_LOCK(&reglock); + PMIX_POST_OBJECT(&reglock); PMIx_Register_event_handler(&code, 1, NULL, 0, notification_fn, NULL, (void*)&reglock); /* wait for it to arrive */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h index 0cfd7d0ade..c84dffd6d6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,6 +17,7 @@ #include "src/class/pmix_list.h" #include "src/class/pmix_pointer_array.h" #include "src/include/pmix_globals.h" +#include "src/common/pmix_iof.h" BEGIN_C_DECLS @@ -42,9 +43,15 @@ typedef struct { // verbosity for client event operations int event_output; int event_verbose; + // verbosity for client iof operations + int iof_output; + int iof_verbose; // verbosity for basic client functions int base_output; int base_verbose; + /* IOF output sinks */ + pmix_iof_sink_t iof_stdout; + pmix_iof_sink_t iof_stderr; } pmix_client_globals_t; PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/Makefile.include b/opal/mca/pmix/pmix3x/pmix/src/common/Makefile.include index 5fd0666172..dcd723b956 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/Makefile.include +++ b/opal/mca/pmix/pmix3x/pmix/src/common/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # @@ -15,4 +15,8 @@ sources += \ common/pmix_log.c \ common/pmix_control.c \ common/pmix_data.c \ - common/pmix_security.c + common/pmix_security.c \ + common/pmix_iof.c + +headers += \ + common/pmix_iof.h diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.c new file mode 100644 index 0000000000..c0b4199d05 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.c @@ -0,0 +1,863 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "src/threads/threads.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/name_fns.h" +#include "src/util/output.h" +#include "src/mca/bfrops/bfrops.h" +#include "src/mca/ptl/ptl.h" + +#include "src/client/pmix_client_ops.h" +#include "src/server/pmix_server_ops.h" +#include "src/include/pmix_globals.h" + +static void msgcbfunc(struct pmix_peer_t *peer, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; + int32_t m; + pmix_status_t rc, status; + + /* unpack the return status */ + m=1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &status, &m, PMIX_STATUS); + if (PMIX_SUCCESS == rc && PMIX_SUCCESS == status) { + /* store the request on our list - we are in an event, and + * so this is safe */ + pmix_list_append(&pmix_globals.iof_requests, &cd->iofreq->super); + } else { /* unpack failed or the server rejected the request: it was never queued, so drop it */ + status = (PMIX_SUCCESS != rc) ? rc : status; + PMIX_RELEASE(cd->iofreq); + } + + pmix_output_verbose(2, pmix_client_globals.iof_output, + "pmix:iof_register returned
status %s", PMIx_Error_string(status)); + + if (NULL != cd->cbfunc.hdlrregcbfn) { /* PMIx_IOF_pull stored a handler-registration callback; NOTE(review): no handler id is assigned here, so 0 is reported as refid */ + cd->cbfunc.hdlrregcbfn(status, 0, cd->cbdata); + } + PMIX_RELEASE(cd); +} + +PMIX_EXPORT pmix_status_t PMIx_IOF_pull(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t directives[], size_t ndirs, + pmix_iof_channel_t channel, pmix_iof_cbfunc_t cbfunc, + pmix_hdlr_reg_cbfunc_t regcbfunc, void *regcbdata) +{ + pmix_shift_caddy_t *cd; + pmix_cmd_t cmd = PMIX_IOF_PULL_CMD; + pmix_buffer_t *msg; + pmix_status_t rc; + + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + pmix_output_verbose(2, pmix_client_globals.iof_output, + "pmix:iof_register"); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we are a server, we cannot do this */ + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOT_SUPPORTED; + } + + /* we don't allow stdin to flow thru this path */ + if (PMIX_FWD_STDIN_CHANNEL & channel) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOT_SUPPORTED; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + + /* send this request to the server */ + cd = PMIX_NEW(pmix_shift_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->cbfunc.hdlrregcbfn = regcbfunc; + cd->cbdata = regcbdata; + /* setup the request item */ + cd->iofreq = PMIX_NEW(pmix_iof_req_t); + if (NULL == cd->iofreq) { + PMIX_RELEASE(cd); + return PMIX_ERR_NOMEM; + } + /* retain the channels and cbfunc */ + cd->iofreq->channels = channel; + cd->iofreq->cbfunc = cbfunc; + /* we don't need the source specifications - only the + * server cares as it will filter against them */ + + /* setup the registration cmd */ + msg = PMIX_NEW(pmix_buffer_t); + if (NULL == msg) { + PMIX_RELEASE(cd->iofreq); + PMIX_RELEASE(cd); + return
PMIX_ERR_NOMEM; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nprocs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, procs, nprocs, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndirs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + if (0 < ndirs) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, directives, ndirs, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &channel, 1, PMIX_IOF_CHANNEL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + + pmix_output_verbose(2, pmix_client_globals.iof_output, + "pmix:iof_request sending to server"); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, msgcbfunc, (void*)cd); + + cleanup: + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + PMIX_RELEASE(cd->iofreq); + PMIX_RELEASE(cd); + } + return rc; +} + +typedef struct { + pmix_op_cbfunc_t cbfunc; + void *cbdata; +} pmix_ltcaddy_t; + +static void stdincbfunc(struct pmix_peer_t *peer, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_ltcaddy_t *cd = (pmix_ltcaddy_t*)cbdata; + int cnt; + pmix_status_t rc, status; + + /* a zero-byte buffer indicates that this recv is being + * completed due to a lost connection */ + if (PMIX_BUFFER_IS_EMPTY(buf)) { + /* release the caller */ + if (NULL != cd->cbfunc) { + cd->cbfunc(PMIX_ERR_COMM_FAILURE, cd->cbdata); + } + free(cd); + return; + } + + /* unpack the status */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS 
!= rc) { + status = rc; + } + if (NULL != cd->cbfunc) { + cd->cbfunc(status, cd->cbdata); + } + free(cd); +} + +pmix_status_t PMIx_IOF_push(const pmix_proc_t targets[], size_t ntargets, + pmix_byte_object_t *bo, + const pmix_info_t directives[], size_t ndirs, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_buffer_t *msg; + pmix_cmd_t cmd = PMIX_IOF_PUSH_CMD; + pmix_status_t rc; + pmix_ltcaddy_t *cd; + + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + + /* if we are not a server, then we send the provided + * data to our server for processing */ + if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + msg = PMIX_NEW(pmix_buffer_t); + if (NULL == msg) { + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ntargets, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + if (0 < ntargets) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, targets, ntargets, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndirs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + if (0 < ndirs) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, directives, ndirs, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } + + cd = (pmix_ltcaddy_t*)malloc(sizeof(pmix_ltcaddy_t)); + if (NULL 
== cd) { + PMIX_RELEASE(msg); + rc = PMIX_ERR_NOMEM; + return rc; + } + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, stdincbfunc, cd); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + free(cd); + } + return rc; + } + + /* if we are a server, just pass the data up to our host */ + if (NULL == pmix_host_server.push_stdin) { + return PMIX_ERR_NOT_SUPPORTED; + } + rc = pmix_host_server.push_stdin(&pmix_globals.myid, + targets, ntargets, + directives, ndirs, + bo, cbfunc, cbdata); + return PMIX_SUCCESS; +} + +pmix_status_t pmix_iof_write_output(const pmix_proc_t *name, + pmix_iof_channel_t stream, + const pmix_byte_object_t *bo, + pmix_iof_write_event_t *channel) +{ + char starttag[PMIX_IOF_BASE_TAG_MAX], endtag[PMIX_IOF_BASE_TAG_MAX], *suffix; + pmix_iof_write_output_t *output; + size_t i; + int j, k, starttaglen, endtaglen, num_buffered; + bool endtagged; + char qprint[10]; + + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, + "%s write:output setting up to write %lu bytes to %s for %s on fd %d", + PMIX_NAME_PRINT(&pmix_globals.myid), + (unsigned long)bo->size, + PMIx_IOF_channel_string(stream), + PMIX_NAME_PRINT(name), + (NULL == channel) ? 
-1 : channel->fd)); + + /* setup output object */ + output = PMIX_NEW(pmix_iof_write_output_t); + + /* write output data to the corresponding tag */ + if (PMIX_FWD_STDIN_CHANNEL & stream) { + /* copy over the data to be written */ + if (0 < bo->size) { + /* don't copy 0 bytes - we just need to pass + * the zero bytes so the fd can be closed + * after it writes everything out + */ + memcpy(output->data, bo->bytes, bo->size); + } + output->numbytes = bo->size; + goto process; + } else if (PMIX_FWD_STDOUT_CHANNEL & stream) { + /* write the bytes to stdout */ + suffix = "stdout"; + } else if (PMIX_FWD_STDERR_CHANNEL & stream) { + /* write the bytes to stderr */ + suffix = "stderr"; + } else if (PMIX_FWD_STDDIAG_CHANNEL & stream) { + /* write the bytes to stderr */ + suffix = "stddiag"; + } else { + /* error - this should never happen */ + PMIX_ERROR_LOG(PMIX_ERR_VALUE_OUT_OF_BOUNDS); + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, + "%s stream %0x", PMIX_NAME_PRINT(&pmix_globals.myid), stream)); + return PMIX_ERR_VALUE_OUT_OF_BOUNDS; + } + + /* if this is to be xml tagged, create a tag with the correct syntax - we do not allow + * timestamping of xml output + */ + if (pmix_globals.xml_output) { + snprintf(starttag, PMIX_IOF_BASE_TAG_MAX, "<%s rank=\"%s\">", suffix, PMIX_RANK_PRINT(name->rank)); + snprintf(endtag, PMIX_IOF_BASE_TAG_MAX, "", suffix); + goto construct; + } + + /* if we are to timestamp output, start the tag with that */ + if (pmix_globals.timestamp_output) { + time_t mytime; + char *cptr; + /* get the timestamp */ + time(&mytime); + cptr = ctime(&mytime); + cptr[strlen(cptr)-1] = '\0'; /* remove trailing newline */ + + if (pmix_globals.tag_output) { + /* if we want it tagged as well, use both */ + snprintf(starttag, PMIX_IOF_BASE_TAG_MAX, "%s[%s]<%s>:", + cptr, PMIX_NAME_PRINT(name), suffix); + } else { + /* only use timestamp */ + snprintf(starttag, PMIX_IOF_BASE_TAG_MAX, "%s<%s>:", cptr, suffix); + } + /* no endtag for this option */ + 
memset(endtag, '\0', PMIX_IOF_BASE_TAG_MAX); + goto construct; + } + + if (pmix_globals.tag_output) { + snprintf(starttag, PMIX_IOF_BASE_TAG_MAX, "[%s]<%s>:", + PMIX_NAME_PRINT(name), suffix); + /* no endtag for this option */ + memset(endtag, '\0', PMIX_IOF_BASE_TAG_MAX); + goto construct; + } + + /* if we get here, then the data is not to be tagged - just copy it + * and move on to processing + */ + if (0 < bo->size) { + /* don't copy 0 bytes - we just need to pass + * the zero bytes so the fd can be closed + * after it writes everything out + */ + memcpy(output->data, bo->bytes, bo->size); + } + output->numbytes = bo->size; + goto process; + + construct: + starttaglen = strlen(starttag); + endtaglen = strlen(endtag); + endtagged = false; + /* start with the tag */ + for (j=0, k=0; j < starttaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = starttag[j]; + } + /* cycle through the data looking for + * and replace those with the tag + */ + for (i=0; i < bo->size && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; i++) { + if (pmix_globals.xml_output) { + if ('&' == bo->bytes[i]) { + if (k+5 >= PMIX_IOF_BASE_TAGGED_OUT_MAX) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + goto process; + } + snprintf(qprint, 10, "&"); + for (j=0; j < (int)strlen(qprint) && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = qprint[j]; + } + } else if ('<' == bo->bytes[i]) { + if (k+4 >= PMIX_IOF_BASE_TAGGED_OUT_MAX) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + goto process; + } + snprintf(qprint, 10, "<"); + for (j=0; j < (int)strlen(qprint) && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = qprint[j]; + } + } else if ('>' == bo->bytes[i]) { + if (k+4 >= PMIX_IOF_BASE_TAGGED_OUT_MAX) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + goto process; + } + snprintf(qprint, 10, ">"); + for (j=0; j < (int)strlen(qprint) && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = qprint[j]; + } + } else if (bo->bytes[i] < 32 || bo->bytes[i] > 127) { + 
/* this is a non-printable character, so escape it too */ + if (k+7 >= PMIX_IOF_BASE_TAGGED_OUT_MAX) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + goto process; + } + snprintf(qprint, 10, "&#%03d;", (int)bo->bytes[i]); + for (j=0; j < (int)strlen(qprint) && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = qprint[j]; + } + /* if this was a \n, then we also need to break the line with the end tag */ + if ('\n' == bo->bytes[i] && (k+endtaglen+1) < PMIX_IOF_BASE_TAGGED_OUT_MAX) { + /* we need to break the line with the end tag */ + for (j=0; j < endtaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX-1; j++) { + output->data[k++] = endtag[j]; + } + /* move the over */ + output->data[k++] = '\n'; + /* if this isn't the end of the data buffer, add a new start tag */ + if (i < bo->size-1 && (k+starttaglen) < PMIX_IOF_BASE_TAGGED_OUT_MAX) { + for (j=0; j < starttaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = starttag[j]; + endtagged = false; + } + } else { + endtagged = true; + } + } + } else { + output->data[k++] = bo->bytes[i]; + } + } else { + if ('\n' == bo->bytes[i]) { + /* we need to break the line with the end tag */ + for (j=0; j < endtaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX-1; j++) { + output->data[k++] = endtag[j]; + } + /* move the over */ + output->data[k++] = '\n'; + /* if this isn't the end of the data buffer, add a new start tag */ + if (i < bo->size-1) { + for (j=0; j < starttaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX; j++) { + output->data[k++] = starttag[j]; + endtagged = false; + } + } else { + endtagged = true; + } + } else { + output->data[k++] = bo->bytes[i]; + } + } + } + if (!endtagged && k < PMIX_IOF_BASE_TAGGED_OUT_MAX) { + /* need to add an endtag */ + for (j=0; j < endtaglen && k < PMIX_IOF_BASE_TAGGED_OUT_MAX-1; j++) { + output->data[k++] = endtag[j]; + } + output->data[k] = '\n'; + } + output->numbytes = k; + + process: + /* add this data to the write list for this fd */ + pmix_list_append(&channel->outputs, 
&output->super); + + /* record how big the buffer is */ + num_buffered = pmix_list_get_size(&channel->outputs); + + /* is the write event issued? */ + if (!channel->pending) { + /* issue it */ + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, + "%s write:output adding write event", + PMIX_NAME_PRINT(&pmix_globals.myid))); + PMIX_IOF_SINK_ACTIVATE(channel); + } + + return num_buffered; +} + +void pmix_iof_static_dump_output(pmix_iof_sink_t *sink) +{ + bool dump; + int num_written; + pmix_iof_write_event_t *wev = &sink->wev; + pmix_iof_write_output_t *output; + + if (!pmix_list_is_empty(&wev->outputs)) { + dump = false; + /* make one last attempt to write this out */ + while (NULL != (output = (pmix_iof_write_output_t*)pmix_list_remove_first(&wev->outputs))) { + if (!dump) { + num_written = write(wev->fd, output->data, output->numbytes); + if (num_written < output->numbytes) { + /* don't retry - just cleanout the list and dump it */ + dump = true; + } + } + PMIX_RELEASE(output); + } + } +} + +void pmix_iof_write_handler(int _fd, short event, void *cbdata) +{ + pmix_iof_sink_t *sink = (pmix_iof_sink_t*)cbdata; + pmix_iof_write_event_t *wev = &sink->wev; + pmix_list_item_t *item; + pmix_iof_write_output_t *output; + int num_written, total_written = 0; + + PMIX_ACQUIRE_OBJECT(sink); + + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, + "%s write:handler writing data to %d", + PMIX_NAME_PRINT(&pmix_globals.myid), + wev->fd)); + + while (NULL != (item = pmix_list_remove_first(&wev->outputs))) { + output = (pmix_iof_write_output_t*)item; + if (0 == output->numbytes) { + /* indicates we are to close this stream */ + PMIX_RELEASE(sink); + return; + } + num_written = write(wev->fd, output->data, output->numbytes); + if (num_written < 0) { + if (EAGAIN == errno || EINTR == errno) { + /* push this item back on the front of the list */ + pmix_list_prepend(&wev->outputs, item); + /* if the list is getting too large, abort */ + if (pmix_globals.output_limit < 
pmix_list_get_size(&wev->outputs)) { + pmix_output(0, "IO Forwarding is running too far behind - something is blocking us from writing"); + goto ABORT; + } + /* leave the write event running so it will call us again + * when the fd is ready. + */ + goto NEXT_CALL; + } + /* otherwise, something bad happened so all we can do is abort + * this attempt + */ + PMIX_RELEASE(output); + goto ABORT; + } else if (num_written < output->numbytes) { + /* incomplete write - adjust data to avoid duplicate output */ + memmove(output->data, &output->data[num_written], output->numbytes - num_written); + /* adjust the number of bytes remaining to be written */ + output->numbytes -= num_written; + /* push this item back on the front of the list */ + pmix_list_prepend(&wev->outputs, item); + /* if the list is getting too large, abort */ + if (pmix_globals.output_limit < pmix_list_get_size(&wev->outputs)) { + pmix_output(0, "IO Forwarding is running too far behind - something is blocking us from writing"); + goto ABORT; + } + /* leave the write event running so it will call us again + * when the fd is ready + */ + goto NEXT_CALL; + } + PMIX_RELEASE(output); + + total_written += num_written; + if(wev->always_writable && (PMIX_IOF_SINK_BLOCKSIZE <= total_written)){ + /* If this is a regular file it will never tell us it will block + * Write no more than PMIX_IOF_REGULARF_BLOCK at a time allowing + * other fds to progress + */ + goto NEXT_CALL; + } + } + ABORT: + wev->pending = false; + PMIX_POST_OBJECT(wev); + return; +NEXT_CALL: + PMIX_IOF_SINK_ACTIVATE(wev); +} + +/* return true if we should read stdin from fd, false otherwise */ +bool pmix_iof_stdin_check(int fd) +{ +#if defined(HAVE_TCGETPGRP) + if( isatty(fd) && (getpgrp() != tcgetpgrp(fd)) ) { + return false; + } +#endif + return true; +} + +void pmix_iof_stdin_cb(int fd, short event, void *cbdata) +{ + bool should_process; + pmix_iof_read_event_t *stdinev = (pmix_iof_read_event_t*)cbdata; + + PMIX_ACQUIRE_OBJECT(stdinev); + + 
should_process = pmix_iof_stdin_check(0); + + if (should_process) { + PMIX_IOF_READ_ACTIVATE(stdinev); + } else { + pmix_event_del(&stdinev->ev); + stdinev->active = false; + PMIX_POST_OBJECT(stdinev); + } +} + +static void restart_stdin(int fd, short event, void *cbdata) +{ + pmix_iof_read_event_t *tm = (pmix_iof_read_event_t*)cbdata; + + PMIX_ACQUIRE_OBJECT(tm); + + if (!tm->active) { + PMIX_IOF_READ_ACTIVATE(tm); + } +} + +/* this is the read handler for stdin */ +void pmix_iof_read_local_handler(int unusedfd, short event, void *cbdata) +{ + pmix_iof_read_event_t *rev = (pmix_iof_read_event_t*)cbdata; + unsigned char data[PMIX_IOF_BASE_MSG_MAX]; + int32_t numbytes; + int fd; + pmix_status_t rc; + pmix_buffer_t *msg; + pmix_cmd_t cmd = PMIX_IOF_PUSH_CMD; + + PMIX_ACQUIRE_OBJECT(rev); + + /* As we may use timer events, fd can be bogus (-1) + * use the right one here + */ + fd = fileno(stdin); + + /* read up to the fragment size */ + memset(data, 0, PMIX_IOF_BASE_MSG_MAX); + numbytes = read(fd, data, sizeof(data)); + + if (numbytes < 0) { + /* either we have a connection error or it was a non-blocking read */ + + /* non-blocking, retry */ + if (EAGAIN == errno || EINTR == errno) { + PMIX_IOF_READ_ACTIVATE(rev); + return; + } + + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, + "%s iof:read handler Error on stdin", + PMIX_NAME_PRINT(&pmix_globals.myid))); + /* Un-recoverable error. Allow the code to flow as usual in order to + * to send the zero bytes message up the stream, and then close the + * file descriptor and delete the event. 
+ */ + numbytes = 0; + } + + /* The event has fired, so it's no longer active until we + re-add it */ + rev->active = false; + + /* pass the data to our PMIx server so it can relay it + * to the host RM for distribution */ + msg = PMIX_NEW(pmix_buffer_t); + if (NULL == msg) { + /* don't restart the event - just return */ + return; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + goto restart; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &numbytes, 1, PMIX_INT32); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + goto restart; + } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, data, numbytes, PMIX_BYTE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + goto restart; + } + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, stdincbfunc, NULL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + } + + restart: + /* if num_bytes was zero, or we read the last piece of the file, then we need to terminate the event */ + if (0 == numbytes) { + /* this will also close our stdin file descriptor */ + PMIX_RELEASE(rev); + } else { + /* if we are looking at a tty, then we just go ahead and restart the + * read event assuming we are not backgrounded + */ + if (pmix_iof_stdin_check(fd)) { + restart_stdin(fd, 0, rev); + } else { + /* delay for awhile and then restart */ + pmix_event_evtimer_set(pmix_globals.evbase, + &rev->ev, restart_stdin, rev); + rev->tv.tv_sec = 0; + rev->tv.tv_usec = 10000; + PMIX_POST_OBJECT(rev); + pmix_event_evtimer_add(&rev->ev, &rev->tv); + } + } + /* nothing more to do */ + return; +} + +/* class instances */ +static void iof_sink_construct(pmix_iof_sink_t* ptr) +{ + PMIX_CONSTRUCT(&ptr->wev, pmix_iof_write_event_t); + ptr->xoff = false; + ptr->exclusive = false; + ptr->closed = false; +} +static void iof_sink_destruct(pmix_iof_sink_t* 
ptr) +{ + if (0 <= ptr->wev.fd) { + PMIX_OUTPUT_VERBOSE((20, pmix_client_globals.iof_output, + "%s iof: closing sink for process %s on fd %d", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(&ptr->name), ptr->wev.fd)); + PMIX_DESTRUCT(&ptr->wev); + } +} +PMIX_CLASS_INSTANCE(pmix_iof_sink_t, + pmix_list_item_t, + iof_sink_construct, + iof_sink_destruct); + + +static void iof_read_event_construct(pmix_iof_read_event_t* rev) +{ + rev->fd = -1; + rev->active = false; + rev->tv.tv_sec = 0; + rev->tv.tv_usec = 0; +} +static void iof_read_event_destruct(pmix_iof_read_event_t* rev) +{ + pmix_event_del(&rev->ev); + if (0 <= rev->fd) { + PMIX_OUTPUT_VERBOSE((20, pmix_client_globals.iof_output, + "%s iof: closing fd %d", + PMIX_NAME_PRINT(&pmix_globals.myid), rev->fd)); + close(rev->fd); + rev->fd = -1; + } +} +PMIX_CLASS_INSTANCE(pmix_iof_read_event_t, + pmix_object_t, + iof_read_event_construct, + iof_read_event_destruct); + +static void iof_write_event_construct(pmix_iof_write_event_t* wev) +{ + wev->pending = false; + wev->always_writable = false; + wev->fd = -1; + PMIX_CONSTRUCT(&wev->outputs, pmix_list_t); + wev->tv.tv_sec = 0; + wev->tv.tv_usec = 0; +} +static void iof_write_event_destruct(pmix_iof_write_event_t* wev) +{ + pmix_event_del(&wev->ev); + if (2 < wev->fd) { + PMIX_OUTPUT_VERBOSE((20, pmix_client_globals.iof_output, + "%s iof: closing fd %d for write event", + PMIX_NAME_PRINT(&pmix_globals.myid), wev->fd)); + close(wev->fd); + } + PMIX_DESTRUCT(&wev->outputs); +} +PMIX_CLASS_INSTANCE(pmix_iof_write_event_t, + pmix_list_item_t, + iof_write_event_construct, + iof_write_event_destruct); + +PMIX_CLASS_INSTANCE(pmix_iof_write_output_t, + pmix_list_item_t, + NULL, NULL); diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h new file mode 100644 index 0000000000..1a91b12baa --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2004-2005 The 
Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * I/O Forwarding Service + */ + +#ifndef PMIX_IOF_H +#define PMIX_IOF_H + +#include + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_UIO_H +#include +#endif +#ifdef HAVE_NET_UIO_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "src/class/pmix_list.h" +#include "src/include/pmix_globals.h" +#include "src/util/fd.h" + +#include "src/common/pmix_iof.h" + +BEGIN_C_DECLS + +/* + * Maximum size of single msg + */ +#define PMIX_IOF_BASE_MSG_MAX 4096 +#define PMIX_IOF_BASE_TAG_MAX 50 +#define PMIX_IOF_BASE_TAGGED_OUT_MAX 8192 +#define PMIX_IOF_MAX_INPUT_BUFFERS 50 + +typedef struct { + pmix_list_item_t super; + bool pending; + bool always_writable; + pmix_event_t ev; + struct timeval tv; + int fd; + pmix_list_t outputs; +} pmix_iof_write_event_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_iof_write_event_t); + +typedef struct { + pmix_list_item_t super; + pmix_proc_t name; + pmix_iof_channel_t tag; + 
pmix_iof_write_event_t wev; + bool xoff; + bool exclusive; + bool closed; +} pmix_iof_sink_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_iof_sink_t); + +typedef struct { + pmix_list_item_t super; + char data[PMIX_IOF_BASE_TAGGED_OUT_MAX]; + int numbytes; +} pmix_iof_write_output_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_iof_write_output_t); + +typedef struct { + pmix_object_t super; + pmix_event_t ev; + struct timeval tv; + int fd; + bool active; + bool always_readable; +} pmix_iof_read_event_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_iof_read_event_t); + + +/* Write event macro's */ + +static inline bool +pmix_iof_fd_always_ready(int fd) +{ + return pmix_fd_is_regular(fd) || + (pmix_fd_is_chardev(fd) && !isatty(fd)) || + pmix_fd_is_blkdev(fd); +} + +#define PMIX_IOF_SINK_BLOCKSIZE (1024) + +#define PMIX_IOF_SINK_ACTIVATE(wev) \ + do { \ + struct timeval *tv = NULL; \ + wev->pending = true; \ + PMIX_POST_OBJECT(wev); \ + if (wev->always_writable) { \ + /* Regular is always write ready. 
Use timer to activate */ \ + tv = &wev->tv; \ + } \ + if (pmix_event_add(&wev->ev, tv)) { \ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); \ + } \ + } while(0); + + +/* define an output "sink", adding it to the provided + * endpoint list for this proc */ +#define PMIX_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr) \ + do { \ + PMIX_OUTPUT_VERBOSE((1, pmix_client_globals.iof_output, \ + "defining endpt: file %s line %d fd %d", \ + __FILE__, __LINE__, (fid))); \ + PMIX_CONSTRUCT((snk), pmix_iof_sink_t); \ + (void)strncpy((snk)->name.nspace, (nm)->nspace, PMIX_MAX_NSLEN); \ + (snk)->name.rank = (nm)->rank; \ + (snk)->tag = (tg); \ + if (0 <= (fid)) { \ + (snk)->wev.fd = (fid); \ + (snk)->wev.always_writable = \ + pmix_iof_fd_always_ready(fid); \ + if ((snk)->wev.always_writable) { \ + pmix_event_evtimer_set(pmix_globals.evbase, \ + &(snk)->wev.ev, wrthndlr, (snk)); \ + } else { \ + pmix_event_set(pmix_globals.evbase, \ + &(snk)->wev.ev, (snk)->wev.fd, \ + PMIX_EV_WRITE, \ + wrthndlr, (snk)); \ + } \ + } \ + PMIX_POST_OBJECT(snk); \ + } while(0); + +/* Read event macro's */ +#define PMIX_IOF_READ_ADDEV(rev) \ + do { \ + struct timeval *tv = NULL; \ + if ((rev)->always_readable) { \ + tv = &(rev)->tv; \ + } \ + if (pmix_event_add(&(rev)->ev, tv)) { \ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); \ + } \ + } while(0); + +#define PMIX_IOF_READ_ACTIVATE(rev) \ + do { \ + (rev)->active = true; \ + PMIX_POST_OBJECT(rev); \ + PMIX_IOF_READ_ADDEV(rev); \ + } while(0); + + +PMIX_EXPORT pmix_status_t pmix_iof_flush(void); + +PMIX_EXPORT pmix_status_t pmix_iof_write_output(const pmix_proc_t *name, + pmix_iof_channel_t stream, + const pmix_byte_object_t *bo, + pmix_iof_write_event_t *channel); +PMIX_EXPORT void pmix_iof_static_dump_output(pmix_iof_sink_t *sink); +PMIX_EXPORT void pmix_iof_write_handler(int fd, short event, void *cbdata); +PMIX_EXPORT void pmix_iof_stdin_write_handler(int fd, short event, void *cbdata); +PMIX_EXPORT bool pmix_iof_stdin_check(int fd); +PMIX_EXPORT void 
pmix_iof_stdin_cb(int fd, short event, void *cbdata); +PMIX_EXPORT void pmix_iof_read_local_handler(int fd, short event, void *cbdata); + +END_C_DECLS + +#endif /* PMIX_IOF_H */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c index 50b4aaf0c9..18ae2a34d1 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c @@ -216,7 +216,42 @@ PMIX_EXPORT const char* pmix_command_string(pmix_cmd_t cmd) return "JOB CONTROL"; case PMIX_MONITOR_CMD: return "MONITOR"; + case PMIX_IOF_PUSH_CMD: + return "IOF PUSH"; + case PMIX_IOF_PULL_CMD: + return "IOF PULL"; default: return "UNKNOWN"; } } + +/* this is not a thread-safe implementation. To correctly implement this, + * we need to port the thread-safe data code from OPAL and use it here */ +static char answer[300]; + +PMIX_EXPORT const char* PMIx_IOF_channel_string(pmix_iof_channel_t channel) +{ + size_t cnt=0; + + memset(answer, 0, sizeof(answer)); + if (PMIX_FWD_STDIN_CHANNEL & channel) { + strncpy(&answer[cnt], "STDIN ", strlen("STDIN ")); + cnt += strlen("STDIN "); + } + if (PMIX_FWD_STDOUT_CHANNEL & channel) { + strncpy(&answer[cnt], "STDOUT ", strlen("STDOUT ")); + cnt += strlen("STDOUT "); + } + if (PMIX_FWD_STDERR_CHANNEL & channel) { + strncpy(&answer[cnt], "STDERR ", strlen("STDERR ")); + cnt += strlen("STDERR "); + } + if (PMIX_FWD_STDDIAG_CHANNEL & channel) { + strncpy(&answer[cnt], "STDDIAG ", strlen("STDDIAG ")); + cnt += strlen("STDDIAG "); + } + if (0 == cnt) { + strncpy(&answer[cnt], "NONE", strlen("NONE")); + } + return answer; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c index 248dd4bf0e..0dceb4f9e3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c @@ -41,7 +41,7 @@ pmix_info_t *info; size_t 
ninfo; pmix_notification_fn_t evhdlr; - pmix_evhdlr_reg_cbfunc_t evregcbfn; + pmix_hdlr_reg_cbfunc_t evregcbfn; void *cbdata; } pmix_rshift_caddy_t; static void rscon(pmix_rshift_caddy_t *p) @@ -766,7 +766,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncodes, pmix_info_t info[], size_t ninfo, pmix_notification_fn_t event_hdlr, - pmix_evhdlr_reg_cbfunc_t cbfunc, + pmix_hdlr_reg_cbfunc_t cbfunc, void *cbdata) { pmix_rshift_caddy_t *cd; diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c index f5a87272a1..cfe2e8b0c7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c @@ -109,6 +109,7 @@ static void nscon(pmix_nspace_t *p) PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t); PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t); PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t); + PMIX_CONSTRUCT(&p->setup_data, pmix_list_t); } static void nsdes(pmix_nspace_t *p) { @@ -125,6 +126,7 @@ static void nsdes(pmix_nspace_t *p) PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs); PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files); PMIX_LIST_DESTRUCT(&p->epilog.ignores); + PMIX_LIST_DESTRUCT(&p->setup_data); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, pmix_list_item_t, @@ -219,6 +221,24 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t, pmix_object_t, pcon, pdes); +static void iofreqcon(pmix_iof_req_t *p) +{ + p->peer = NULL; + memset(&p->pname, 0, sizeof(pmix_name_t)); + p->channels = PMIX_FWD_NO_CHANNELS; + p->cbfunc = NULL; +} +static void iofreqdes(pmix_iof_req_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_iof_req_t, + pmix_list_item_t, + iofreqcon, iofreqdes); + + static void scon(pmix_shift_caddy_t *p) { PMIX_CONSTRUCT_LOCK(&p->lock); @@ -234,6 +254,7 @@ static void scon(pmix_shift_caddy_t *p) 
p->directives = NULL; p->ndirs = 0; p->evhdlr = NULL; + p->iofreq = NULL; p->kv = NULL; p->vptr = NULL; p->cd = NULL; diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index b90512944c..8085873f0d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -31,6 +31,7 @@ #endif #include PMIX_EVENT_HEADER +#include #include #include "src/class/pmix_hash_table.h" @@ -97,6 +98,8 @@ typedef uint8_t pmix_cmd_t; #define PMIX_MONITOR_CMD 19 #define PMIX_GET_CREDENTIAL_CMD 20 #define PMIX_VALIDATE_CRED_CMD 21 +#define PMIX_IOF_PULL_CMD 22 +#define PMIX_IOF_PUSH_CMD 23 /* provide a "pretty-print" function for cmds */ const char* pmix_command_string(pmix_cmd_t cmd); @@ -165,6 +168,8 @@ typedef struct { pmix_personality_t compat; pmix_epilog_t epilog; // things to do upon termination of all local clients // from this nspace + pmix_list_t setup_data; // list of pmix_kval_t containing info structs having blobs + // for setting up the local node for this nspace/application } pmix_nspace_t; PMIX_CLASS_DECLARATION(pmix_nspace_t); @@ -228,6 +233,17 @@ typedef struct pmix_peer_t { PMIX_CLASS_DECLARATION(pmix_peer_t); +/* tracker for IOF requests */ +typedef struct { + pmix_list_item_t super; + pmix_peer_t *peer; + pmix_name_t pname; + pmix_iof_channel_t channels; + pmix_iof_cbfunc_t cbfunc; +} pmix_iof_req_t; +PMIX_CLASS_DECLARATION(pmix_iof_req_t); + + /* caddy for query requests */ typedef struct { pmix_object_t super; @@ -307,6 +323,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); pmix_info_t *directives; size_t ndirs; pmix_notification_fn_t evhdlr; + pmix_iof_req_t *iofreq; pmix_kval_t *kv; pmix_value_t *vptr; pmix_server_caddy_t *cd; @@ -314,9 +331,8 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); bool enviro; union { pmix_release_cbfunc_t relfn; - pmix_evhdlr_reg_cbfunc_t evregcbfn; + pmix_hdlr_reg_cbfunc_t hdlrregcbfn; pmix_op_cbfunc_t 
opcbfn; - pmix_evhdlr_reg_cbfunc_t errregcbfn; } cbfunc; void *cbdata; size_t ref; @@ -340,7 +356,7 @@ typedef struct { pmix_lookup_cbfunc_t lookupfn; pmix_spawn_cbfunc_t spawnfn; pmix_connect_cbfunc_t cnctfn; - pmix_evhdlr_reg_cbfunc_t errregfn; + pmix_hdlr_reg_cbfunc_t hdlrregfn; } cbfunc; size_t errhandler_ref; void *cbdata; @@ -414,6 +430,7 @@ typedef struct { bool commits_pending; struct timeval event_window; pmix_list_t cached_events; // events waiting in the window prior to processing + pmix_list_t iof_requests; // list of pmix_iof_req_t IOF requests pmix_ring_buffer_t notifications; // ring buffer of pending notifications /* processes also need a place where they can store * their own internal data - e.g., data provided by @@ -422,6 +439,11 @@ typedef struct { * interface so that other parts of the process can * look them up */ pmix_gds_base_module_t *mygds; + /* IOF controls */ + bool tag_output; + bool xml_output; + bool timestamp_output; + size_t output_limit; } pmix_globals_t; /* provide access to a function to cleanup epilogs */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/types.h b/opal/mca/pmix/pmix3x/pmix/src/include/types.h index 7c073ccf4f..593f854a32 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/types.h @@ -244,6 +244,8 @@ typedef struct event pmix_event_t; #define pmix_event_assign(x, b, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#define pmix_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) + #define pmix_event_add(ev, tv) event_add((ev), (tv)) #define pmix_event_del(ev) event_del((ev)) @@ -264,5 +266,6 @@ typedef struct event pmix_event_t; #define pmix_event_evtimer_del(x) pmix_event_del((x)) +#define pmix_event_signal_set(b, x, fd, cb, arg) event_assign((x), (b), (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg)) #endif /* PMIX_TYPES_H */ diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h index 0b08a5d0b1..18154ec1c3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h @@ -379,6 +379,10 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_val(pmix_buffer_t *buffer, pmix_value_t *p); PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_alloc_directive(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_iof_channel(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_envar(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); /* * "Standard" unpack functions @@ -466,6 +470,10 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_query(pmix_buffer_t *buffer, v int32_t *num_vals, pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_alloc_directive(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_iof_channel(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_envar(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); /**** DEPRECATED ****/ PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_array(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); @@ -527,6 +535,9 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_query(pmix_query_t **dest, pmix_query_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_envar(pmix_envar_t **dest, + pmix_envar_t *src, + pmix_data_type_t type); /**** DEPRECATED ****/ PMIX_EXPORT pmix_status_t 
pmix_bfrops_base_copy_array(pmix_info_array_t **dest, pmix_info_array_t *src, @@ -637,6 +648,12 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_rank(char **output, char *prefi PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_alloc_directive(char **output, char *prefix, pmix_alloc_directive_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_iof_channel(char **output, char *prefix, + pmix_iof_channel_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_envar(char **output, char *prefix, + pmix_envar_t *src, + pmix_data_type_t type); /* * Common helper functions diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c index 7b91c19bae..f2a9e9df81 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c @@ -113,6 +113,7 @@ pmix_status_t pmix_bfrops_base_std_copy(void **dest, void *src, case PMIX_INT16: case PMIX_UINT16: + case PMIX_IOF_CHANNEL: datasize = 2; break; @@ -366,18 +367,24 @@ pmix_status_t pmix_bfrops_base_copy_pinfo(pmix_proc_info_t **dest, pmix_proc_info_t *src, pmix_data_type_t type) { - *dest = (pmix_proc_info_t*)malloc(sizeof(pmix_proc_info_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); - (*dest)->proc.rank = src->proc.rank; + pmix_proc_info_t *p; + + PMIX_PROC_INFO_CREATE(p, 1); + if (NULL == p) { + return PMIX_ERR_NOMEM; + } + /* the proc identifier (nspace/rank) must be carried over as well */ + memcpy(&p->proc, &src->proc, sizeof(pmix_proc_t)); if (NULL != src->hostname) { - (*dest)->hostname = strdup(src->hostname); + p->hostname = strdup(src->hostname); } if (NULL != src->executable_name) { - (*dest)->executable_name = strdup(src->executable_name); + p->executable_name = strdup(src->executable_name); } - (*dest)->pid = src->pid; - (*dest)->exit_code = src->exit_code; - (*dest)->state = src->state; + memcpy(&p->pid, &src->pid, sizeof(pid_t)); + memcpy(&p->exit_code, &src->exit_code, sizeof(int)); +
memcpy(&p->state, &src->state, sizeof(pmix_proc_state_t)); + *dest = p; return PMIX_SUCCESS; } @@ -402,6 +409,7 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, pmix_modex_data_t *pm, *sm; pmix_proc_info_t *pi, *si; pmix_query_t *pq, *sq; + pmix_envar_t *pe, *se; p = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); if (NULL == p) { @@ -822,6 +830,24 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, } } break; + case PMIX_ENVAR: + PMIX_ENVAR_CREATE(p->array, src->size); + if (NULL == p->array) { + free(p); + return PMIX_ERR_NOMEM; + } + pe = (pmix_envar_t*)p->array; + se = (pmix_envar_t*)src->array; + for (n=0; n < src->size; n++) { + if (NULL != se[n].envar) { + pe[n].envar = strdup(se[n].envar); + } + if (NULL != se[n].value) { + pe[n].value = strdup(se[n].value); + } + pe[n].separator = se[n].separator; + } + break; default: free(p); return PMIX_ERR_UNKNOWN_DATA_TYPE; @@ -876,3 +902,21 @@ pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, return PMIX_SUCCESS; } /*******************/ + +pmix_status_t pmix_bfrops_base_copy_envar(pmix_envar_t **dest, + pmix_envar_t *src, + pmix_data_type_t type) +{ + PMIX_ENVAR_CREATE(*dest, 1); + if (NULL == (*dest)) { + return PMIX_ERR_NOMEM; + } + if (NULL != src->envar) { + (*dest)->envar = strdup(src->envar); + } + if (NULL != src->value) { + (*dest)->value = strdup(src->value); + } + (*dest)->separator = src->separator; + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c index 5abf19c1e8..4ee81a4b9e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c @@ -49,6 +49,9 @@ void pmix_bfrops_base_value_load(pmix_value_t *v, const void *data, { pmix_byte_object_t *bo; pmix_proc_info_t *pi; + pmix_envar_t *envar; + pmix_data_array_t *darray; + pmix_status_t rc;
v->type = type; if (NULL == data) { @@ -174,6 +177,24 @@ void pmix_bfrops_base_value_load(pmix_value_t *v, const void *data, case PMIX_POINTER: memcpy(&(v->data.ptr), data, sizeof(void*)); break; + case PMIX_ENVAR: + envar = (pmix_envar_t*)data; + if (NULL != envar->envar) { + v->data.envar.envar = strdup(envar->envar); + } + if (NULL != envar->value) { + v->data.envar.value = strdup(envar->value); + } + v->data.envar.separator = envar->separator; + break; + case PMIX_DATA_ARRAY: + darray = (pmix_data_array_t*)data; + rc = pmix_bfrops_base_copy_darray(&v->data.darray, darray, PMIX_DATA_ARRAY); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + break; + default: /* silence warnings */ break; @@ -187,6 +208,8 @@ pmix_status_t pmix_bfrops_base_value_unload(pmix_value_t *kv, size_t *sz) { pmix_status_t rc; + pmix_envar_t *envar; + pmix_data_array_t **darray; rc = PMIX_SUCCESS; if (NULL == data || @@ -304,6 +327,26 @@ pmix_status_t pmix_bfrops_base_value_unload(pmix_value_t *kv, memcpy(*data, &(kv->data.ptr), sizeof(void*)); *sz = sizeof(void*); break; + case PMIX_DATA_ARRAY: + darray = (pmix_data_array_t**)data; + rc = pmix_bfrops_base_copy_darray(darray, kv->data.darray, PMIX_DATA_ARRAY); + *sz = sizeof(pmix_data_array_t); + break; + case PMIX_ENVAR: + PMIX_ENVAR_CREATE(envar, 1); + if (NULL == envar) { + return PMIX_ERR_NOMEM; + } + if (NULL != kv->data.envar.envar) { + envar->envar = strdup(kv->data.envar.envar); + } + if (NULL != kv->data.envar.value) { + envar->value = strdup(kv->data.envar.value); + } + envar->separator = kv->data.envar.separator; + *data = envar; + *sz = sizeof(pmix_envar_t); + break; default: /* silence warnings */ rc = PMIX_ERROR; @@ -398,16 +441,63 @@ pmix_value_cmp_t pmix_bfrops_base_value_cmp(pmix_value_t *p, } break; case PMIX_COMPRESSED_STRING: - if (p->data.bo.size != p1->data.bo.size) { - return false; - } else { - return true; + /* order by size; equal-sized objects are equal only when their bytes match */ + if (p->data.bo.size > p1->data.bo.size) { + return PMIX_VALUE1_GREATER; + } else if (p->data.bo.size < p1->data.bo.size) { + return PMIX_VALUE2_GREATER; } +
if (0 == p->data.bo.size || + 0 == memcmp(p->data.bo.bytes, p1->data.bo.bytes, p->data.bo.size)) { + rc = PMIX_EQUAL; + } + break; case PMIX_STATUS: if (p->data.status == p1->data.status) { rc = PMIX_EQUAL; } break; + case PMIX_ENVAR: + if (NULL != p->data.envar.envar) { + if (NULL == p1->data.envar.envar) { + return PMIX_VALUE1_GREATER; + } + rc = strcmp(p->data.envar.envar, p1->data.envar.envar); + if (rc < 0) { + return PMIX_VALUE2_GREATER; + } else if (0 < rc) { + return PMIX_VALUE1_GREATER; + } + } else if (NULL != p1->data.envar.envar) { + /* we know value1->envar had to be NULL */ + return PMIX_VALUE2_GREATER; + } + + /* if both are NULL or are equal, then check value */ + if (NULL != p->data.envar.value) { + if (NULL == p1->data.envar.value) { + return PMIX_VALUE1_GREATER; + } + rc = strcmp(p->data.envar.value, p1->data.envar.value); + if (rc < 0) { + return PMIX_VALUE2_GREATER; + } else if (0 < rc) { + return PMIX_VALUE1_GREATER; + } + } else if (NULL != p1->data.envar.value) { + /* we know value1->value had to be NULL */ + return PMIX_VALUE2_GREATER; + } + + /* finally, check separator */ + if (p->data.envar.separator < p1->data.envar.separator) { + return PMIX_VALUE2_GREATER; + } + if (p1->data.envar.separator < p->data.envar.separator) { + return PMIX_VALUE1_GREATER; + } + rc = PMIX_EQUAL; + break; default: pmix_output(0, "COMPARE-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type); } @@ -431,6 +521,7 @@ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, pmix_modex_data_t *pm, *sm; pmix_proc_info_t *pi, *si; pmix_query_t *pq, *sq; + pmix_envar_t *pe, *se; /* copy the right field */ p->type = src->type; @@ -544,395 +635,23 @@ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t)); break; case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(p->data.pinfo, 1); - if (NULL != src->data.pinfo->hostname) { - p->data.pinfo->hostname = strdup(src->data.pinfo->hostname); - } - if (NULL != src->data.pinfo->executable_name) { - p->data.pinfo->executable_name = strdup(src->data.pinfo->executable_name); - } -
memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t)); - memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code, sizeof(int)); - memcpy(&p->data.pinfo->state, &src->data.pinfo->state, sizeof(pmix_proc_state_t)); - break; + return pmix_bfrops_base_copy_pinfo(&p->data.pinfo, src->data.pinfo, PMIX_PROC_INFO); case PMIX_DATA_ARRAY: - p->data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); - p->data.darray->type = src->data.darray->type; - p->data.darray->size = src->data.darray->size; - if (0 == p->data.darray->size || NULL == src->data.darray->array) { - p->data.darray->array = NULL; - p->data.darray->size = 0; - break; - } - /* allocate space and do the copy */ - switch (src->data.darray->type) { - case PMIX_UINT8: - case PMIX_INT8: - case PMIX_BYTE: - p->data.darray->array = (char*)malloc(src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size); - break; - case PMIX_UINT16: - case PMIX_INT16: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint16_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint16_t)); - break; - case PMIX_UINT32: - case PMIX_INT32: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint32_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint32_t)); - break; - case PMIX_UINT64: - case PMIX_INT64: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint64_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint64_t)); - break; - case PMIX_BOOL: - p->data.darray->array = 
(char*)malloc(src->data.darray->size * sizeof(bool)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(bool)); - break; - case PMIX_SIZE: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(size_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(size_t)); - break; - case PMIX_PID: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(pid_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pid_t)); - break; - case PMIX_STRING: - p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - prarray = (char**)p->data.darray->array; - strarray = (char**)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != strarray[n]) { - prarray[n] = strdup(strarray[n]); - } - } - break; - case PMIX_INT: - case PMIX_UINT: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(int)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(int)); - break; - case PMIX_FLOAT: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(float)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(float)); - break; - case PMIX_DOUBLE: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(double)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(double)); - 
break; - case PMIX_TIMEVAL: - p->data.darray->array = (struct timeval*)malloc(src->data.darray->size * sizeof(struct timeval)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(struct timeval)); - break; - case PMIX_TIME: - p->data.darray->array = (time_t*)malloc(src->data.darray->size * sizeof(time_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(time_t)); - break; - case PMIX_STATUS: - p->data.darray->array = (pmix_status_t*)malloc(src->data.darray->size * sizeof(pmix_status_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_status_t)); - break; - case PMIX_VALUE: - PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pv = (pmix_value_t*)p->data.darray->array; - sv = (pmix_value_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (PMIX_SUCCESS != (rc = pmix_value_xfer(&pv[n], &sv[n]))) { - PMIX_VALUE_FREE(pv, src->data.darray->size); - return rc; - } - } - break; - case PMIX_PROC: - PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_proc_t)); - break; - case PMIX_APP: - PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pa = (pmix_app_t*)p->data.darray->array; - sa = (pmix_app_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sa[n].cmd) { - pa[n].cmd = strdup(sa[n].cmd); - } - if (NULL != sa[n].argv) { - pa[n].argv = 
pmix_argv_copy(sa[n].argv); - } - if (NULL != sa[n].env) { - pa[n].env = pmix_argv_copy(sa[n].env); - } - if (NULL != sa[n].cwd) { - pa[n].cwd = strdup(sa[n].cwd); - } - pa[n].maxprocs = sa[n].maxprocs; - if (0 < sa[n].ninfo && NULL != sa[n].info) { - PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo); - if (NULL == pa[n].info) { - PMIX_APP_FREE(pa, src->data.darray->size); - return PMIX_ERR_NOMEM; - } - pa[n].ninfo = sa[n].ninfo; - for (m=0; m < pa[n].ninfo; m++) { - PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]); - } - } - } - break; - case PMIX_INFO: - PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size); - p1 = (pmix_info_t*)p->data.darray->array; - s1 = (pmix_info_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_XFER(&p1[n], &s1[n]); - } - break; - case PMIX_PDATA: - PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pd = (pmix_pdata_t*)p->data.darray->array; - sd = (pmix_pdata_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_PDATA_XFER(&pd[n], &sd[n]); - } - break; - case PMIX_BUFFER: - p->data.darray->array = (pmix_buffer_t*)malloc(src->data.darray->size * sizeof(pmix_buffer_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pb = (pmix_buffer_t*)p->data.darray->array; - sb = (pmix_buffer_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); - pmix_bfrops_base_copy_payload(&pb[n], &sb[n]); - } - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pbo = (pmix_byte_object_t*)p->data.darray->array; - sbo = (pmix_byte_object_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sbo[n].bytes && 0 < 
sbo[n].size) { - pbo[n].size = sbo[n].size; - pbo[n].bytes = (char*)malloc(pbo[n].size); - memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size); - } else { - pbo[n].bytes = NULL; - pbo[n].size = 0; - } - } - break; - case PMIX_KVAL: - p->data.darray->array = (pmix_kval_t*)calloc(src->data.darray->size , sizeof(pmix_kval_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pk = (pmix_kval_t*)p->data.darray->array; - sk = (pmix_kval_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sk[n].key) { - pk[n].key = strdup(sk[n].key); - } - if (NULL != sk[n].value) { - PMIX_VALUE_CREATE(pk[n].value, 1); - if (NULL == pk[n].value) { - free(p->data.darray->array); - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (rc = pmix_value_xfer(pk[n].value, sk[n].value))) { - return rc; - } - } - } - break; - case PMIX_MODEX: - PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pm = (pmix_modex_data_t*)p->data.darray->array; - sm = (pmix_modex_data_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t)); - if (NULL != sm[n].blob && 0 < sm[n].size) { - pm[n].blob = (uint8_t*)malloc(sm[n].size); - if (NULL == pm[n].blob) { - return PMIX_ERR_NOMEM; - } - memcpy(pm[n].blob, sm[n].blob, sm[n].size); - pm[n].size = sm[n].size; - } else { - pm[n].blob = NULL; - pm[n].size = 0; - } - } - break; - case PMIX_PERSIST: - p->data.darray->array = (pmix_persistence_t*)malloc(src->data.darray->size * sizeof(pmix_persistence_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_persistence_t)); - break; - case PMIX_POINTER: - p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - prarray = 
(char**)p->data.darray->array; - strarray = (char**)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - prarray[n] = strarray[n]; - } - break; - case PMIX_SCOPE: - p->data.darray->array = (pmix_scope_t*)malloc(src->data.darray->size * sizeof(pmix_scope_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_scope_t)); - break; - case PMIX_DATA_RANGE: - p->data.darray->array = (pmix_data_range_t*)malloc(src->data.darray->size * sizeof(pmix_data_range_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_data_range_t)); - break; - case PMIX_COMMAND: - p->data.darray->array = (pmix_cmd_t*)malloc(src->data.darray->size * sizeof(pmix_cmd_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_cmd_t)); - break; - case PMIX_INFO_DIRECTIVES: - p->data.darray->array = (pmix_info_directives_t*)malloc(src->data.darray->size * sizeof(pmix_info_directives_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_info_directives_t)); - break; - case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pi = (pmix_proc_info_t*)p->data.darray->array; - si = (pmix_proc_info_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t)); - if (NULL != si[n].hostname) { - pi[n].hostname = strdup(si[n].hostname); - } else { - pi[n].hostname = NULL; - } - if (NULL != si[n].executable_name) { - pi[n].executable_name = strdup(si[n].executable_name); - } else { - 
pi[n].executable_name = NULL; - } - pi[n].pid = si[n].pid; - pi[n].exit_code = si[n].exit_code; - pi[n].state = si[n].state; - } - break; - case PMIX_DATA_ARRAY: - PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); - return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays - case PMIX_QUERY: - PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pq = (pmix_query_t*)p->data.darray->array; - sq = (pmix_query_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sq[n].keys) { - pq[n].keys = pmix_argv_copy(sq[n].keys); - } - if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) { - PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual); - if (NULL == pq[n].qualifiers) { - PMIX_QUERY_FREE(pq, src->data.darray->size); - return PMIX_ERR_NOMEM; - } - for (m=0; m < sq[n].nqual; m++) { - PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]); - } - pq[n].nqual = sq[n].nqual; - } else { - pq[n].qualifiers = NULL; - pq[n].nqual = 0; - } - } - break; - default: - return PMIX_ERR_UNKNOWN_DATA_TYPE; - } - break; + return pmix_bfrops_base_copy_darray(&p->data.darray, src->data.darray, PMIX_DATA_ARRAY); case PMIX_POINTER: memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*)); break; + case PMIX_ENVAR: + PMIX_ENVAR_CONSTRUCT(&p->data.envar); + if (NULL != src->data.envar.envar) { + p->data.envar.envar = strdup(src->data.envar.envar); + } + if (NULL != src->data.envar.value) { + p->data.envar.value = strdup(src->data.envar.value); + } + p->data.envar.separator = src->data.envar.separator; + break; + /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: p->data.array->size = src->data.array->size; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c index a493ed989e..c5bf49475f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1000,6 +1000,12 @@ pmix_status_t pmix_bfrops_base_pack_darray(pmix_buffer_t *buffer, const void *sr return ret; } break; + case PMIX_ENVAR: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_envar(buffer, p[i].array, p[i].size, PMIX_ENVAR))) { + return ret; + } + break; + /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p[i].array, p[i].size, PMIX_INFO_ARRAY))) { @@ -1223,6 +1229,12 @@ pmix_status_t pmix_bfrops_base_pack_val(pmix_buffer_t *buffer, return ret; } break; + case PMIX_ENVAR: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_envar(buffer, &p->data.envar, 1, PMIX_ENVAR))) { + return ret; + } + break; + /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) { @@ -1270,3 +1282,33 @@ pmix_status_t pmix_bfrops_base_pack_array(pmix_buffer_t *buffer, const void *src return PMIX_SUCCESS; } + +pmix_status_t pmix_bfrops_base_pack_iof_channel(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_int16(buffer, src, num_vals, PMIX_UINT16); +} + +pmix_status_t pmix_bfrops_base_pack_envar(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_envar_t *ptr = (pmix_envar_t*)src; + int32_t i; + pmix_status_t ret; + + for (i=0; i < num_vals; ++i) { + /* pack the name */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &ptr[i].envar, 1, PMIX_STRING))) { + return ret; + } + /* pack the value */ + if (PMIX_SUCCESS != 
(ret = pmix_bfrops_base_pack_string(buffer, &ptr[i].value, 1, PMIX_STRING))) { + return ret; + } + /* pack the separator */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, &ptr[i].separator, 1, PMIX_BYTE))) { + return ret; + } + } + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c index 051efd08b9..9447e2df81 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c @@ -1014,6 +1014,13 @@ int pmix_bfrops_base_print_status(char **output, char *prefix, rc = asprintf(output, "%sPMIX_VALUE: Data type: DATA_ARRAY\tARRAY SIZE: %ld", prefx, (long)src->data.darray->size); break; + case PMIX_ENVAR: + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_ENVAR\tName: %s\tValue: %s\tSeparator: %c", + prefx, (NULL == src->data.envar.envar) ? "NULL" : src->data.envar.envar, + (NULL == src->data.envar.value) ?
"NULL" : src->data.envar.value, + ('\0' == src->data.envar.separator) ? ' ' : src->data.envar.separator); + break; + /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: rc = asprintf(output, "%sPMIX_VALUE: Data type: INFO_ARRAY\tARRAY SIZE: %ld", @@ -1636,6 +1643,66 @@ pmix_status_t pmix_bfrops_base_print_alloc_directive(char **output, char *prefix } } +pmix_status_t pmix_bfrops_base_print_iof_channel(char **output, char *prefix, + pmix_iof_channel_t *src, + pmix_data_type_t type) +{ + char *prefx; + int ret; + + /* deal with NULL prefix */ + if (NULL == prefix) { + if (0 > asprintf(&prefx, " ")) { + return PMIX_ERR_NOMEM; + } + } else { + prefx = prefix; + } + + ret = asprintf(output, "%sData type: PMIX_IOF_CHANNEL\tValue: %s", + prefx, PMIx_IOF_channel_string(*src)); + if (prefx != prefix) { + free(prefx); + } + + if (0 > ret) { + return PMIX_ERR_OUT_OF_RESOURCE; + } else { + return PMIX_SUCCESS; + } +} + +pmix_status_t pmix_bfrops_base_print_envar(char **output, char *prefix, + pmix_envar_t *src, + pmix_data_type_t type) +{ + char *prefx; + int ret; + + /* deal with NULL prefix */ + if (NULL == prefix) { + if (0 > asprintf(&prefx, " ")) { + return PMIX_ERR_NOMEM; + } + } else { + prefx = prefix; + } + + ret = asprintf(output, "%sData type: PMIX_ENVAR\tName: %s\tValue: %s\tSeparator: %c", + prefx, (NULL == src->envar) ? "NULL" : src->envar, + (NULL == src->value) ? "NULL" : src->value, + ('\0' == src->separator) ?
' ' : src->separator); + if (prefx != prefix) { + free(prefx); + } + + if (0 > ret) { + return PMIX_ERR_OUT_OF_RESOURCE; + } else { + return PMIX_SUCCESS; + } +} + /**** DEPRECATED ****/ pmix_status_t pmix_bfrops_base_print_array(char **output, char *prefix, diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c index 1b48477259..20043eb3db 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c @@ -111,6 +111,7 @@ pmix_status_t pmix_bfrops_base_unpack(pmix_pointer_array_t *regtypes, } if (PMIX_INT32 != local_type) { /* if the length wasn't first, then error */ *num_vals = 0; + PMIX_ERROR_LOG(PMIX_ERR_UNPACK_FAILURE); return PMIX_ERR_UNPACK_FAILURE; } } @@ -753,8 +754,13 @@ pmix_status_t pmix_bfrops_base_unpack_val(pmix_buffer_t *buffer, return ret; } break; - case PMIX_QUERY: - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_query(buffer, val->data.darray, &m, PMIX_QUERY))) { + case PMIX_ALLOC_DIRECTIVE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_alloc_directive(buffer, &val->data.adir, &m, PMIX_ALLOC_DIRECTIVE))) { + return ret; + } + break; + case PMIX_ENVAR: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_envar(buffer, &val->data.envar, &m, PMIX_ENVAR))) { return ret; } break; @@ -1524,6 +1530,15 @@ pmix_status_t pmix_bfrops_base_unpack_darray(pmix_buffer_t *buffer, void *dest, return ret; } break; + case PMIX_ENVAR: + ptr[i].array = (pmix_envar_t*)malloc(m * sizeof(pmix_envar_t)); + if (NULL == ptr[i].array) { + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_envar(buffer, ptr[i].array, &m, ptr[i].type))) { + return ret; + } + break; /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: ptr[i].array = (pmix_info_array_t*)malloc(m * sizeof(pmix_info_array_t)); @@ -1603,6 +1618,45 @@ pmix_status_t 
pmix_bfrops_base_unpack_alloc_directive(pmix_buffer_t *buffer, voi return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); } +pmix_status_t pmix_bfrops_base_unpack_iof_channel(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_unpack_int16(buffer, dest, num_vals, PMIX_UINT16); +} + +pmix_status_t pmix_bfrops_base_unpack_envar(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_envar_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix_bfrop_unpack: %d envars", *num_vals); + + ptr = (pmix_envar_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + PMIX_ENVAR_CONSTRUCT(&ptr[i]); + /* unpack the name */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].envar, &m, PMIX_STRING))) { + return ret; + } + /* unpack the value */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].value, &m, PMIX_STRING))) { + return ret; + } + /* unpack the separator */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, &ptr[i].separator, &m, PMIX_BYTE))) { + return ret; + } + } + return PMIX_SUCCESS; +} /**** DEPRECATED ****/ pmix_status_t pmix_bfrops_base_unpack_array(pmix_buffer_t *buffer, void *dest, diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h index 24b9349a34..0ce562426a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h @@ -78,7 +78,9 @@ typedef struct pmix_pnet_globals_t pmix_pnet_globals_t; PMIX_EXPORT extern pmix_pnet_globals_t pmix_pnet_globals; -PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_app(char *nspace, pmix_list_t *ilist); +PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_app(char *nspace, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist); PMIX_EXPORT 
pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, pmix_info_t info[], size_t ninfo); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c index 3572fdf8e8..6602da9a51 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -18,27 +18,58 @@ #include "src/class/pmix_list.h" #include "src/util/error.h" +#include "src/server/pmix_server_ops.h" #include "src/mca/pnet/base/base.h" -pmix_status_t pmix_pnet_base_setup_app(char *nspace, pmix_list_t *ilist) +/* NOTE: a tool (e.g., prun) may call this function to + * harvest local envars for inclusion in a call to + * PMIx_Spawn */ +pmix_status_t pmix_pnet_base_setup_app(char *nspace, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist) { pmix_pnet_base_active_module_t *active; pmix_status_t rc; + pmix_nspace_t *nptr, *ns; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; } + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: setup_app called"); + /* protect against bozo inputs */ if (NULL == nspace || NULL == ilist) { return PMIX_ERR_BAD_PARAM; } + nptr = NULL; + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + /* find this nspace - note that it may not have + * been registered yet */ + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(ns->nspace, nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + /* add it */ + nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == nptr) { + return PMIX_ERR_NOMEM; + } + nptr->nspace = strdup(nspace); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + } + } + PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { if (NULL != active->module->setup_app) { - if (PMIX_SUCCESS != (rc = active->module->setup_app(nspace, ilist))) { + if (PMIX_SUCCESS != (rc = active->module->setup_app(nptr, 
info, ninfo, ilist))) { return rc; } } @@ -47,25 +78,48 @@ pmix_status_t pmix_pnet_base_setup_app(char *nspace, pmix_list_t *ilist) return PMIX_SUCCESS; } +/* can only be called by a server */ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, pmix_info_t info[], size_t ninfo) { pmix_pnet_base_active_module_t *active; pmix_status_t rc; + pmix_nspace_t *nptr, *ns; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; } + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: setup_local_network called"); + /* protect against bozo inputs */ if (NULL == nspace) { return PMIX_ERR_BAD_PARAM; } + /* find this proc's nspace object */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(ns->nspace, nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + /* add it */ + nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == nptr) { + return PMIX_ERR_NOMEM; + } + nptr->nspace = strdup(nspace); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + } + PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { if (NULL != active->module->setup_local_network) { - if (PMIX_SUCCESS != (rc = active->module->setup_local_network(nspace, info, ninfo))) { + if (PMIX_SUCCESS != (rc = active->module->setup_local_network(nptr, info, ninfo))) { return rc; } } @@ -74,23 +128,43 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, return PMIX_SUCCESS; } -pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *peer, char ***env) +/* can only be called by a server */ +pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) { pmix_pnet_base_active_module_t *active; pmix_status_t rc; + pmix_nspace_t *nptr, *ns; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; } /* protect against bozo inputs */ - if (NULL == peer || NULL == env) { + if (NULL == proc || NULL == env) { return PMIX_ERR_BAD_PARAM; } + /* find this proc's 
nspace object */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(ns->nspace, proc->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + /* add it */ + nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == nptr) { + return PMIX_ERR_NOMEM; + } + nptr->nspace = strdup(proc->nspace); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + } + PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { if (NULL != active->module->setup_fork) { - if (PMIX_SUCCESS != (rc = active->module->setup_fork(peer, env))) { + if (PMIX_SUCCESS != (rc = active->module->setup_fork(nptr, env))) { return rc; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_frame.c index 0dd5410dae..542746176c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_frame.c @@ -45,7 +45,7 @@ /* Instantiate the global vars */ pmix_pnet_globals_t pmix_pnet_globals = {{{0}}}; -pmix_pnet_module_t pmix_pnet = { +pmix_pnet_API_module_t pmix_pnet = { .setup_app = pmix_pnet_base_setup_app, .setup_local_network = pmix_pnet_base_setup_local_network, .setup_fork = pmix_pnet_base_setup_fork, diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c index e3740b5a64..53ad2e5973 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ @@ -39,15 +39,18 @@ #include "src/util/pmix_environ.h" #include "src/mca/pnet/pnet.h" +#include "src/mca/pnet/base/base.h" #include "pnet_opa.h" static pmix_status_t opa_init(void); static void opa_finalize(void); -static pmix_status_t setup_app(char *nspace, pmix_list_t *ilist); -static pmix_status_t setup_local_network(char *nspace, +static pmix_status_t setup_app(pmix_nspace_t *nptr, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist); +static pmix_status_t setup_local_network(pmix_nspace_t *nptr, pmix_info_t info[], size_t ninfo); -static pmix_status_t setup_fork(const pmix_proc_t *peer, char ***env); +static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env); static void child_finalized(pmix_peer_t *peer); static void local_app_finalized(char *nspace); @@ -63,14 +66,14 @@ pmix_pnet_module_t pmix_opa_module = { static pmix_status_t opa_init(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet: opa init"); return PMIX_SUCCESS; } static void opa_finalize(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet: opa finalize"); } @@ -157,75 +160,185 @@ static char* transports_print(uint64_t *unique_key) return string_key; } -static pmix_status_t setup_app(char *nspace, pmix_list_t *ilist) +/* NOTE: if there is any binary data to be transferred, then + * this function MUST pack it for transport as the host will + * not know how to do so */ +static pmix_status_t setup_app(pmix_nspace_t *nptr, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist) { uint64_t unique_key[2]; char *string_key, *cs_env; int fd_rand; - size_t bytes_read; - pmix_kval_t *kv; + size_t n, bytes_read, len; + pmix_kval_t *kv, *next; + int i, j; + bool envars, seckeys; - /* put the number here - or else create an appropriate string. 
this just needs to - * eventually be a string variable - */ - if(-1 == (fd_rand = open("/dev/urandom", O_RDONLY))) { - transports_use_rand(unique_key); + if (NULL == info) { + envars = true; + seckeys = true; } else { - bytes_read = read(fd_rand, (char *) unique_key, 16); - if(bytes_read != 16) { - transports_use_rand(unique_key); + envars = false; + seckeys = false; + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ENVARS, PMIX_MAX_KEYLEN)) { + envars = PMIX_INFO_TRUE(&info[n]); + } else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ALL, PMIX_MAX_KEYLEN)) { + envars = PMIX_INFO_TRUE(&info[n]); + seckeys = PMIX_INFO_TRUE(&info[n]); + } else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_NONENVARS, PMIX_MAX_KEYLEN)) { + seckeys = PMIX_INFO_TRUE(&info[n]); + } } - close(fd_rand); } - if (NULL == (string_key = transports_print(unique_key))) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - return PMIX_ERR_OUT_OF_RESOURCE; + if (seckeys) { + /* put the number here - or else create an appropriate string. 
this just needs to + * eventually be a string variable + */ + if(-1 == (fd_rand = open("/dev/urandom", O_RDONLY))) { + transports_use_rand(unique_key); + } else { + bytes_read = read(fd_rand, (char *) unique_key, 16); + if(bytes_read != 16) { + transports_use_rand(unique_key); + } + close(fd_rand); + } + + if (NULL == (string_key = transports_print(unique_key))) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + return PMIX_ERR_OUT_OF_RESOURCE; + } + + if (PMIX_SUCCESS != pmix_mca_base_var_env_name("opa_precondition_transports", &cs_env)) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + free(string_key); + return PMIX_ERR_OUT_OF_RESOURCE; + } + + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + free(string_key); + free(cs_env); + return PMIX_ERR_OUT_OF_RESOURCE; + } + kv->key = strdup(PMIX_SET_ENVAR); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + free(string_key); + free(cs_env); + PMIX_RELEASE(kv); + return PMIX_ERR_OUT_OF_RESOURCE; + } + kv->value->type = PMIX_ENVAR; + PMIX_ENVAR_LOAD(&kv->value->data.envar, cs_env, string_key, ':'); + pmix_list_append(ilist, &kv->super); + free(cs_env); + free(string_key); } - if (PMIX_SUCCESS != pmix_mca_base_var_env_name("pmix_precondition_transports", &cs_env)) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - free(string_key); - return PMIX_ERR_OUT_OF_RESOURCE; + if (envars) { + /* harvest envars to pass along */ + if (NULL != mca_pnet_opa_component.include) { + for (j=0; NULL != mca_pnet_opa_component.include[j]; j++) { + len = strlen(mca_pnet_opa_component.include[j]); + if ('*' == mca_pnet_opa_component.include[j][len-1]) { + --len; + } + for (i = 0; NULL != environ[i]; ++i) { + if (0 == strncmp(environ[i], mca_pnet_opa_component.include[j], len)) { + cs_env = strdup(environ[i]); + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + kv->key = strdup(PMIX_SET_ENVAR); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) 
{ + PMIX_RELEASE(kv); + return PMIX_ERR_OUT_OF_RESOURCE; + } + kv->value->type = PMIX_ENVAR; + string_key = strchr(cs_env, '='); + *string_key = '\0'; + ++string_key; + PMIX_ENVAR_LOAD(&kv->value->data.envar, cs_env, string_key, ':'); + pmix_list_append(ilist, &kv->super); + free(cs_env); + } + } + } + } + /* now check the exclusions and remove any that match */ + if (NULL != mca_pnet_opa_component.exclude) { + for (j=0; NULL != mca_pnet_opa_component.exclude[j]; j++) { + len = strlen(mca_pnet_opa_component.exclude[j]); + if ('*' == mca_pnet_opa_component.exclude[j][len-1]) { + --len; + } + PMIX_LIST_FOREACH_SAFE(kv, next, ilist, pmix_kval_t) { + if (0 == strncmp(kv->value->data.envar.envar, mca_pnet_opa_component.exclude[j], len)) { + pmix_list_remove_item(ilist, &kv->super); + PMIX_RELEASE(kv); + } + } + } + } } - kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - free(string_key); - free(cs_env); - return PMIX_ERR_OUT_OF_RESOURCE; - } - kv->key = strdup(PMIX_SET_ENVAR); - kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == kv->value) { - free(string_key); - free(cs_env); - PMIX_RELEASE(kv); - return PMIX_ERR_OUT_OF_RESOURCE; - } - kv->value->type = PMIX_STRING; - if (0 > asprintf(&kv->value->data.string, "%s=%s", cs_env, string_key)) { - free(string_key); - free(cs_env); - PMIX_RELEASE(kv); - return PMIX_ERR_OUT_OF_RESOURCE; - } - pmix_list_append(ilist, &kv->super); - free(cs_env); - free(string_key); - return PMIX_SUCCESS; } -static pmix_status_t setup_local_network(char *nspace, +static pmix_status_t setup_local_network(pmix_nspace_t *nptr, pmix_info_t info[], size_t ninfo) { + size_t n; + pmix_status_t rc; + pmix_kval_t *kv; + + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_PNET_OPA_BLOB, PMIX_MAX_KEYLEN)) { + /* the byte object contains a packed blob that needs to be + * cached until we determine we have local procs for this + * nspace, and then delivered to the local OPA driver when + * we have a 
means for doing so */ + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + return PMIX_ERR_NOMEM; + } + kv->key = strdup(info[n].key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + pmix_value_xfer(kv->value, &info[n].value); + pmix_list_append(&nptr->setup_data, &kv->super); + } + } + } return PMIX_SUCCESS; } -static pmix_status_t setup_fork(const pmix_proc_t *peer, char ***env) +static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env) { + pmix_kval_t *kv, *next; + + /* if there are any cached nspace prep blobs, execute them, + * ensuring that we only do so once per nspace - note that + * we don't expect to find any envars here, though we could + * have included some if we needed to set them per-client */ + PMIX_LIST_FOREACH_SAFE(kv, next, &nptr->setup_data, pmix_kval_t) { + if (0 == strcmp(kv->key, PMIX_PNET_OPA_BLOB)) { + pmix_list_remove_item(&nptr->setup_data, &kv->super); + /* deliver to the local lib */ + PMIX_RELEASE(kv); + } + } return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h index 278c894863..f43dab9c0f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -18,10 +18,19 @@ BEGIN_C_DECLS +typedef struct { + pmix_pnet_base_component_t super; + char **include; + char **exclude; +} pmix_pnet_opa_component_t; + /* the component must be visible data for the linker to find it */ -PMIX_EXPORT extern pmix_pnet_base_component_t mca_pnet_opa_component; +PMIX_EXPORT extern pmix_pnet_opa_component_t mca_pnet_opa_component; extern pmix_pnet_module_t pmix_opa_module; +/* define a key for any blob we need to send in a launch msg */ +#define PMIX_PNET_OPA_BLOB "pmix.pnet.opa.blob" + END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c index 7d07c400f6..9a726c3f4e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,48 +29,85 @@ #include #include "pmix_common.h" - +#include "src/util/argv.h" #include "src/mca/pnet/pnet.h" #include "pnet_opa.h" static pmix_status_t component_open(void); static pmix_status_t component_close(void); static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); +static pmix_status_t component_register(void); /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -pmix_pnet_base_component_t mca_pnet_opa_component = { - .base = { - PMIX_PNET_BASE_VERSION_1_0_0, +pmix_pnet_opa_component_t mca_pnet_opa_component = { + .super = { + .base = { + PMIX_PNET_BASE_VERSION_1_0_0, - /* Component name and version */ - .pmix_mca_component_name = "opa", - PMIX_MCA_BASE_MAKE_VERSION(component, - PMIX_MAJOR_VERSION, - PMIX_MINOR_VERSION, - PMIX_RELEASE_VERSION), + /* Component name and version */ + .pmix_mca_component_name = "opa", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), - /* Component open and close functions */ - .pmix_mca_open_component = component_open, - .pmix_mca_close_component = component_close, - .pmix_mca_query_component = component_query, + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_register_component_params = component_register, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } }, - .data = { - /* The component is checkpoint ready */ - PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT - } + .include = NULL, + .exclude = NULL }; +static char *includeparam; +static char *excludeparam; -static int component_open(void) +static pmix_status_t component_register(void) +{ + pmix_mca_base_component_t *component = &mca_pnet_opa_component.super.base; + + 
includeparam = "HFI_*,PSM2_*"; + (void)pmix_mca_base_component_var_register(component, "include_envars", + "Comma-delimited list of envars to harvest (\'*\' and \'?\' supported)", + PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + PMIX_INFO_LVL_2, + PMIX_MCA_BASE_VAR_SCOPE_LOCAL, + &includeparam); + if (NULL != includeparam) { + mca_pnet_opa_component.include = pmix_argv_split(includeparam, ','); + } + + excludeparam = NULL; + (void)pmix_mca_base_component_var_register(component, "exclude_envars", + "Comma-delimited list of envars to exclude (\'*\' and \'?\' supported)", + PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + PMIX_INFO_LVL_2, + PMIX_MCA_BASE_VAR_SCOPE_LOCAL, + &excludeparam); + if (NULL != excludeparam) { + mca_pnet_opa_component.exclude = pmix_argv_split(excludeparam, ','); + } + + return PMIX_SUCCESS; +} + +static pmix_status_t component_open(void) { return PMIX_SUCCESS; } -static int component_query(pmix_mca_base_module_t **module, int *priority) +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority) { *priority = 10; *module = (pmix_mca_base_module_t *)&pmix_opa_module; @@ -78,7 +115,7 @@ static int component_query(pmix_mca_base_module_t **module, int *priority) } -static int component_close(void) +static pmix_status_t component_close(void) { return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h index dedb1eb63f..480b54c929 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h @@ -56,13 +56,15 @@ typedef void (*pmix_pnet_base_module_fini_fn_t)(void); * tokens required for application processes to communicate with * each other */ -typedef pmix_status_t (*pmix_pnet_base_module_setup_app_fn_t)(char *nspace, pmix_list_t *ilist); +typedef pmix_status_t (*pmix_pnet_base_module_setup_app_fn_t)(pmix_nspace_t *nptr, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist); /** * Give the local network 
library an opportunity to setup address information * for the application by passing in the layout type and a regex describing * the layout */ -typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(char *nspace, +typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(pmix_nspace_t *nptr, pmix_info_t info[], size_t ninfo); @@ -70,7 +72,7 @@ typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(char *nspace * Give the local network library an opportunity to add any envars to the * environment of a local application process prior to fork/exec */ -typedef pmix_status_t (*pmix_pnet_base_module_setup_fork_fn_t)(const pmix_proc_t *peer, char ***env); +typedef pmix_status_t (*pmix_pnet_base_module_setup_fork_fn_t)(pmix_nspace_t *nptr, char ***env); /** * Provide an opportunity for the local network library to cleanup when a @@ -99,8 +101,51 @@ typedef struct { pmix_pnet_base_module_local_app_finalized_fn_t local_app_finalized; } pmix_pnet_module_t; + +/* define a few API versions of the functions */ +/** + * Provide an opportunity for the network to define values that + * are to be passed to an application. 
This can include security + * tokens required for application processes to communicate with + * each other + */ +typedef pmix_status_t (*pmix_pnet_base_API_setup_app_fn_t)(char *nspace, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist); + +/** + * Give the local network library an opportunity to setup address information + * for the application by passing in the layout type and a regex describing + * the layout */ +typedef pmix_status_t (*pmix_pnet_base_API_setup_local_net_fn_t)(char *nspace, + pmix_info_t info[], + size_t ninfo); + +/** + * Give the local network library an opportunity to add any envars to the + * environment of a local application process prior to fork/exec + */ +typedef pmix_status_t (*pmix_pnet_base_API_setup_fork_fn_t)(const pmix_proc_t *peer, char ***env); + + +/** + * Base structure for a PNET API + */ +typedef struct { + char *name; + /* init/finalize */ + pmix_pnet_base_module_init_fn_t init; + pmix_pnet_base_module_fini_fn_t finalize; + pmix_pnet_base_API_setup_app_fn_t setup_app; + pmix_pnet_base_API_setup_local_net_fn_t setup_local_network; + pmix_pnet_base_API_setup_fork_fn_t setup_fork; + pmix_pnet_base_module_child_finalized_fn_t child_finalized; + pmix_pnet_base_module_local_app_finalized_fn_t local_app_finalized; +} pmix_pnet_API_module_t; + + /* declare the global APIs */ -PMIX_EXPORT extern pmix_pnet_module_t pmix_pnet; +PMIX_EXPORT extern pmix_pnet_API_module_t pmix_pnet; /* * the standard component data structure diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c index 1c6126664f..a1b2938a0b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -100,6 +100,7 @@ static pmix_status_t create_cred(struct pmix_peer_t *peer, } } if (!takeus) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; } } @@ -124,6 +125,7 @@ static pmix_status_t create_cred(struct pmix_peer_t *peer, goto complete; } else { /* unrecognized protocol */ + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h index 7243b0af0d..ffb15baaad 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h @@ -89,6 +89,7 @@ typedef uint32_t pmix_ptl_tag_t; * within the system */ #define PMIX_PTL_TAG_NOTIFY 0 #define PMIX_PTL_TAG_HEARTBEAT 1 +#define PMIX_PTL_TAG_IOF 2 /* define the start of dynamic tags that are * assigned for send/recv operations */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c index 13d3328a07..bf9ef63d9a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -115,6 +115,7 @@ void pmix_rte_finalize(void) PMIX_DESTRUCT(&pmix_globals.events); PMIX_LIST_DESTRUCT(&pmix_globals.cached_events); PMIX_DESTRUCT(&pmix_globals.notifications); + PMIX_LIST_DESTRUCT(&pmix_globals.iof_requests); /* now safe to release the event base */ if (!pmix_globals.external_evbase) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c index b66a300b5f..bddc958f3b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -159,6 +159,8 @@ int pmix_rte_init(pmix_proc_type_t type, /* construct the global notification ring buffer */ PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); pmix_ring_buffer_init(&pmix_globals.notifications, 256); + /* and setup the iof request tracking list */ + PMIX_CONSTRUCT(&pmix_globals.iof_requests, pmix_list_t); /* Setup client verbosities as all procs are allowed to * access client APIs */ @@ -198,6 +200,12 @@ int pmix_rte_init(pmix_proc_type_t type, pmix_output_set_verbosity(pmix_client_globals.event_output, pmix_client_globals.event_verbose); } + if (0 < pmix_client_globals.iof_verbose) { + /* set default output */ + pmix_client_globals.iof_output = pmix_output_open(NULL); + pmix_output_set_verbosity(pmix_client_globals.iof_output, + pmix_client_globals.iof_verbose); + } /* get our effective id's */ pmix_globals.uid = geteuid(); diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c index 
61b12fa65f..4524c216a9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c @@ -21,7 +21,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -95,105 +95,152 @@ pmix_status_t pmix_register_params(void) } (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "event_caching_window", - "Time (in seconds) to aggregate events before reporting them - this " - "suppresses event cascades when processes abnormally terminate", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_event_caching_window); + "Time (in seconds) to aggregate events before reporting them - this " + "suppresses event cascades when processes abnormally terminate", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_event_caching_window); (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "suppress_missing_data_warning", - "Suppress warning that PMIx is missing job-level data that " - "is supposed to be provided by the host RM.", - PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_suppress_missing_data_warning); + "Suppress warning that PMIx is missing job-level data that " + "is supposed to be provided by the host RM.", + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_suppress_missing_data_warning); /**** CLIENT: VERBOSE OUTPUT PARAMS ****/ (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "get_verbose", - "Verbosity for client get operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.get_verbose); + "Verbosity for 
client get operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.get_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "connect_verbose", - "Verbosity for client connect operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.connect_verbose); + "Verbosity for client connect operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.connect_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "fence_verbose", - "Verbosity for client fence operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.fence_verbose); + "Verbosity for client fence operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.fence_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "pub_verbose", - "Verbosity for client publish, lookup, and unpublish operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.pub_verbose); + "Verbosity for client publish, lookup, and unpublish operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.pub_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "spawn_verbose", - "Verbosity for client spawn operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.spawn_verbose); + "Verbosity for client spawn operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.spawn_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "event_verbose", - "Verbosity for eventt 
spawn operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.event_verbose); + "Verbosity for client event notifications", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.event_verbose); + + (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "iof_verbose", + "Verbosity for client iof operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.iof_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "client", "base_verbose", - "Verbosity for basic client operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_client_globals.base_verbose); + "Verbosity for basic client operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_client_globals.base_verbose); /**** SERVER: VERBOSE OUTPUT PARAMS ****/ (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "get_verbose", - "Verbosity for server get operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.get_verbose); + "Verbosity for server get operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.get_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "connect_verbose", - "Verbosity for server connect operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.connect_verbose); + "Verbosity for server connect operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.connect_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "fence_verbose", - "Verbosity for server fence operations", - 
PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.fence_verbose); + "Verbosity for server fence operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.fence_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "pub_verbose", - "Verbosity for server publish, lookup, and unpublish operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.pub_verbose); + "Verbosity for server publish, lookup, and unpublish operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.pub_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "spawn_verbose", - "Verbosity for server spawn operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.spawn_verbose); + "Verbosity for server spawn operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.spawn_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "event_verbose", - "Verbosity for server event operations", - PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.event_verbose); + "Verbosity for server event operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.event_verbose); + + (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "iof_verbose", + "Verbosity for server iof operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.iof_verbose); (void) pmix_mca_base_var_register ("pmix", "pmix", "server", "base_verbose", - "Verbosity for basic server operations", - 
PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_server_globals.base_verbose); + "Verbosity for basic server operations", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.base_verbose); + /* check for maximum number of pending output messages */ + pmix_globals.output_limit = (size_t) INT_MAX; + (void) pmix_mca_base_var_register("pmix", "iof", NULL, "output_limit", + "Maximum backlog of output messages [default: unlimited]", + PMIX_MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + PMIX_INFO_LVL_9, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_globals.output_limit); + + pmix_globals.xml_output = false; + (void) pmix_mca_base_var_register ("pmix", "iof", NULL, "xml_output", + "Display all output in XML format (default: false)", + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_globals.xml_output); + + /* whether to tag output */ + /* if we requested xml output, be sure to tag the output as well */ + pmix_globals.tag_output = pmix_globals.xml_output; + (void) pmix_mca_base_var_register ("pmix", "iof", NULL, "tag_output", + "Tag all output with [job,rank] (default: false)", + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_globals.tag_output); + if (pmix_globals.xml_output) { + pmix_globals.tag_output = true; + } + + /* whether to timestamp output */ + pmix_globals.timestamp_output = false; + (void) pmix_mca_base_var_register ("pmix", "iof", NULL, "timestamp_output", + "Timestamp all application process output (default: false)", + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_globals.timestamp_output); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c index da6febf759..1541207e83 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c
+++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c
@@ -53,6 +53,7 @@
 
 #include "src/util/argv.h"
 #include "src/util/error.h"
+#include "src/util/name_fns.h"
 #include "src/util/output.h"
 #include "src/util/pmix_environ.h"
 #include "src/util/show_help.h"
@@ -87,6 +88,24 @@ static void server_message_handler(struct pmix_peer_t *pr,
                                    pmix_ptl_hdr_t *hdr,
                                    pmix_buffer_t *buf, void *cbdata);
 
+/* Eviction callback registered with pmix_hotel_init for the IOF cache.
+ * The setup caddy destructor only releases the peer and the lock, so
+ * the copied source, payload and directives must be freed here or
+ * every evicted entry would leak them */
+static void iof_eviction_cbfunc(struct pmix_hotel_t *hotel,
+                                int room_num,
+                                void *occupant)
+{
+    pmix_setup_caddy_t *cache = (pmix_setup_caddy_t*)occupant;
+
+    if (NULL != cache->info) {
+        PMIX_INFO_FREE(cache->info, cache->ninfo);
+    }
+    PMIX_PROC_FREE(cache->procs, 1);
+    PMIX_BYTE_OBJECT_FREE(cache->bo, 1);
+    PMIX_RELEASE(cache);
+}
+
 PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
                                            pmix_info_t info[], size_t ninfo)
 {
@@ -129,6 +148,15 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
     PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t);
     PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t);
     PMIX_CONSTRUCT(&pmix_server_globals.nspaces, pmix_list_t);
+    PMIX_CONSTRUCT(&pmix_server_globals.iof, pmix_hotel_t);
+    rc = pmix_hotel_init(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE,
+                         pmix_globals.evbase, PMIX_IOF_MAX_STAY,
+                         iof_eviction_cbfunc);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE_THREAD(&pmix_global_lock);
+        return rc;
+    }
 
     pmix_output_verbose(2, pmix_server_globals.base_output,
                         "pmix:server init called");
@@ -170,6 +198,12 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
         pmix_output_set_verbosity(pmix_server_globals.event_output,
                                   pmix_server_globals.event_verbose);
     }
+    if (0 < pmix_server_globals.iof_verbose) {
+        /* set default output */
+        pmix_server_globals.iof_output = pmix_output_open(NULL);
+        pmix_output_set_verbosity(pmix_server_globals.iof_output,
+                                  pmix_server_globals.iof_verbose);
+    }
     /* setup the base verbosity */
     if (0 < pmix_server_globals.base_verbose) {
         /* set default output */
@@ -347,6 +381,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void)
     int i;
     pmix_peer_t *peer;
     pmix_nspace_t *ns;
+    pmix_setup_caddy_t *cd;
 
     PMIX_ACQUIRE_THREAD(&pmix_global_lock);
     if (pmix_globals.init_cntr <= 0) {
@@ -374,6 +409,21 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void)
 
     pmix_ptl_base_stop_listening();
 
+    /* cleanout any IOF - the caddy destructor does not free the cached
+     * source, payload or directives, so release them explicitly */
+    for (i=0; i < PMIX_IOF_HOTEL_SIZE; i++) {
+        cd = NULL;
+        pmix_hotel_checkout_and_return_occupant(&pmix_server_globals.iof, i, (void**)&cd);
+        if (NULL != cd) {
+            if (NULL != cd->info) {
+                PMIX_INFO_FREE(cd->info, cd->ninfo);
+            }
+            PMIX_PROC_FREE(cd->procs, 1);
+            PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+            PMIX_RELEASE(cd);
+        }
+    }
+    PMIX_DESTRUCT(&pmix_server_globals.iof);
     for (i=0; i < pmix_server_globals.clients.size; i++) {
         if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
             /* ensure that we do the specified cleanup - if this is an
@@ -1298,19 +1348,20 @@ static void _setup_op(pmix_status_t rc, void *cbdata)
 static void _setup_app(int sd, short args, void *cbdata)
 {
     pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
-    pmix_buffer_t buffer;
-    pmix_byte_object_t blob;
     pmix_setup_caddy_t *fcd = NULL;
     pmix_status_t rc;
     pmix_list_t ilist;
     pmix_kval_t *kv;
+    size_t n;
 
     PMIX_ACQUIRE_OBJECT(cd);
 
     PMIX_CONSTRUCT(&ilist, pmix_list_t);
 
     /* pass to the network libraries */
-    if (PMIX_SUCCESS != (rc = pmix_pnet.setup_app(cd->nspace, &ilist))) {
+    if (PMIX_SUCCESS != (rc = pmix_pnet.setup_app(cd->nspace,
+                                                  cd->info, cd->ninfo,
+                                                  &ilist))) {
         goto depart;
     }
 
@@ -1322,29 +1373,20 @@ static void _setup_app(int sd, short args, void *cbdata)
         goto depart;
     }
 
-    /* if anything came back, construct the blob */
-    if (0 < pmix_list_get_size(&ilist)) {
-        PMIX_CONSTRUCT(&buffer, pmix_buffer_t);
-        PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) {
-            PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buffer, kv, 1, PMIX_KVAL);
-            if (PMIX_SUCCESS != rc) {
-                PMIX_RELEASE(fcd);
-                fcd = NULL;
-                goto depart;
-            }
-        }
-        PMIX_INFO_CREATE(fcd->info, 1);
+    /* if anything came back, construct an info array */
+    if (0 < (fcd->ninfo = pmix_list_get_size(&ilist))) {
+        PMIX_INFO_CREATE(fcd->info, fcd->ninfo);
         if (NULL == fcd->info) {
+            rc = PMIX_ERR_NOMEM;
             PMIX_RELEASE(fcd);
-            fcd = NULL;
             goto depart;
         }
-        fcd->ninfo = 1;
-        PMIX_BYTE_OBJECT_CONSTRUCT(&blob);
-        PMIX_BYTE_OBJECT_LOAD(&blob, buffer.base_ptr, buffer.bytes_used);
-        PMIX_DESTRUCT(&buffer);
-        PMIX_INFO_LOAD(&fcd->info[0], PMIX_PNET_SETUP_APP, &blob, PMIX_BYTE_OBJECT);
-        PMIX_BYTE_OBJECT_DESTRUCT(&blob);
+        n = 0;
+        PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) {
+            (void)strncpy(fcd->info[n].key, kv->key, PMIX_MAX_KEYLEN);
+            pmix_value_xfer(&fcd->info[n].value, kv->value);
+            ++n;
+        }
     }
 
   depart:
@@ -1446,6 +1488,165 @@ pmix_status_t PMIx_server_setup_local_support(const char nspace[],
     return PMIX_SUCCESS;
 }
 
+/* Event handler (threadshifted from PMIx_server_IOF_deliver): forward the
+ * copied output to every registered requestor whose source and channel
+ * match, or cache it in the IOF hotel if nobody has asked for it yet */
+static void _iofdeliver(int sd, short args, void *cbdata)
+{
+    pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
+    pmix_iof_req_t *req;
+    pmix_status_t rc;
+    pmix_buffer_t *msg;
+    bool found = false;
+    bool cached = false;
+    int ignore;
+
+    PMIX_ACQUIRE_OBJECT(cd);
+
+    pmix_output_verbose(2, pmix_server_globals.iof_output,
+                        "PMIX:SERVER delivering IOF");
+
+    /* cycle across our list of IOF requestors and see who wants
+     * this channel from this source */
+    PMIX_LIST_FOREACH(req, &pmix_globals.iof_requests, pmix_iof_req_t) {
+        /* if the channel wasn't included, then ignore it */
+        if (!(cd->channels & req->channels)) {
+            continue;
+        }
+        /* if the source matches the request, then forward this along */
+        if (0 != strncmp(cd->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) ||
+            (PMIX_RANK_WILDCARD != req->pname.rank && cd->procs->rank != req->pname.rank)) {
+            continue;
+        }
+        found = true;
+        /* setup the msg */
+        if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) {
+            PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+            rc = PMIX_ERR_OUT_OF_RESOURCE;
+            break;
+        }
+        /* provide the source */
+        PMIX_BFROPS_PACK(rc, req->peer, msg, cd->procs, 1, PMIX_PROC);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            break;
+        }
+        /* provide the channel */
+        PMIX_BFROPS_PACK(rc, req->peer, msg, &cd->channels, 1, PMIX_IOF_CHANNEL);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            break;
+        }
+        /* pack the data */
+        PMIX_BFROPS_PACK(rc, req->peer, msg, cd->bo, 1, PMIX_BYTE_OBJECT);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            break;
+        }
+        /* send it to the requestor */
+        PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+        }
+    }
+
+    /* if nobody has registered for this yet, then cache it */
+    if (!found) {
+        /* add this output to our hotel so it is cached until someone
+         * registers to receive it */
+        if (PMIX_SUCCESS != (rc = pmix_hotel_checkin(&pmix_server_globals.iof, cd, &ignore))) {
+            /* we can't cache it for some reason - fall thru so the
+             * caller is told and our copies are released below */
+            PMIX_ERROR_LOG(rc);
+        } else {
+            cached = true;
+        }
+    }
+
+    /* always report the outcome to the caller */
+    if (NULL != cd->opcbfunc) {
+        cd->opcbfunc(rc, cd->cbdata);
+    }
+    /* if the hotel did not take ownership, release our copies */
+    if (!cached) {
+        if (NULL != cd->info) {
+            PMIX_INFO_FREE(cd->info, cd->ninfo);
+        }
+        PMIX_PROC_FREE(cd->procs, 1);
+        PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+        PMIX_RELEASE(cd);
+    }
+}
+
+/* Deliver output from a local process. The input is deep-copied because
+ * it may need to be cached for later delivery, and the request is then
+ * threadshifted for processing. Returns PMIX_ERR_NOMEM if a copy
+ * cannot be allocated, in which case cbfunc is not called */
+pmix_status_t PMIx_server_IOF_deliver(const pmix_proc_t *source,
+                                      pmix_iof_channel_t channel,
+                                      const pmix_byte_object_t *bo,
+                                      const pmix_info_t info[], size_t ninfo,
+                                      pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_setup_caddy_t *cd;
+    size_t n;
+
+    /* need to threadshift this request */
+    cd = PMIX_NEW(pmix_setup_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    /* unfortunately, we need to copy the input because we
+     * might have to cache it for later delivery */
+    PMIX_PROC_CREATE(cd->procs, 1);
+    if (NULL == cd->procs) {
+        PMIX_RELEASE(cd);
+        return PMIX_ERR_NOMEM;
+    }
+    (void)strncpy(cd->procs[0].nspace, source->nspace, PMIX_MAX_NSLEN);
+    cd->procs[0].rank = source->rank;
+    cd->channels = channel;
+    PMIX_BYTE_OBJECT_CREATE(cd->bo, 1);
+    if (NULL == cd->bo) {
+        PMIX_PROC_FREE(cd->procs, 1);
+        PMIX_RELEASE(cd);
+        return PMIX_ERR_NOMEM;
+    }
+    /* do not treat an empty payload as an allocation failure -
+     * malloc(0) is allowed to return NULL */
+    if (0 < bo->size) {
+        cd->bo[0].bytes = (char*)malloc(bo->size);
+        if (NULL == cd->bo[0].bytes) {
+            PMIX_PROC_FREE(cd->procs, 1);
+            PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+            PMIX_RELEASE(cd);
+            return PMIX_ERR_NOMEM;
+        }
+        memcpy(cd->bo[0].bytes, bo->bytes, bo->size);
+        cd->bo[0].size = bo->size;
+    }
+    if (0 < ninfo) {
+        PMIX_INFO_CREATE(cd->info, ninfo);
+        if (NULL == cd->info) {
+            PMIX_PROC_FREE(cd->procs, 1);
+            PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+            PMIX_RELEASE(cd);
+            return PMIX_ERR_NOMEM;
+        }
+        cd->ninfo = ninfo;
+        for (n=0; n < ninfo; n++) {
+            PMIX_INFO_XFER(&cd->info[n], (pmix_info_t*)&info[n]);
+        }
+    }
+    cd->opcbfunc = cbfunc;
+    cd->cbdata = cbdata;
+    PMIX_THREADSHIFT(cd, _iofdeliver);
+    return PMIX_SUCCESS;
+}
 
 /**** THE FOLLOWING CALLBACK FUNCTIONS ARE USED BY THE HOST SERVER ****
  **** THEY THEREFORE CAN OCCUR IN EITHER THE HOST SERVER'S THREAD ****
@@ -2357,6 +2558,69 @@ static void validate_cbfunc(pmix_status_t status,
 }
 
 
+static void _iofreg(int sd, short args, void *cbdata)
+{
+    pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
+    pmix_server_caddy_t *scd = (pmix_server_caddy_t*)cd->cbdata;
+    pmix_buffer_t *reply;
+    pmix_status_t rc;
+
+    PMIX_ACQUIRE_OBJECT(cd);
+
+    /* setup the reply to the requestor */
+    reply = PMIX_NEW(pmix_buffer_t);
+    if (NULL == reply) {
+        PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
+        rc = PMIX_ERR_NOMEM;
+        goto cleanup;
+    }
+    /* start with the status */
+    PMIX_BFROPS_PACK(rc, scd->peer, reply, &cd->status, 1, PMIX_STATUS);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(reply);
+        goto cleanup;
+    }
+
+    /* was the request a success? */
+    if (PMIX_SUCCESS != cd->status) {
+        /* find and remove the tracker(s) */
+    }
+
+    pmix_output_verbose(2, pmix_server_globals.iof_output,
+                        "server:_iofreg reply being sent to %s:%u",
+                        scd->peer->info->pname.nspace, scd->peer->info->pname.rank);
+    PMIX_SERVER_QUEUE_REPLY(scd->peer, scd->hdr.tag, reply);
+
+  cleanup:
+    /* release the cached info */
+    if (NULL != cd->procs) {
+        PMIX_PROC_FREE(cd->procs, cd->nprocs);
+    }
+    PMIX_INFO_FREE(cd->info, cd->ninfo);
+    /* we are done */
+    PMIX_RELEASE(cd);
+}
+
+static void iof_cbfunc(pmix_status_t status,
+                       void *cbdata)
+{
+    pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
+
+    pmix_output_verbose(2, pmix_server_globals.iof_output,
+                        "server:iof_cbfunc called with status %d",
+                        status);
+
+    if (NULL == cd) {
+        /* nothing to do */
+        return;
+    }
+    cd->status = status;
+
+    /* need to thread-shift this callback as it accesses global data */
+    PMIX_THREADSHIFT(cd, _iofreg);
+}
+
 /* the switchyard is the primary message handling function. It's purpose
  * is to take incoming commands (packed into a buffer), unpack them,
  * and then call the corresponding host server's function to execute
@@ -2608,6 +2872,18 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
         return rc;
     }
 
+    if (PMIX_IOF_PULL_CMD == cmd) {
+        PMIX_GDS_CADDY(cd, peer, tag);
+        rc = pmix_server_iofreg(peer, buf, iof_cbfunc, cd);
+        return rc;
+    }
+
+    if (PMIX_IOF_PUSH_CMD == cmd) {
+        PMIX_GDS_CADDY(cd, peer, tag);
+        rc = pmix_server_iofstdin(peer, buf, op_cbfunc, cd);
+        return rc;
+    }
+
     return PMIX_ERR_NOT_SUPPORTED;
 }
 
diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
index 5d8a22be5f..130d85f66f 100644
--- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
+++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
@@ -48,6 +48,7 @@
 #endif
 #include PMIX_EVENT_HEADER
 
+#include "src/class/pmix_hotel.h"
 #include "src/class/pmix_list.h"
 #include
"src/mca/bfrops/bfrops.h"
 #include "src/util/argv.h"
@@ -1025,7 +1026,83 @@ static void spcbfunc(pmix_status_t status,
                      char nspace[], void *cbdata)
 {
     pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
+    pmix_iof_req_t *req;
+    pmix_setup_caddy_t *occupant;
+    int i;
+    pmix_buffer_t *msg;
+    pmix_status_t rc;
 
+    /* if it was successful, and there are IOF requests, then
+     * register them now */
+    if (PMIX_SUCCESS == status && PMIX_FWD_NO_CHANNELS != cd->channels) {
+        /* record the request */
+        req = PMIX_NEW(pmix_iof_req_t);
+        if (NULL != req) {
+            PMIX_RETAIN(cd->peer);
+            req->peer = cd->peer;
+            req->pname.nspace = strdup(nspace);
+            req->pname.rank = PMIX_RANK_WILDCARD;
+            req->channels = cd->channels;
+            pmix_list_append(&pmix_globals.iof_requests, &req->super);
+        }
+        /* process any cached IO - there is nobody to forward it to
+         * if the request could not be recorded */
+        for (i=0; NULL != req && i < PMIX_IOF_HOTEL_SIZE; i++) {
+            pmix_hotel_knock(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1, (void**)&occupant);
+            if (NULL != occupant) {
+                if (!(occupant->channels & req->channels)) {
+                    continue;
+                }
+                /* if the source matches the request, then forward this along */
+                if (0 != strncmp(occupant->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) ||
+                    (PMIX_RANK_WILDCARD != req->pname.rank && occupant->procs->rank != req->pname.rank)) {
+                    continue;
+                }
+                /* setup the msg */
+                if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) {
+                    PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+                    rc = PMIX_ERR_OUT_OF_RESOURCE;
+                    break;
+                }
+                /* provide the source */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->procs, 1, PMIX_PROC);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* provide the channel */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, &occupant->channels, 1, PMIX_IOF_CHANNEL);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* pack the data */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->bo, 1, PMIX_BYTE_OBJECT);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* send it to the requestor */
+                PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                }
+                /* remove it from the hotel since it has now been forwarded -
+                 * the caddy destructor does not free the cached copies */
+                pmix_hotel_checkout(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1);
+                if (NULL != occupant->info) {
+                    PMIX_INFO_FREE(occupant->info, occupant->ninfo);
+                }
+                PMIX_PROC_FREE(occupant->procs, 1);
+                PMIX_BYTE_OBJECT_FREE(occupant->bo, 1);
+                PMIX_RELEASE(occupant);
+            }
+        }
+    }
     /* cleanup the caddy */
     if (NULL != cd->info) {
         PMIX_INFO_FREE(cd->info, cd->ninfo);
@@ -1048,7 +1125,8 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer,
     int32_t cnt;
     pmix_status_t rc;
     pmix_proc_t proc;
-    size_t ninfo;
+    size_t ninfo, n;
+    bool stdout_found = false, stderr_found = false, stddiag_found = false;
 
     pmix_output_verbose(2, pmix_server_globals.spawn_output,
                         "recvd SPAWN");
@@ -1063,6 +1141,8 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer,
     if (NULL == cd) {
         return PMIX_ERR_NOMEM;
     }
+    PMIX_RETAIN(peer);
+    cd->peer = peer;
     cd->spcbfunc = cbfunc;
     cd->cbdata = cbdata;
 
@@ -1091,10 +1171,48 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer,
             PMIX_ERROR_LOG(rc);
             goto cleanup;
         }
+        /* run a quick check of the directives to see if any IOF
+         * requests were included so we can set that up now - helps
+         * to catch any early output */
+        cd->channels = PMIX_FWD_NO_CHANNELS;
+        for (n=0; n < cd->ninfo; n++) {
+            if (0 == strncmp(cd->info[n].key, PMIX_FWD_STDIN, PMIX_MAX_KEYLEN)) {
+                if (PMIX_INFO_TRUE(&cd->info[n])) {
+                    cd->channels |= PMIX_FWD_STDIN_CHANNEL;
+                }
+            } else if (0 == strncmp(cd->info[n].key, PMIX_FWD_STDOUT, PMIX_MAX_KEYLEN)) {
+                stdout_found = true;
+                if (PMIX_INFO_TRUE(&cd->info[n])) {
+                    cd->channels |= PMIX_FWD_STDOUT_CHANNEL;
+                }
+            } else if (0 == strncmp(cd->info[n].key, PMIX_FWD_STDERR, PMIX_MAX_KEYLEN)) {
+                stderr_found = true;
+                if (PMIX_INFO_TRUE(&cd->info[n])) {
+                    cd->channels |= PMIX_FWD_STDERR_CHANNEL;
+                }
+            } else if (0 == strncmp(cd->info[n].key, PMIX_FWD_STDDIAG, PMIX_MAX_KEYLEN)) {
+                stddiag_found = true;
+                if (PMIX_INFO_TRUE(&cd->info[n])) {
+                    cd->channels |= PMIX_FWD_STDDIAG_CHANNEL;
+                }
+            }
+        }
+        /* we will construct any required iof request tracker upon completion of the spawn */
     }
     /* add the directive to the end */
     if (PMIX_PROC_IS_TOOL(peer)) {
         PMIX_INFO_LOAD(&cd->info[ninfo], PMIX_REQUESTOR_IS_TOOL, NULL, PMIX_BOOL);
+        /* if the requestor is a tool, we default to forwarding all
+         * output IO channels */
+        if (!stdout_found) {
+            cd->channels |= PMIX_FWD_STDOUT_CHANNEL;
+        }
+        if (!stderr_found) {
+            cd->channels |= PMIX_FWD_STDERR_CHANNEL;
+        }
+        if (!stddiag_found) {
+            cd->channels |= PMIX_FWD_STDDIAG_CHANNEL;
+        }
     } else {
         PMIX_INFO_LOAD(&cd->info[ninfo], PMIX_REQUESTOR_IS_CLIENT, NULL, PMIX_BOOL);
     }
@@ -2542,6 +2660,336 @@ pmix_status_t pmix_server_validate_credential(pmix_peer_t *peer,
     return rc;
 }
 
+/* Handle an IOF "pull" request: unpack the sources, directives and
+ * channels, record a request tracker for each new source, forward any
+ * matching cached output, and ask the host to service the request when
+ * an existing registration gained channels */
+pmix_status_t pmix_server_iofreg(pmix_peer_t *peer,
+                                 pmix_buffer_t *buf,
+                                 pmix_op_cbfunc_t cbfunc,
+                                 void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc;
+    pmix_setup_caddy_t *cd;
+    pmix_iof_req_t *req;
+    bool notify, match;
+    size_t n;
+    int i;
+    pmix_setup_caddy_t *occupant;
+    pmix_buffer_t *msg;
+
+    pmix_output_verbose(2, pmix_server_globals.iof_output,
+                        "recvd IOF PULL request from client");
+
+    if (NULL == pmix_host_server.iof_pull) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_setup_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->cbdata = cbdata;  // this is the pmix_server_caddy_t
+
+    /* unpack the number of procs */
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->nprocs, &cnt, PMIX_SIZE);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the procs */
+    if (0 < cd->nprocs) {
+        PMIX_PROC_CREATE(cd->procs, cd->nprocs);
+        if (NULL == cd->procs) {
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->nprocs;
+        PMIX_BFROPS_UNPACK(rc, peer, buf, cd->procs, &cnt, PMIX_PROC);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* unpack the number of directives */
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the directives */
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ninfo;
+        PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* unpack the channels */
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->channels, &cnt, PMIX_IOF_CHANNEL);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+
+    /* check to see if we have already registered this source/channel combination */
+    notify = false;
+    for (n=0; n < cd->nprocs; n++) {
+        match = false;
+        PMIX_LIST_FOREACH(req, &pmix_globals.iof_requests, pmix_iof_req_t) {
+            /* is this request from the same peer? */
+            if (peer != req->peer) {
+                continue;
+            }
+            /* do we already have this source for this peer? */
+            if (0 == strncmp(cd->procs[n].nspace, req->pname.nspace, PMIX_MAX_NSLEN) &&
+                (PMIX_RANK_WILDCARD == req->pname.rank || cd->procs[n].rank == req->pname.rank)) {
+                match = true;
+                if ((req->channels & cd->channels) != cd->channels) {
+                    /* this is a channel update */
+                    req->channels |= cd->channels;
+                    /* we need to notify the host */
+                    notify = true;
+                }
+                break;
+            }
+        }
+        /* if we didn't find the matching entry, then add it */
+        if (!match) {
+            /* record the request */
+            req = PMIX_NEW(pmix_iof_req_t);
+            if (NULL == req) {
+                rc = PMIX_ERR_NOMEM;
+                goto exit;
+            }
+            PMIX_RETAIN(peer);
+            req->peer = peer;
+            req->pname.nspace = strdup(cd->procs[n].nspace);
+            req->pname.rank = cd->procs[n].rank;
+            req->channels = cd->channels;
+            pmix_list_append(&pmix_globals.iof_requests, &req->super);
+        }
+        /* process any cached IO */
+        for (i=0; i < PMIX_IOF_HOTEL_SIZE; i++) {
+            pmix_hotel_knock(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1, (void**)&occupant);
+            if (NULL != occupant) {
+                if (!(occupant->channels & req->channels)) {
+                    continue;
+                }
+                /* if the source matches the request, then forward this along */
+                if (0 != strncmp(occupant->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) ||
+                    (PMIX_RANK_WILDCARD != req->pname.rank && occupant->procs->rank != req->pname.rank)) {
+                    continue;
+                }
+                /* setup the msg */
+                if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) {
+                    PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+                    rc = PMIX_ERR_OUT_OF_RESOURCE;
+                    break;
+                }
+                /* provide the source */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->procs, 1, PMIX_PROC);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* provide the channel */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, &occupant->channels, 1, PMIX_IOF_CHANNEL);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* pack the data */
+                PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->bo, 1, PMIX_BYTE_OBJECT);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                    break;
+                }
+                /* send it to the requestor */
+                PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_ERROR_LOG(rc);
+                    PMIX_RELEASE(msg);
+                }
+                /* remove it from the hotel since it has now been forwarded -
+                 * the caddy destructor does not free the cached copies */
+                pmix_hotel_checkout(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1);
+                if (NULL != occupant->info) {
+                    PMIX_INFO_FREE(occupant->info, occupant->ninfo);
+                }
+                PMIX_PROC_FREE(occupant->procs, 1);
+                PMIX_BYTE_OBJECT_FREE(occupant->bo, 1);
+                PMIX_RELEASE(occupant);
+            }
+        }
+    }
+    if (notify) {
+        /* ask the host to execute the request */
+        if (PMIX_SUCCESS != (rc = pmix_host_server.iof_pull(cd->procs, cd->nprocs,
+                                                            cd->info, cd->ninfo,
+                                                            cd->channels,
+                                                            cbfunc, cd))) {
+            goto exit;
+        }
+    }
+    return PMIX_SUCCESS;
+
+  exit:
+    /* the caddy destructor does not free what we unpacked */
+    if (NULL != cd->procs) {
+        PMIX_PROC_FREE(cd->procs, cd->nprocs);
+    }
+    if (NULL != cd->info) {
+        PMIX_INFO_FREE(cd->info, cd->ninfo);
+    }
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
+/* Completion callback handed to push_stdin: relay the status to the
+ * original callback and release everything held by the caddy */
+static void stdcbfunc(pmix_status_t status, void *cbdata)
+{
+    pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
+
+    if (NULL != cd->opcbfunc) {
+        cd->opcbfunc(status, cd->cbdata);
+    }
+    if (NULL != cd->procs) {
+        PMIX_PROC_FREE(cd->procs, cd->nprocs);
+    }
+    if (NULL != cd->info) {
+        PMIX_INFO_FREE(cd->info, cd->ninfo);
+    }
+    if (NULL != cd->bo) {
+        PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Handle stdin data pushed by a tool: unpack the targets, directives and
+ * payload and pass them to the host's push_stdin function. On error the
+ * caddy and everything unpacked so far is released before returning */
+pmix_status_t pmix_server_iofstdin(pmix_peer_t *peer,
+                                   pmix_buffer_t *buf,
+                                   pmix_op_cbfunc_t cbfunc,
+                                   void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc;
+    pmix_proc_t source;
+    pmix_setup_caddy_t *cd;
+
+    pmix_output_verbose(2, pmix_server_globals.iof_output,
+                        "recvd stdin IOF data from tool");
+
+    if (NULL == pmix_host_server.push_stdin) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_setup_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->opcbfunc = cbfunc;
+    cd->cbdata = cbdata;
+
+    /* unpack the number of targets */
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->nprocs, &cnt, PMIX_SIZE);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    if (0 < cd->nprocs) {
+        PMIX_PROC_CREATE(cd->procs, cd->nprocs);
+        if (NULL == cd->procs) {
+            rc = PMIX_ERR_NOMEM;
+            goto error;
+        }
+        cnt = cd->nprocs;
+        PMIX_BFROPS_UNPACK(rc, peer, buf, cd->procs, &cnt, PMIX_PROC);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+    }
+
+    /* unpack the number of directives */
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            rc = PMIX_ERR_NOMEM;
+            goto error;
+        }
+        cnt = cd->ninfo;
+        PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+    }
+
+    /* unpack the data */
+    PMIX_BYTE_OBJECT_CREATE(cd->bo, 1);
+    if (NULL == cd->bo) {
+        rc = PMIX_ERR_NOMEM;
+        goto error;
+    }
+
+    cnt = 1;
+    PMIX_BFROPS_UNPACK(rc, peer, buf, cd->bo, &cnt, PMIX_BYTE_OBJECT);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+
+    /* pass the data to the host */
+    (void)strncpy(source.nspace, peer->nptr->nspace, PMIX_MAX_NSLEN);
+    source.rank = peer->info->pname.rank;
+    if (PMIX_SUCCESS != (rc = pmix_host_server.push_stdin(&source, cd->procs, cd->nprocs,
+                                                          cd->info, cd->ninfo, cd->bo,
+                                                          stdcbfunc, cd))) {
+        goto error;
+    }
+    return PMIX_SUCCESS;
+
+  error:
+    /* the caddy destructor does not free what we unpacked */
+    if (NULL != cd->procs) {
+        PMIX_PROC_FREE(cd->procs, cd->nprocs);
+    }
+    if (NULL != cd->info) {
+        PMIX_INFO_FREE(cd->info, cd->ninfo);
+    }
+    if (NULL != cd->bo) {
+        PMIX_BYTE_OBJECT_FREE(cd->bo, 1);
+    }
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
 /*****
INSTANCE SERVER LIBRARY CLASSES *****/ static void tcon(pmix_server_trkr_t *t) { @@ -2603,6 +3004,7 @@ PMIX_CLASS_INSTANCE(pmix_server_caddy_t, static void scadcon(pmix_setup_caddy_t *p) { + p->peer = NULL; /* no peer reference is held until one is assigned */ memset(&p->proc, 0, sizeof(pmix_proc_t)); PMIX_CONSTRUCT_LOCK(&p->lock); p->nspace = NULL; @@ -2615,6 +3017,8 @@ static void scadcon(pmix_setup_caddy_t *p) p->info = NULL; p->ninfo = 0; p->keys = NULL; + p->channels = PMIX_FWD_NO_CHANNELS; + p->bo = NULL; p->cbfunc = NULL; p->opcbfunc = NULL; p->setupcbfunc = NULL; @@ -2624,6 +3028,9 @@ static void scadcon(pmix_setup_caddy_t *p) } static void scaddes(pmix_setup_caddy_t *p) { + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); /* only the peer is released here - procs, info and bo are not freed by this destructor */ + } PMIX_DESTRUCT_LOCK(&p->lock); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h index aa4c4d6b74..3913553053 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h @@ -18,12 +18,15 @@ #include "src/include/types.h" #include -#include +#include #include #include "src/threads/threads.h" #include "src/include/pmix_globals.h" #include "src/util/hash.h" +#define PMIX_IOF_HOTEL_SIZE 256 /* number of rooms requested for the server's IOF cache hotel */ +#define PMIX_IOF_MAX_STAY 300000000 /* eviction timeout handed to pmix_hotel_init - NOTE(review): units are defined by pmix_hotel_init, confirm there */ + typedef struct { pmix_object_t super; pmix_event_t ev; @@ -35,6 +38,7 @@ typedef struct { pmix_object_t super; pmix_event_t ev; pmix_lock_t lock; + pmix_peer_t *peer; /* peer tied to this caddy; released by the destructor when non-NULL */ char *nspace; pmix_status_t status; pmix_status_t *codes; @@ -51,6 +55,8 @@ typedef struct { char **keys; pmix_app_t *apps; size_t napps; + pmix_iof_channel_t channels; /* IOF channels for this operation; constructed as PMIX_FWD_NO_CHANNELS */ + pmix_byte_object_t *bo; /* IOF data payload; constructed as NULL */ pmix_op_cbfunc_t opcbfunc; pmix_dmodex_response_fn_t cbfunc; pmix_setup_application_cbfunc_t setupcbfunc; @@ -96,7 +102,7 @@ PMIX_CLASS_DECLARATION(pmix_peer_events_info_t); typedef struct { pmix_list_item_t super; - pmix_list_t peers; // list of pmix_prevents_info_t + pmix_list_t peers; // list of
pmix_peer_events_info_t int code; } pmix_regevents_info_t; PMIX_CLASS_DECLARATION(pmix_regevents_info_t); @@ -109,6 +115,7 @@ typedef struct { pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours pmix_list_t gdata; // cache of data given to me for passing to all clients pmix_list_t events; // list of pmix_regevents_info_t registered events + pmix_hotel_t iof; // IO to be forwarded to clients bool tool_connections_allowed; // verbosity for server get operations int get_output; @@ -128,6 +135,9 @@ typedef struct { // verbosity for server event operations int event_output; int event_verbose; + // verbosity for server iof operations + int iof_output; + int iof_verbose; // verbosity for basic server functions int base_output; int base_verbose; @@ -256,6 +266,16 @@ pmix_status_t pmix_server_validate_credential(pmix_peer_t *peer, pmix_validation_cbfunc_t cbfunc, void *cbdata); +pmix_status_t pmix_server_iofreg(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_op_cbfunc_t cbfunc, + void *cbdata); + +pmix_status_t pmix_server_iofstdin(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_op_cbfunc_t cbfunc, + void *cbdata); + pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, diff --git a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c index 0221869fc3..8ac864ab62 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c @@ -58,8 +58,6 @@ #ident PMIX_VERSION #endif -extern pmix_client_globals_t pmix_client_globals; - #include "src/class/pmix_list.h" #include "src/util/argv.h" #include "src/util/error.h" @@ -69,12 +67,17 @@ extern pmix_client_globals_t pmix_client_globals; #include "src/runtime/pmix_rte.h" #include "src/mca/bfrops/base/base.h" #include "src/mca/gds/base/base.h" -#include "src/mca/ptl/ptl.h" +#include "src/mca/ptl/base/base.h" #include "src/mca/psec/psec.h"
#include "src/include/pmix_globals.h" +#include "src/common/pmix_iof.h" #define PMIX_MAX_RETRIES 10 +extern pmix_client_globals_t pmix_client_globals; +static pmix_event_t stdinsig; +static pmix_iof_read_event_t stdinev; + static void _notify_complete(pmix_status_t status, void *cbdata) { pmix_event_chain_t *chain = (pmix_event_chain_t*)cbdata; @@ -91,7 +94,7 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, pmix_event_chain_t *chain; size_t ninfo; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.event_output, "pmix:tool_notify_recv - processing event"); /* a zero-byte buffer indicates that this recv is being @@ -172,7 +175,7 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, /* now put the callback object tag in the last element */ PMIX_INFO_LOAD(&chain->info[ninfo], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.event_output, "[%s:%d] pmix:tool_notify_recv - processing event %d, calling errhandler", pmix_globals.myid.nspace, pmix_globals.myid.rank, chain->status); @@ -181,7 +184,7 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, error: /* we always need to return */ - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.event_output, "pmix:tool_notify_recv - unpack error status =%d, calling def errhandler", rc); chain = PMIX_NEW(pmix_event_chain_t); chain->status = rc; @@ -189,6 +192,53 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, } +static void tool_iof_handler(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_peer_t *peer = (pmix_peer_t*)pr; + pmix_proc_t source; + pmix_iof_channel_t channel; + pmix_byte_object_t bo; + int32_t cnt; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_client_globals.iof_output, + "recvd IOF"); + + /* if the buffer is empty, they are simply closing 
the channel */ + if (0 == buf->bytes_used) { + return; + } + + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &source, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &channel, &cnt, PMIX_IOF_CHANNEL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &bo, &cnt, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return; + } + if (NULL != bo.bytes && 0 < bo.size) { + if (channel & PMIX_FWD_STDOUT_CHANNEL) { + pmix_iof_write_output(&source, channel, &bo, &pmix_client_globals.iof_stdout.wev); + } else { + pmix_iof_write_output(&source, channel, &bo, &pmix_client_globals.iof_stderr.wev); + } + } + PMIX_BYTE_OBJECT_DESTRUCT(&bo); +} + PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { @@ -198,9 +248,12 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, bool found, do_not_connect = false; bool nspace_given = false; bool rank_given = false; + bool fwd_stdin = false; pmix_info_t ginfo; size_t n; + pmix_ptl_posted_recv_t *rcv; pmix_proc_t wildcard; + int fd; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -238,6 +291,13 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* setup the IO Forwarding recv */ + rcv = PMIX_NEW(pmix_ptl_posted_recv_t); + rcv->tag = PMIX_PTL_TAG_IOF; + rcv->cbfunc = tool_iof_handler; + /* add it to the end of the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &rcv->super); + PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); PMIX_CONSTRUCT(&pmix_client_globals.peers, pmix_pointer_array_t); @@ -296,6 +356,9 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } else if (0 == strncmp(info[n].key, PMIX_TOOL_RANK, PMIX_MAX_KEYLEN)) { pmix_globals.myid.rank = info[n].value.data.rank; rank_given = true; + } else if (0 == strncmp(info[n].key, PMIX_FWD_STDIN, 
PMIX_MAX_KEYLEN)) { + /* they want us to forward our stdin to someone */ + fwd_stdin = true; } } } @@ -342,6 +405,81 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_globals.mypeer->info->pname.nspace = strdup(proc->nspace); pmix_globals.mypeer->info->pname.rank = proc->rank; + /* setup IOF */ + PMIX_IOF_SINK_DEFINE(&pmix_client_globals.iof_stdout, &pmix_globals.myid, + 1, PMIX_FWD_STDOUT_CHANNEL, pmix_iof_write_handler); + PMIX_IOF_SINK_DEFINE(&pmix_client_globals.iof_stderr, &pmix_globals.myid, + 2, PMIX_FWD_STDERR_CHANNEL, pmix_iof_write_handler); + if (fwd_stdin) { + /* setup the read - we don't want to set nonblocking on our + * stdio stream. If we do so, we set the file descriptor to + * non-blocking for everyone that has that file descriptor, which + * includes everyone else in our shell pipeline chain. (See + * http://lists.freebsd.org/pipermail/freebsd-hackers/2005-January/009742.html). + * This causes things like "prun -np 1 big_app | cat" to lose + * output, because cat's stdout is then ALSO non-blocking and cat + * isn't built to deal with that case (same with almost all other + * unix text utils).*/ + fd = fileno(stdin); + if (isatty(fd)) { + /* We should avoid trying to read from stdin if we + * have a terminal, but are backgrounded. Catch the + * signals that are commonly used when we switch + * between being backgrounded and not. If the + * filedescriptor is not a tty, don't worry about it + * and always stay connected. + */ + pmix_event_signal_set(pmix_globals.evbase, &stdinsig, + SIGCONT, pmix_iof_stdin_cb, + &stdinev); + + /* setup a read event to read stdin, but don't activate it yet. The + * dst_name indicates who should receive the stdin. 
If that recipient + * doesn't do a corresponding pull, however, then the stdin will + * be dropped upon receipt at the local daemon + */ + PMIX_CONSTRUCT(&stdinev, pmix_iof_read_event_t); + stdinev.fd = fd; + stdinev.always_readable = pmix_iof_fd_always_ready(fd); + if (stdinev.always_readable) { + pmix_event_evtimer_set(pmix_globals.evbase, + &stdinev.ev, + pmix_iof_read_local_handler, + &stdinev); + } else { + pmix_event_set(pmix_globals.evbase, + &stdinev.ev, fd, + PMIX_EV_READ, + pmix_iof_read_local_handler, &stdinev); + } \ + /* check to see if we want the stdin read event to be + * active - we will always at least define the event, + * but may delay its activation + */ + if (pmix_iof_stdin_check(fd)) { + PMIX_IOF_READ_ACTIVATE(&stdinev); + } + } else { + /* if we are not looking at a tty, just setup a read event + * and activate it + */ + PMIX_CONSTRUCT(&stdinev, pmix_iof_read_event_t); + stdinev.fd = fd; + stdinev.always_readable = pmix_iof_fd_always_ready(fd); + if (stdinev.always_readable) { + pmix_event_evtimer_set(pmix_globals.evbase, + &stdinev.ev, + pmix_iof_read_local_handler, + &stdinev); + } else { + pmix_event_set(pmix_globals.evbase, + &stdinev.ev, fd, + PMIX_EV_READ, + pmix_iof_read_local_handler, &stdinev); + } \ + PMIX_IOF_READ_ACTIVATE(&stdinev); + } + } /* increment our init reference counter */ pmix_globals.init_cntr++; @@ -721,6 +859,12 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize called"); + /* flush anything that is still trying to be written out */ + pmix_iof_static_dump_output(&pmix_client_globals.iof_stdout); + pmix_iof_static_dump_output(&pmix_client_globals.iof_stderr); + PMIX_DESTRUCT(&pmix_client_globals.iof_stdout); + PMIX_DESTRUCT(&pmix_client_globals.iof_stderr); + /* setup a cmd message to notify the PMIx * server that we are normally terminating */ msg = PMIX_NEW(pmix_buffer_t); diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/fd.c 
b/opal/mca/pmix/pmix3x/pmix/src/util/fd.c index 616c6fe97c..c4f033f9da 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/fd.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/fd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -20,6 +20,12 @@ #endif #include #include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif #include "src/util/error.h" #include "src/util/fd.h" @@ -93,3 +99,31 @@ pmix_status_t pmix_fd_set_cloexec(int fd) return PMIX_SUCCESS; } + + +bool pmix_fd_is_regular(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISREG(buf.st_mode); +} + +bool pmix_fd_is_chardev(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISCHR(buf.st_mode); +} + +bool pmix_fd_is_blkdev(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISBLK(buf.st_mode); +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/fd.h b/opal/mca/pmix/pmix3x/pmix/src/util/fd.h index d67fe24835..58c7edfe01 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/fd.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/fd.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -67,6 +67,37 @@ PMIX_EXPORT pmix_status_t pmix_fd_write(int fd, int len, const void *buffer); */ PMIX_EXPORT pmix_status_t pmix_fd_set_cloexec(int fd); +/** + * Convenience function to check if fd points to an accessible regular file. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a regular file. + * @returns false otherwise. + */ +PMIX_EXPORT bool pmix_fd_is_regular(int fd); + +/** + * Convenience function to check if fd points to an accessible character device. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a character device. + * @returns false otherwise. + */ +PMIX_EXPORT bool pmix_fd_is_chardev(int fd); + +/** + * Convenience function to check if fd points to an accessible block device. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a block device. + * @returns false otherwise. + */ +PMIX_EXPORT bool pmix_fd_is_blkdev(int fd); + + END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index 16cb017f1c..1f3b5fbff9 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -120,7 +120,9 @@ const opal_pmix_base_module_t opal_pmix_pmix3x_module = { .server_setup_fork = pmix3x_server_setup_fork, .server_dmodex_request = pmix3x_server_dmodex, .server_notify_event = pmix3x_server_notify_event, - .server_iof_push = NULL, //pmix3x_server_iof_push, + .server_iof_push = pmix3x_server_iof_push, + .server_setup_application = pmix3x_server_setup_application, + .server_setup_local_support = pmix3x_server_setup_local_support, /* tool APIs */ .tool_init = pmix3x_tool_init, .tool_finalize = pmix3x_tool_fini, @@ -514,6 +516,9 @@ pmix_status_t pmix3x_convert_opalrc(int rc) case OPAL_ERR_MODEL_DECLARED: return PMIX_MODEL_DECLARED; + case OPAL_PMIX_LAUNCH_DIRECTIVE: + return PMIX_LAUNCH_DIRECTIVE; + case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: @@ -607,6 +612,8 @@ int pmix3x_convert_rc(pmix_status_t rc) case PMIX_MODEL_DECLARED: return 
OPAL_ERR_MODEL_DECLARED; + case PMIX_LAUNCH_DIRECTIVE: + return OPAL_PMIX_LAUNCH_DIRECTIVE; case PMIX_ERROR: return OPAL_ERROR; @@ -910,6 +917,17 @@ void pmix3x_value_load(pmix_value_t *v, v->data.darray->array = NULL; } break; + case OPAL_ENVAR: + v->type = PMIX_ENVAR; + PMIX_ENVAR_CONSTRUCT(&v->data.envar); + if (NULL != kv->data.envar.envar) { + v->data.envar.envar = strdup(kv->data.envar.envar); + } + if (NULL != kv->data.envar.value) { + v->data.envar.value = strdup(kv->data.envar.value); + } + v->data.envar.separator = kv->data.envar.separator; + break; default: /* silence warnings */ break; @@ -917,7 +935,7 @@ void pmix3x_value_load(pmix_value_t *v, } int pmix3x_value_unload(opal_value_t *kv, - const pmix_value_t *v) + const pmix_value_t *v) { int rc=OPAL_SUCCESS; bool found; @@ -1092,6 +1110,17 @@ int pmix3x_value_unload(opal_value_t *kv, } } break; + case PMIX_ENVAR: + kv->type = OPAL_ENVAR; + OBJ_CONSTRUCT(&kv->data.envar, opal_envar_t); + if (NULL != v->data.envar.envar) { + kv->data.envar.envar = strdup(v->data.envar.envar); + } + if (NULL != v->data.envar.value) { + kv->data.envar.value = strdup(v->data.envar.value); + } + kv->data.envar.separator = v->data.envar.separator; + break; default: /* silence warnings */ rc = OPAL_ERROR; diff --git a/opal/mca/pmix/pmix3x/pmix3x.h b/opal/mca/pmix/pmix3x/pmix3x.h index 10887e9f5c..4c56afc7cf 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.h +++ b/opal/mca/pmix/pmix3x/pmix3x.h @@ -109,6 +109,7 @@ typedef struct { opal_pmix_spawn_cbfunc_t spcbfunc; opal_pmix_evhandler_reg_cbfunc_t evregcbfunc; opal_pmix_info_cbfunc_t qcbfunc; + opal_pmix_setup_application_cbfunc_t setupcbfunc; void *cbdata; } pmix3x_opcaddy_t; OBJ_CLASS_DECLARATION(pmix3x_opcaddy_t); @@ -297,11 +298,16 @@ OPAL_MODULE_DECLSPEC int pmix3x_server_notify_event(int status, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -#if 0 OPAL_MODULE_DECLSPEC int pmix3x_server_iof_push(const opal_process_name_t *source, opal_pmix_iof_channel_t channel, 
unsigned char *data, size_t nbytes); -#endif + +OPAL_MODULE_DECLSPEC int pmix3x_server_setup_application(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_setup_application_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix3x_server_setup_local_support(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); /**** COMPONENT UTILITY FUNCTIONS ****/ OPAL_MODULE_DECLSPEC int opal_pmix_pmix3x_check_evars(void); diff --git a/opal/mca/pmix/pmix3x/pmix3x_server_north.c b/opal/mca/pmix/pmix3x/pmix3x_server_north.c index 46bf652dbf..c86aa30b36 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_server_north.c +++ b/opal/mca/pmix/pmix3x/pmix3x_server_north.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. 
@@ -111,6 +111,15 @@ static pmix_status_t server_job_control(const pmix_proc_t *requestor, const pmix_proc_t targets[], size_t ntargets, const pmix_info_t directives[], size_t ndirs, pmix_info_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_iof_pull(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t directives[], size_t ndirs, + pmix_iof_channel_t channels, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_stdin(const pmix_proc_t *source, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + const pmix_byte_object_t *bo, + pmix_op_cbfunc_t cbfunc, void *cbdata); pmix_server_module_t mymodule = { .client_connected = server_client_connected_fn, @@ -131,9 +140,11 @@ pmix_server_module_t mymodule = { .tool_connected = server_tool_connection, .log = server_log, .allocate = server_allocate, - .job_control = server_job_control + .job_control = server_job_control, /* we do not support monitoring, but use the * PMIx internal monitoring capability */ + .iof_pull = server_iof_pull, + .push_stdin = server_stdin }; opal_pmix_server_module_t *host_module = NULL; @@ -1274,3 +1285,26 @@ static pmix_status_t server_job_control(const pmix_proc_t *proct, return PMIX_SUCCESS; } + +static pmix_status_t server_iof_pull(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t directives[], size_t ndirs, + pmix_iof_channel_t channels, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + if (NULL == host_module || NULL == host_module->iof_pull) { + return PMIX_ERR_NOT_SUPPORTED; + } + return PMIX_ERR_NOT_SUPPORTED; +} + +static pmix_status_t server_stdin(const pmix_proc_t *source, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + const pmix_byte_object_t *bo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + if (NULL == host_module || NULL == host_module->iof_push) { + return PMIX_ERR_NOT_SUPPORTED; + } + return PMIX_ERR_NOT_SUPPORTED; +} diff --git 
a/opal/mca/pmix/pmix3x/pmix3x_server_south.c b/opal/mca/pmix/pmix3x/pmix3x_server_south.c index b7d07fe602..203ddefaed 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_server_south.c +++ b/opal/mca/pmix/pmix3x/pmix3x_server_south.c @@ -569,7 +569,6 @@ int pmix3x_server_notify_event(int status, return pmix3x_convert_rc(rc); } -#if 0 int pmix3x_server_iof_push(const opal_process_name_t *source, opal_pmix_iof_channel_t channel, unsigned char *data, size_t nbytes) @@ -622,7 +621,7 @@ int pmix3x_server_iof_push(const opal_process_name_t *source, /* push the IO */ OPAL_PMIX_CONSTRUCT_LOCK(&lock); - rc = PMIx_IOF_push(&op->p, pchan, &bo, NULL, 0, lkcbfunc, (void*)&lock); + rc = PMIx_server_IOF_deliver(&op->p, pchan, &bo, NULL, 0, lkcbfunc, (void*)&lock); if (PMIX_SUCCESS != rc) { ret = pmix3x_convert_rc(rc); } else { @@ -636,4 +635,156 @@ int pmix3x_server_iof_push(const opal_process_name_t *source, return ret; } -#endif + +static void final_cleanup(int status, void *cbdata) +{ + pmix3x_opalcaddy_t *opalcaddy = (pmix3x_opalcaddy_t*)cbdata; + OBJ_RELEASE(opalcaddy); +} + +static void setup_cbfunc(pmix_status_t status, + pmix_info_t info[], size_t ninfo, + void *provided_cbdata, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix3x_opcaddy_t *op = (pmix3x_opcaddy_t*)provided_cbdata; + pmix3x_opalcaddy_t *opalcaddy; + size_t n; + opal_value_t *iptr; + int rc; + pmix_status_t ret = PMIX_SUCCESS; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix3x_opalcaddy_t); + + rc = pmix3x_convert_rc(status); + if (OPAL_SUCCESS == rc && NULL != info) { + /* need to convert the info array to a list */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix3x_value_unload(iptr, &info[n].value))) { + /* do NOT release opalcaddy here: it is still handed to setupcbfunc below and is released by final_cleanup */ + ret = pmix3x_convert_opalrc(rc); + goto done; + } + } + } + + done: + /* release our caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); + } + /* 
pass what we have upstairs */ + if (NULL != op->setupcbfunc) { + op->setupcbfunc(rc, &opalcaddy->info, op->cbdata, + final_cleanup, opalcaddy); + } + OBJ_RELEASE(op); +} + +int pmix3x_server_setup_application(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_setup_application_cbfunc_t cbfunc, void *cbdata) +{ + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; + pmix_status_t rc; + pmix3x_opcaddy_t *op; + + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s setup application for job %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_JOBID_PRINT(jobid)); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* convert the list to an array of pmix_info_t */ + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix3x_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + /* setup the caddy */ + op = OBJ_NEW(pmix3x_opcaddy_t); + op->info = pinfo; + op->sz = sz; + op->setupcbfunc = cbfunc; + op->cbdata = cbdata; + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, jobid); + + rc = PMIx_server_setup_application(op->p.nspace, op->info, op->sz, + setup_cbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix3x_convert_rc(rc); +} + +int pmix3x_server_setup_local_support(opal_jobid_t jobid, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; + pmix_status_t rc; + pmix3x_opcaddy_t *op; + + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s setup local support for job %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_JOBID_PRINT(jobid)); + + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* convert the list to an array of pmix_info_t */ + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix3x_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + /* setup the caddy */ + op = OBJ_NEW(pmix3x_opcaddy_t); + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, jobid); + + rc = PMIx_server_setup_local_support(op->p.nspace, op->info, op->sz, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix3x_convert_rc(rc); +} diff --git a/opal/mca/pmix/pmix_server.h b/opal/mca/pmix/pmix_server.h index e0d8e605e4..a3aacaef3d 100644 --- a/opal/mca/pmix/pmix_server.h +++ b/opal/mca/pmix/pmix_server.h @@ -243,10 +243,17 @@ typedef int (*opal_pmix_server_job_control_fn_t)(const opal_process_name_t *requ /* we do not provide a monitoring capability */ +/* Request forwarding of specified IO channels to the local PMIx server + * for distribution to local clients */ +typedef int (*opal_pmix_server_iof_pull_fn_t)(opal_list_t *sources, + opal_list_t *directives, + opal_pmix_iof_channel_t channels, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + /* Entry point for pushing forwarded IO to clients/tools */ -typedef int (*opal_pmix_server_iof_fn_t)(const opal_process_name_t *source, - opal_pmix_iof_channel_t channel, - unsigned char *data, size_t nbytes); +typedef int (*opal_pmix_server_iof_push_fn_t)(const opal_process_name_t *source, + opal_pmix_iof_channel_t channel, + unsigned char *data, size_t nbytes); typedef struct 
opal_pmix_server_module_1_0_0_t { opal_pmix_server_client_connected_fn_t client_connected; @@ -269,6 +276,8 @@ typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_listener_fn_t listener; opal_pmix_server_alloc_fn_t allocate; opal_pmix_server_job_control_fn_t job_control; + opal_pmix_server_iof_pull_fn_t iof_pull; + opal_pmix_server_iof_push_fn_t iof_push; } opal_pmix_server_module_t; diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index b76f0ddb3c..456a462032 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -254,9 +254,6 @@ BEGIN_C_DECLS #define OPAL_PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position #define OPAL_PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init #define OPAL_PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin -#define OPAL_PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc -#define OPAL_PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me -#define OPAL_PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me #define OPAL_PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons #define OPAL_PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected // job - i.e., not part of the "comm_world" of the job @@ -278,6 +275,18 @@ BEGIN_C_DECLS #define OPAL_PMIX_MAX_RESTARTS "pmix.maxrestarts" // (uint32_t) max number of times to restart a job +/* environmental variable operation attributes */ +#define OPAL_PMIX_SET_ENVAR "pmix.envar.set" // (pmix_envar_t*) set the envar to the given value, + // overwriting any pre-existing one +#define OPAL_PMIX_ADD_ENVAR "pmix.envar.add" // (pmix_envar_t*) add envar, but do not overwrite any existing one +#define OPAL_PMIX_UNSET_ENVAR "pmix.envar.unset" // (char*) unset the envar, if present +#define 
OPAL_PMIX_PREPEND_ENVAR "pmix.envar.prepnd" // (pmix_envar_t*) prepend the given value to the + // specified envar using the separator + // character, creating the envar if it doesn't already exist +#define OPAL_PMIX_APPEND_ENVAR "pmix.envar.appnd" // (pmix_envar_t*) append the given value to the specified + // envar using the separator character, + // creating the envar if it doesn't already exist + /* query attributes */ #define OPAL_PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces #define OPAL_PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job @@ -318,6 +327,8 @@ BEGIN_C_DECLS #define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification #define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are #define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define OPAL_PMIX_DEBUG_JOB_DIRECTIVES "pmix.dbg.jdirs" // (opal_list_t*) list of job-level directives +#define OPAL_PMIX_DEBUG_APP_DIRECTIVES "pmix.dbg.adirs" // (opal_list_t*) list of app-level directives /* Resource Manager identification */ @@ -325,11 +336,6 @@ BEGIN_C_DECLS #define OPAL_PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string -/* attributes for setting envars */ -#define OPAL_PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment -#define OPAL_PMIX_UNSET_ENVAR "pmix.unset.envar" // (char*) unset envar specified in string - - /* attributes relating to allocations */ #define OPAL_PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request // which can later be used to query status of the request @@ -392,6 +398,39 @@ BEGIN_C_DECLS #define OPAL_PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that 
can be missed before taking // specified action +/* security attributes */ +#define OPAL_PMIX_CRED_TYPE "pmix.sec.ctype" // (char*) when passed in PMIx_Get_credential, a prioritized, + // comma-delimited list of desired credential types for use + // in environments where multiple authentication mechanisms + // may be available. When returned in a callback function, a + // string identifier of the credential type + +/* IO Forwarding Attributes */ +#define OPAL_PMIX_IOF_CACHE_SIZE "pmix.iof.csize" // (uint32_t) requested size of the server cache in bytes for each specified channel. + // By default, the server is allowed (but not required) to drop + // all bytes received beyond the max size +#define OPAL_PMIX_IOF_DROP_OLDEST "pmix.iof.old" // (bool) in an overflow situation, drop the oldest bytes to make room in the cache +#define OPAL_PMIX_IOF_DROP_NEWEST "pmix.iof.new" // (bool) in an overflow situation, drop any new bytes received until room becomes + // available in the cache (default) +#define OPAL_PMIX_IOF_BUFFERING_SIZE "pmix.iof.bsize" // (uint32_t) basically controls grouping of IO on the specified channel(s) to + // avoid being called every time a bit of IO arrives. The library + // will execute the callback whenever the specified number of bytes + // becomes available. Any remaining buffered data will be "flushed" + // upon call to deregister the respective channel +#define OPAL_PMIX_IOF_BUFFERING_TIME "pmix.iof.btime" // (uint32_t) max time in seconds to buffer IO before delivering it. 
Used in conjunction + // with buffering size, this prevents IO from being held indefinitely + // while waiting for another payload to arrive +#define OPAL_PMIX_IOF_COMPLETE "pmix.iof.cmp" // (bool) indicates whether or not the specified IO channel has been closed + // by the source +#define OPAL_PMIX_IOF_PUSH_STDIN "pmix.iof.stdin" // (bool) Used by a tool to request that the PMIx library collect + // the tool's stdin and forward it to the procs specified in + // the PMIx_IOF_push call + +/* Attributes for controlling contents of application setup data */ +#define OPAL_PMIX_SETUP_APP_ENVARS "pmix.setup.env" // (bool) harvest and include relevant envars +#define OPAL_PMIX_SETUP_APP_NONENVARS "pmix.setup.nenv" // (bool) include all non-envar data +#define OPAL_PMIX_SETUP_APP_ALL "pmix.setup.all" // (bool) include all relevant data + /* define a scope for data "put" by PMI per the following: * diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 08f9efb767..81ab8be1fb 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. 
@@ -305,6 +305,10 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_MODEL_DECLARED: retval = "Model declared"; break; + case OPAL_PMIX_LAUNCH_DIRECTIVE: + retval = "Launch directive"; + break; + default: retval = "UNRECOGNIZED"; } diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index cd16a2173d..24607ec71f 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -15,6 +15,7 @@ * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -168,7 +169,7 @@ opal_progress_finalize(void) return OPAL_SUCCESS; } -static int opal_progress_events() +static int opal_progress_events(void) { int events = 0; diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h index 2f9306c33b..59865ed61b 100644 --- a/orte/include/orte/types.h +++ b/orte/include/orte/types.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -144,6 +144,7 @@ typedef struct { orte_vpid_t vpid; orte_jobid_t jobid; opal_process_name_t name; + opal_envar_t envar; } data; } orte_attribute_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(orte_attribute_t); diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index 91bd4ffc5e..3e6584fd78 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -109,7 +109,7 @@ static int orte_iof_base_close(void) */ static int orte_iof_base_open(mca_base_open_flag_t flags) { - int rc, xmlfd; + int xmlfd; /* daemons do not need to do this as they do not write out stdout/err */ if (!ORTE_PROC_IS_DAEMON) { diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index ae40464e88..f6bda08a43 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -252,10 +252,13 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s sending data of size %d via PMIx to tool %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)numbytes, - ORTE_NAME_PRINT(&sink->daemon)); - rc = opal_pmix.server_iof_push(&proct->name, rev->tag, data, numbytes)); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); + ORTE_NAME_PRINT(&sink->daemon))); + /* don't pass down zero byte blobs */ + if (0 < numbytes) { + rc = opal_pmix.server_iof_push(&proct->name, rev->tag, data, numbytes); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } } } else { OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index b77dad84c0..9daf9ab51f 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -250,9 +250,12 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, sink->name.vpid == origin.vpid)) { /* send the data to the tool */ if (NULL != 
opal_pmix.server_iof_push) { - rc = opal_pmix.server_iof_push(&proct->name, stream, data, numbytes); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); + /* don't pass along zero byte blobs */ + if (0 < numbytes) { + rc = opal_pmix.server_iof_push(&proct->name, stream, data, numbytes); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } } } else { orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin, stream, data, numbytes); diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index f3e8b9ffeb..e1ddba3e6b 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -56,7 +56,7 @@ #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/shmem/base/base.h" #include "opal/mca/pstat/pstat.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -100,6 +100,39 @@ #include "orte/mca/odls/base/base.h" #include "orte/mca/odls/base/odls_private.h" +static void setup_cbfunc(int status, + opal_list_t *info, + void *provided_cbdata, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + orte_job_t *jdata = (orte_job_t*)provided_cbdata; + opal_value_t *kv; + opal_buffer_t cache, *bptr; + int rc = ORTE_SUCCESS; /* stays SUCCESS when info is NULL or empty, so cbfunc never sees garbage */ + + OBJ_CONSTRUCT(&cache, opal_buffer_t); + if (NULL != info) { + /* cycle across the provided info */ + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + if (OPAL_SUCCESS != (rc = opal_dss.pack(&cache, &kv, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + } + } + } + /* add the results */ + bptr = &cache; + opal_dss.pack(&jdata->launch_msg, &bptr, 1, OPAL_BUFFER); + OBJ_DESTRUCT(&cache); + + /* release our caller, handing back the status of the last pack attempt */ + if (NULL != cbfunc) { + cbfunc(rc, cbdata); + } + + /* move to next stage */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG); + +} /* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN * THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW */ @@ 
-350,7 +383,18 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, free(nidmap); } - /* compute and pack the regex of ppn */ + /* get any application prep info */ + if (NULL != opal_pmix.server_setup_application) { + /* we don't want to block here because it could + * take some indeterminate time to get the info */ + if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_application(jdata->jobid, NULL, setup_cbfunc, jdata))) { + ORTE_ERROR_LOG(rc); + } + return rc; + } + + /* move to next stage */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG); return ORTE_SUCCESS; } @@ -362,6 +406,12 @@ static void fm_release(void *cbdata) OBJ_RELEASE(bptr); } +static void ls_cbunc(int status, void *cbdata) +{ + opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(lock); +} + int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, orte_jobid_t *job) { @@ -376,6 +426,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, orte_app_context_t *app; int8_t flag; char *ppn; + opal_value_t *kv; + opal_list_t local_support, cache; + opal_pmix_lock_t lock; OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:constructing child list", @@ -385,6 +438,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, *job = ORTE_JOBID_INVALID; /* get the daemon job object */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + OBJ_CONSTRUCT(&local_support, opal_list_t); /* unpack the flag to see if new daemons were launched */ cnt=1; @@ -504,17 +559,18 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } } + /* extract the ppn regex */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + /* if the job is fully described, then mpirun will have computed * and sent us the complete array of procs in the orte_job_t, so 
we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { if (!ORTE_PROC_IS_HNP) { - /* extract the ppn regex */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto REPORT_ERROR; - } /* populate the node array of the job map and the proc array of * the job object so we know how many procs are on each node */ if (ORTE_SUCCESS != (rc = orte_regx.parse_ppn(jdata, ppn))) { @@ -522,10 +578,10 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, free(ppn); goto REPORT_ERROR; } - free(ppn); /* now assign locations to the procs */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); + free(ppn); goto REPORT_ERROR; } } @@ -533,14 +589,73 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * to the jdata->procs array */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) { ORTE_ERROR_LOG(rc); + free(ppn); goto REPORT_ERROR; } /* and finally, compute the local and node ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); + free(ppn); goto REPORT_ERROR; } } + free(ppn); + + /* unpack the buffer containing any application setup info - there + * might not be any, so it isn't an error if we don't find things */ + cnt=1; + rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER); + if (OPAL_SUCCESS == rc) { + /* there was setup data - process it */ + cnt=1; + OBJ_CONSTRUCT(&cache, opal_list_t); + while (ORTE_SUCCESS == (rc = opal_dss.unpack(bptr, &kv, &cnt, OPAL_VALUE))) { + /* if this is an envar operation, cache it in reverse order + * so that the order the user provided is preserved */ + if (0 == strcmp(kv->key, OPAL_PMIX_SET_ENVAR) || + 0 == strcmp(kv->key, OPAL_PMIX_ADD_ENVAR) || + 0 == strcmp(kv->key, OPAL_PMIX_UNSET_ENVAR) || + 0 == strcmp(kv->key, OPAL_PMIX_PREPEND_ENVAR) || + 0 == 
strcmp(kv->key, OPAL_PMIX_APPEND_ENVAR)) { + opal_list_prepend(&cache, &kv->super); + } else { + /* need to pass it to pmix.setup_local_support */ + opal_list_append(&local_support, &kv->super); + } + } + OBJ_RELEASE(bptr); + /* add any cache'd values to the front of the job attributes */ + while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&cache))) { + if (0 == strcmp(kv->key, OPAL_PMIX_SET_ENVAR)) { + orte_prepend_attribute(&jdata->attributes, ORTE_JOB_SET_ENVAR, + ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(kv->key, OPAL_PMIX_ADD_ENVAR)) { + orte_prepend_attribute(&jdata->attributes, ORTE_JOB_ADD_ENVAR, + ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(kv->key, OPAL_PMIX_UNSET_ENVAR)) { + orte_prepend_attribute(&jdata->attributes, ORTE_JOB_UNSET_ENVAR, + ORTE_ATTR_GLOBAL, kv->data.string, OPAL_STRING); + } else if (0 == strcmp(kv->key, OPAL_PMIX_PREPEND_ENVAR)) { + orte_prepend_attribute(&jdata->attributes, ORTE_JOB_PREPEND_ENVAR, + ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(kv->key, OPAL_PMIX_APPEND_ENVAR)) { + orte_prepend_attribute(&jdata->attributes, ORTE_JOB_APPEND_ENVAR, + ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR); + } + OBJ_RELEASE(kv); + } + OPAL_LIST_DESTRUCT(&cache); + } + if (0 < opal_list_get_size(&local_support) && + NULL != opal_pmix.server_setup_local_support) { + if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support, + ls_cbunc, &lock))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + } else { + lock.active = false; // we won't get a callback + } /* now that the node array in the job map and jdata are completely filled out,. * we need to "wireup" the procs to their nodes so other utilities can @@ -663,9 +778,15 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * proc structures. 
For the HNP, the proc structs will * remain in the orte_job_t array */ + /* wait here until the local support has been setup */ + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + OPAL_LIST_DESTRUCT(&local_support); return ORTE_SUCCESS; REPORT_ERROR: + OPAL_PMIX_DESTRUCT_LOCK(&lock); + OPAL_LIST_DESTRUCT(&local_support); /* we have to report an error back to the HNP so we don't just * hang. Although there shouldn't be any errors once this is * all debugged, it is still good practice to have a way diff --git a/orte/mca/plm/base/base.h b/orte/mca/plm/base/base.h index d7dfe0f9f4..b0d2faea1a 100644 --- a/orte/mca/plm/base/base.h +++ b/orte/mca/plm/base/base.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -63,6 +63,7 @@ ORTE_DECLSPEC void orte_plm_base_daemons_launched(int fd, short args, void *cbda ORTE_DECLSPEC void orte_plm_base_vm_ready(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_mapping_complete(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_launch_apps(int fd, short args, void *cbdata); +ORTE_DECLSPEC void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_post_launch(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_registered(int fd, short args, void *cbdata); diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index bb9b44de35..ba2a3571fe 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -503,13 +503,10 @@ static void timer_cb(int fd, short event, void *cbdata) void orte_plm_base_launch_apps(int fd, short args, 
void *cbdata) { + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata; orte_daemon_cmd_flag_t command; - opal_buffer_t *buffer; int rc; - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_timer_t *timer; - orte_grpcomm_signature_t *sig; ORTE_ACQUIRE_OBJECT(caddy); @@ -529,45 +526,60 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - /* setup the buffer */ - buffer = OBJ_NEW(opal_buffer_t); - /* pack the appropriate add_local_procs command */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { command = ORTE_DAEMON_DVM_ADD_PROCS; } else { command = ORTE_DAEMON_ADD_LOCAL_PROCS; } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&jdata->launch_msg, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* get the local launcher's required data */ - if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(buffer, jdata->jobid))) { + if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(&jdata->launch_msg, jdata->jobid))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - OBJ_RELEASE(caddy); - return; } + OBJ_RELEASE(caddy); + return; +} + +void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata) +{ + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + orte_timer_t *timer; + orte_grpcomm_signature_t *sig; + orte_job_t *jdata; + int rc; + + /* convenience */ + jdata = caddy->jdata; + + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:send launch msg for job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid))); + /* if we don't want to launch the apps, now is the time to leave */ if (orte_do_not_launch) { bool compressed; uint8_t *cmpdata; 
size_t cmplen; /* report the size of the launch message */ - compressed = orte_util_compress_block((uint8_t*)buffer->base_ptr, buffer->bytes_used, + compressed = orte_util_compress_block((uint8_t*)jdata->launch_msg.base_ptr, + jdata->launch_msg.bytes_used, &cmpdata, &cmplen); if (compressed) { opal_output(0, "LAUNCH MSG RAW SIZE: %d COMPRESSED SIZE: %d", - (int)buffer->bytes_used, (int)cmplen); + (int)jdata->launch_msg.bytes_used, (int)cmplen); free(cmpdata); } else { - opal_output(0, "LAUNCH MSG RAW SIZE: %d", (int)buffer->bytes_used); + opal_output(0, "LAUNCH MSG RAW SIZE: %d", (int)jdata->launch_msg.bytes_used); } orte_never_launched = true; ORTE_FORCED_TERMINATE(0); @@ -581,15 +593,15 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; sig->signature[0].vpid = ORTE_VPID_WILDCARD; sig->sz = 1; - if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buffer))) { + if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &jdata->launch_msg))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); OBJ_RELEASE(sig); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } - OBJ_RELEASE(buffer); + OBJ_DESTRUCT(&jdata->launch_msg); + OBJ_CONSTRUCT(&jdata->launch_msg, opal_buffer_t); /* maintain accounting */ OBJ_RELEASE(sig); diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index 1029850203..d89a6b9313 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -39,6 +39,7 @@ #include "opal/dss/dss.h" #include "opal/threads/threads.h" #include "opal/util/argv.h" +#include "opal/util/opal_environ.h" #include "orte/constants.h" #include "orte/types.h" diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 3a58c351b3..db779674de 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/orte/mca/plm/plm_types.h b/orte/mca/plm/plm_types.h index 245011d0e1..d12797f97e 100644 --- a/orte/mca/plm/plm_types.h +++ b/orte/mca/plm/plm_types.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -125,25 +125,26 @@ typedef int32_t orte_job_state_t; #define ORTE_JOB_STATE_DAEMONS_REPORTED 10 /* all launched daemons have reported */ #define ORTE_JOB_STATE_VM_READY 11 /* the VM is ready for operation */ #define ORTE_JOB_STATE_LAUNCH_APPS 12 /* ready to launch apps */ -#define ORTE_JOB_STATE_RUNNING 13 /* all procs have been fork'd */ -#define ORTE_JOB_STATE_SUSPENDED 14 /* job has been suspended */ -#define ORTE_JOB_STATE_REGISTERED 15 /* all procs registered for sync */ -#define ORTE_JOB_STATE_READY_FOR_DEBUGGERS 16 /* job ready for debugger init after spawn */ -#define ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE 17 /* all local procs have attempted launch */ -#define ORTE_JOB_STATE_DEBUGGER_DETACH 18 /* a debugger has detached */ +#define ORTE_JOB_STATE_SEND_LAUNCH_MSG 13 /* send launch msg to daemons */ +#define ORTE_JOB_STATE_RUNNING 14 /* all procs have been fork'd */ +#define ORTE_JOB_STATE_SUSPENDED 15 /* job has been suspended */ +#define ORTE_JOB_STATE_REGISTERED 16 /* all procs registered for sync */ +#define ORTE_JOB_STATE_READY_FOR_DEBUGGERS 17 /* job ready for debugger init after spawn */ +#define ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE 18 /* all local procs have attempted launch */ +#define ORTE_JOB_STATE_DEBUGGER_DETACH 19 /* a debugger has detached */ /* * Define a "boundary" so we can easily and quickly determine * if a job is still running or not - any value less than * this one means that we are not terminated */ -#define ORTE_JOB_STATE_UNTERMINATED 20 +#define ORTE_JOB_STATE_UNTERMINATED 30 -#define ORTE_JOB_STATE_TERMINATED 21 /* all processes have terminated and job is no longer running */ -#define ORTE_JOB_STATE_ALL_JOBS_COMPLETE 22 -#define ORTE_JOB_STATE_DAEMONS_TERMINATED 23 -#define ORTE_JOB_STATE_NOTIFY_COMPLETED 24 /* callback to notify when job completes */ -#define ORTE_JOB_STATE_NOTIFIED 25 +#define ORTE_JOB_STATE_TERMINATED 31 /* all processes have terminated and job is no longer 
running */ +#define ORTE_JOB_STATE_ALL_JOBS_COMPLETE 32 +#define ORTE_JOB_STATE_DAEMONS_TERMINATED 33 +#define ORTE_JOB_STATE_NOTIFY_COMPLETED 34 /* callback to notify when job completes */ +#define ORTE_JOB_STATE_NOTIFIED 35 /* Define a boundary so we can easily and quickly determine * if a job abnormally terminated - leave a little room diff --git a/orte/mca/schizo/alps/Makefile.am b/orte/mca/schizo/alps/Makefile.am index 14717d9522..880b021ea7 100644 --- a/orte/mca/schizo/alps/Makefile.am +++ b/orte/mca/schizo/alps/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -34,4 +34,3 @@ mca_schizo_alps_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la noinst_LTLIBRARIES = $(component_noinst) libmca_schizo_alps_la_SOURCES = $(sources) libmca_schizo_alps_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index d2b729340c..5e1daf9e21 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -800,14 +800,16 @@ static int setup_fork(orte_job_t *jdata, orte_app_context_t *app) { int i; - char *param; + char *param, *p2, *saveptr; bool oversubscribed; orte_node_t *node; char **envcpy, **nps, **firstranks; char *npstring, *firstrankstring; char *num_app_ctx; bool takeus = false; + bool exists; orte_app_context_t* tmp_app; + orte_attribute_t *attr; opal_output_verbose(1, orte_schizo_base_framework.framework_output, "%s schizo:ompi: setup_fork", @@ -1050,6 +1052,132 @@ static int setup_fork(orte_job_t *jdata, free(num_app_ctx); free(firstrankstring); free(npstring); + + /* now process any envar attributes - we begin with the job-level + * ones as the app-specific ones can override them. 
We have to + * process them in the order they were given to ensure we wind + * up in the desired final state */ + OPAL_LIST_FOREACH(attr, &jdata->attributes, orte_attribute_t) { + if (ORTE_JOB_SET_ENVAR == attr->key) { + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } else if (ORTE_JOB_ADD_ENVAR == attr->key) { + opal_setenv(attr->data.envar.envar, attr->data.envar.value, false, &app->env); + } else if (ORTE_JOB_UNSET_ENVAR == attr->key) { + opal_unsetenv(attr->data.string, &app->env); + } else if (ORTE_JOB_PREPEND_ENVAR == attr->key) { + /* see if the envar already exists */ + exists = false; + for (i=0; NULL != app->env[i]; i++) { + saveptr = strchr(app->env[i], '='); // cannot be NULL + *saveptr = '\0'; + if (0 == strcmp(app->env[i], attr->data.envar.envar)) { + /* we have the var - prepend it */ + param = saveptr; + ++param; // move past where the '=' sign was + (void)asprintf(&p2, "%s%c%s", attr->data.envar.value, + attr->data.envar.separator, param); + *saveptr = '='; // restore the current envar setting + opal_setenv(attr->data.envar.envar, p2, true, &app->env); + free(p2); + exists = true; + break; + } else { + *saveptr = '='; // restore the current envar setting + } + } + if (!exists) { + /* just insert it */ + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } + } else if (ORTE_JOB_APPEND_ENVAR == attr->key) { + /* see if the envar already exists */ + exists = false; + for (i=0; NULL != app->env[i]; i++) { + saveptr = strchr(app->env[i], '='); // cannot be NULL + *saveptr = '\0'; + if (0 == strcmp(app->env[i], attr->data.envar.envar)) { + /* we have the var - append to it */ + param = saveptr; + ++param; // move past where the '=' sign was + (void)asprintf(&p2, "%s%c%s", param, attr->data.envar.separator, + attr->data.envar.value); + *saveptr = '='; // restore the current envar setting + opal_setenv(attr->data.envar.envar, p2, true, &app->env); + free(p2); + exists = true; + break; + } else { 
+ *saveptr = '='; // restore the current envar setting + } + } + if (!exists) { + /* just insert it */ + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } + } + } + + /* now do the same thing for any app-level attributes */ + OPAL_LIST_FOREACH(attr, &app->attributes, orte_attribute_t) { + if (ORTE_APP_SET_ENVAR == attr->key) { + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } else if (ORTE_APP_ADD_ENVAR == attr->key) { + opal_setenv(attr->data.envar.envar, attr->data.envar.value, false, &app->env); + } else if (ORTE_APP_UNSET_ENVAR == attr->key) { + opal_unsetenv(attr->data.string, &app->env); + } else if (ORTE_APP_PREPEND_ENVAR == attr->key) { + /* see if the envar already exists */ + exists = false; + for (i=0; NULL != app->env[i]; i++) { + saveptr = strchr(app->env[i], '='); // cannot be NULL + *saveptr = '\0'; + if (0 == strcmp(app->env[i], attr->data.envar.envar)) { + /* we have the var - prepend it */ + param = saveptr; + ++param; // move past where the '=' sign was + (void)asprintf(&p2, "%s%c%s", attr->data.envar.value, + attr->data.envar.separator, param); + *saveptr = '='; // restore the current envar setting + opal_setenv(attr->data.envar.envar, p2, true, &app->env); + free(p2); + exists = true; + break; + } else { + *saveptr = '='; // restore the current envar setting + } + } + if (!exists) { + /* just insert it */ + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } + } else if (ORTE_APP_APPEND_ENVAR == attr->key) { + /* see if the envar already exists */ + exists = false; + for (i=0; NULL != app->env[i]; i++) { + saveptr = strchr(app->env[i], '='); // cannot be NULL + *saveptr = '\0'; + if (0 == strcmp(app->env[i], attr->data.envar.envar)) { + /* we have the var - append to it */ + param = saveptr; + ++param; // move past where the '=' sign was + (void)asprintf(&p2, "%s%c%s", param, attr->data.envar.separator, + attr->data.envar.value); + *saveptr = '='; // 
restore the current envar setting + opal_setenv(attr->data.envar.envar, p2, true, &app->env); + free(p2); + exists = true; + break; + } else { + *saveptr = '='; // restore the current envar setting + } + } + if (!exists) { + /* just insert it */ + opal_setenv(attr->data.envar.envar, attr->data.envar.value, true, &app->env); + } + } + } + return ORTE_SUCCESS; } diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index a8275f3c62..50a9d4d8cf 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -87,6 +87,7 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, + ORTE_JOB_STATE_SEND_LAUNCH_MSG, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, ORTE_JOB_STATE_RUNNING, ORTE_JOB_STATE_REGISTERED, @@ -108,6 +109,7 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_mapping_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, + orte_plm_base_send_launch_msg, orte_state_base_local_launch_complete, orte_plm_base_post_launch, orte_plm_base_registered, diff --git a/orte/mca/state/hnp/state_hnp.c b/orte/mca/state/hnp/state_hnp.c index 71135b7a55..2d3e520042 100644 --- a/orte/mca/state/hnp/state_hnp.c +++ b/orte/mca/state/hnp/state_hnp.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -82,6 +82,7 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, + ORTE_JOB_STATE_SEND_LAUNCH_MSG, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, ORTE_JOB_STATE_RUNNING, ORTE_JOB_STATE_REGISTERED, @@ -103,6 +104,7 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_mapping_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, + orte_plm_base_send_launch_msg, orte_state_base_local_launch_complete, orte_plm_base_post_launch, orte_plm_base_registered, diff --git a/orte/mca/state/novm/state_novm.c b/orte/mca/state/novm/state_novm.c index 2bc36181a3..a15849ba44 100644 --- a/orte/mca/state/novm/state_novm.c +++ b/orte/mca/state/novm/state_novm.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,6 +80,7 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, + ORTE_JOB_STATE_SEND_LAUNCH_MSG, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, ORTE_JOB_STATE_RUNNING, ORTE_JOB_STATE_REGISTERED, @@ -101,6 +102,7 @@ static orte_state_cbfunc_t launch_callbacks[] = { map_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, + orte_plm_base_send_launch_msg, orte_state_base_local_launch_complete, orte_plm_base_post_launch, orte_plm_base_registered, diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index ad8f85cb76..3f52cfee22 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -559,7 +559,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /* look up job data object */ if (NULL == (jdata = orte_get_job_data_object(job))) { /* we can safely ignore this request as the job - * was already cleaned up */ + * was already 
cleaned up, or it was a tool */ goto CLEANUP; } diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index e4b9ee5b97..89b4303ba5 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -246,6 +246,25 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, } else if (0 == strcmp(info->key, OPAL_PMIX_PRELOAD_FILES)) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); + + /*** ENVIRONMENTAL VARIABLE DIRECTIVES ***/ + /* there can be multiple of these, so we add them to the attribute list */ + } else if (0 == strcmp(info->key, OPAL_PMIX_SET_ENVAR)) { + orte_add_attribute(&app->attributes, ORTE_APP_SET_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_ADD_ENVAR)) { + orte_add_attribute(&app->attributes, ORTE_APP_ADD_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_UNSET_ENVAR)) { + orte_add_attribute(&app->attributes, ORTE_APP_UNSET_ENVAR, + ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); + } else if (0 == strcmp(info->key, OPAL_PMIX_PREPEND_ENVAR)) { + orte_add_attribute(&app->attributes, ORTE_APP_PREPEND_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_APPEND_ENVAR)) { + orte_add_attribute(&app->attributes, ORTE_APP_APPEND_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else { /* unrecognized key */ orte_show_help("help-orted.txt", "bad-key", @@ -460,6 +479,24 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_DEBUGGER); + /*** ENVIRONMENTAL VARIABLE DIRECTIVES ***/ + /* there can be multiple of these, so we add them to the attribute list */ + } else if (0 == strcmp(info->key, OPAL_PMIX_SET_ENVAR)) { + 
orte_add_attribute(&jdata->attributes, ORTE_JOB_SET_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_ADD_ENVAR)) { + orte_add_attribute(&jdata->attributes, ORTE_JOB_ADD_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_UNSET_ENVAR)) { + orte_add_attribute(&jdata->attributes, ORTE_JOB_UNSET_ENVAR, + ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); + } else if (0 == strcmp(info->key, OPAL_PMIX_PREPEND_ENVAR)) { + orte_add_attribute(&jdata->attributes, ORTE_JOB_PREPEND_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + } else if (0 == strcmp(info->key, OPAL_PMIX_APPEND_ENVAR)) { + orte_add_attribute(&jdata->attributes, ORTE_JOB_APPEND_ENVAR, + ORTE_ATTR_GLOBAL, &info->data.envar, OPAL_ENVAR); + /*** DEFAULT - CACHE FOR INCLUSION WITH JOB INFO ***/ } else { /* cache for inclusion with job info at registration */ diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index b0550f1846..b22a2e1567 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -837,6 +837,12 @@ int orte_dt_pack_attr(opal_buffer_t *buffer, const void *src, int32_t num_vals, return ret; } break; + case OPAL_ENVAR: + if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &ptr[i]->data.envar, 1, OPAL_ENVAR))) { + return ret; + } + break; + default: opal_output(0, "PACK-ORTE-ATTR: UNSUPPORTED TYPE %d", (int)ptr[i]->type); return OPAL_ERROR; diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 954b741c31..7e9db7dbdc 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -923,6 +923,11 @@ int orte_dt_unpack_attr(opal_buffer_t *buffer, void *dest, int32_t *num_vals, return ret; } break; + case OPAL_ENVAR: + if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.envar, &m, OPAL_ENVAR))) { + return ret; + } + break; default: opal_output(0, "PACK-ORTE-ATTR: UNSUPPORTED TYPE"); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 43afe56075..1f7fc2ec7f 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -653,6 +653,7 @@ static void orte_job_construct(orte_job_t* job) ORTE_FLAG_SET(job, ORTE_JOB_FLAG_FORWARD_OUTPUT); OBJ_CONSTRUCT(&job->attributes, opal_list_t); + OBJ_CONSTRUCT(&job->launch_msg, opal_buffer_t); } static void orte_job_destruct(orte_job_t* job) @@ -715,6 +716,8 @@ static void orte_job_destruct(orte_job_t* job) /* release the attributes */ OPAL_LIST_DESTRUCT(&job->attributes); + OBJ_DESTRUCT(&job->launch_msg); + if (NULL != orte_job_data && ORTE_JOBID_INVALID != job->jobid) { /* remove the job from the global array */ opal_hash_table_remove_value_uint32(orte_job_data, job->jobid); diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index a7a705708e..1ae4de3eee 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -365,6 +365,8 @@ typedef struct { orte_job_flags_t flags; /* attributes */ opal_list_t attributes; + /* launch msg buffer */ + opal_buffer_t launch_msg; } orte_job_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_t); diff --git a/orte/tools/prun/prun.c b/orte/tools/prun/prun.c index 232a88fe0a..17683b803f 100644 --- a/orte/tools/prun/prun.c +++ b/orte/tools/prun/prun.c @@ -89,6 +89,24 @@ /* ensure I can behave like a daemon */ #include "prun.h" +typedef struct { + opal_object_t super; + opal_pmix_lock_t lock; + opal_list_t info; +} myinfo_t; +static void mcon(myinfo_t *p) +{ + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + OBJ_CONSTRUCT(&p->info, opal_list_t); +} +static void mdes(myinfo_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + OPAL_LIST_DESTRUCT(&p->info); +} +static OBJ_CLASS_INSTANCE(myinfo_t, opal_object_t, + mcon, mdes); + static struct { bool terminate_dvm; bool system_server_first; @@ -99,6 +117,7 @@ static struct { static opal_list_t job_info; static volatile bool active = false; static orte_jobid_t myjobid = ORTE_JOBID_INVALID; +static myinfo_t myinfo; static int create_app(int argc, char* argv[], opal_list_t *jdata, @@ -209,17 +228,70 @@ static void evhandler(int status, } } +typedef struct { + opal_pmix_lock_t lock; + opal_list_t list; +} mylock_t; + + +static void setupcbfunc(int status, + opal_list_t *info, + void *provided_cbdata, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + mylock_t *mylock = (mylock_t*)provided_cbdata; + opal_value_t *kv; + + if (NULL != info) { + /* cycle across the provided info */ + while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) { + opal_list_append(&mylock->list, &kv->super); + } + } + + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); + } + + OPAL_PMIX_WAKEUP_THREAD(&mylock->lock); +} + +static void launchhandler(int status, + const opal_process_name_t *source, + opal_list_t *info, opal_list_t *results, + opal_pmix_notification_complete_fn_t cbfunc, + void *cbdata) +{ + 
opal_value_t *p; + + /* the info list (if given) will include the launch directives, so + * transfer those to the myinfo_t for return to the main thread */ + while (NULL != info && NULL != (p = (opal_value_t*)opal_list_remove_first(info))) { + opal_list_append(&myinfo.info, &p->super); + } + + /* we _always_ have to execute the evhandler callback or + * else the event progress engine will hang */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata); + } + + /* now release the thread */ + OPAL_PMIX_WAKEUP_THREAD(&myinfo.lock); +} int prun(int argc, char *argv[]) { int rc, i; char *param; opal_pmix_lock_t lock; - opal_list_t apps; + opal_list_t apps, *lt; opal_pmix_app_t *app; - opal_value_t *val; - opal_list_t info; + opal_value_t *val, *kv, *kv2; + opal_list_t info, codes; struct timespec tp = {0, 100000}; + mylock_t mylock; /* init the globals */ memset(&orte_cmd_options, 0, sizeof(orte_cmd_options)); @@ -477,7 +549,17 @@ int prun(int argc, char *argv[]) val = OBJ_NEW(opal_value_t); val->key = strdup(OPAL_PMIX_OUTPUT_TO_FILE); val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.output_filename); + /* if the given filename isn't an absolute path, then + * convert it to one relative to the directory where prun + * was invoked, as that is what the user will have seen; + * keep the name as given if the cwd cannot be determined */ + if (!opal_path_is_absolute(orte_cmd_options.output_filename)) { + char cwd[OPAL_PATH_MAX]; + val->data.string = (NULL == getcwd(cwd, sizeof(cwd))) + ? strdup(orte_cmd_options.output_filename) : opal_os_path(false, cwd, orte_cmd_options.output_filename, NULL); + } else { + val->data.string = strdup(orte_cmd_options.output_filename); + } opal_list_append(&job_info, &val->super); } /* if we were asked to merge stderr to stdout, mark it so */ @@ -633,6 +715,91 @@ int prun(int argc, char *argv[]) opal_list_append(&job_info, &val->super); } + /* pickup any relevant envars */ + if (NULL != opal_pmix.server_setup_application) { + OBJ_CONSTRUCT(&info, opal_list_t); + val = OBJ_NEW(opal_value_t); + val->key = 
strdup(OPAL_PMIX_SETUP_APP_ENVARS); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&info, &val->super); + + OPAL_PMIX_CONSTRUCT_LOCK(&mylock.lock); + OBJ_CONSTRUCT(&mylock.list, opal_list_t); + rc = opal_pmix.server_setup_application(ORTE_PROC_MY_NAME->jobid, + &info, setupcbfunc, &mylock); + if (OPAL_SUCCESS != rc) { + OPAL_LIST_DESTRUCT(&info); + OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); + OBJ_DESTRUCT(&mylock.list); + goto DONE; + } + OPAL_PMIX_WAIT_THREAD(&mylock.lock); + OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); + /* transfer any returned ENVARS to the job_info */ + while (NULL != (val = (opal_value_t*)opal_list_remove_first(&mylock.list))) { + if (0 == strcmp(val->key, OPAL_PMIX_SET_ENVAR) || + 0 == strcmp(val->key, OPAL_PMIX_ADD_ENVAR) || + 0 == strcmp(val->key, OPAL_PMIX_UNSET_ENVAR) || + 0 == strcmp(val->key, OPAL_PMIX_PREPEND_ENVAR) || + 0 == strcmp(val->key, OPAL_PMIX_APPEND_ENVAR)) { + opal_list_append(&job_info, &val->super); + } else { + OBJ_RELEASE(val); + } + } + OPAL_LIST_DESTRUCT(&mylock.list); + } + + /* if we were launched by a tool wanting to direct our + * operation, then we need to pause here and give it + * a chance to tell us what we need to do */ + if (NULL != (param = getenv("PMIX_LAUNCHER_PAUSE_FOR_TOOL")) && + 0 == strcmp(param, "1")) { + /* register for the PMIX_LAUNCH_DIRECTIVE event */ + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + OBJ_CONSTRUCT(&codes, opal_list_t); + val = OBJ_NEW(opal_value_t); + val->key = strdup("foo"); + val->type = OPAL_INT; + val->data.integer = OPAL_PMIX_LAUNCH_DIRECTIVE; + opal_list_append(&codes, &val->super); + /* setup the myinfo object to capture the returned + * values - must do so prior to registering in case + * the event has already arrived */ + OBJ_CONSTRUCT(&myinfo, myinfo_t); + /* go ahead and register */ + opal_pmix.register_evhandler(&codes, NULL, launchhandler, regcbfunc, &lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + OPAL_LIST_DESTRUCT(&codes); + /* now wait 
for the launch directives to arrive */ + OPAL_PMIX_WAIT_THREAD(&myinfo.lock); + /* process the returned directives */ + OPAL_LIST_FOREACH(val, &myinfo.info, opal_value_t) { + if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_JOB_DIRECTIVES)) { + /* there will be a pointer to a list containing the directives */ + lt = (opal_list_t*)val->data.ptr; + while (NULL != (kv = (opal_value_t*)opal_list_remove_first(lt))) { + opal_output(0, "JOB DIRECTIVE: %s", kv->key); + opal_list_append(&job_info, &kv->super); + } + } else if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_APP_DIRECTIVES)) { + /* there will be a pointer to a list containing the directives */ + lt = (opal_list_t*)val->data.ptr; + OPAL_LIST_FOREACH(kv, lt, opal_value_t) { + opal_output(0, "APP DIRECTIVE: %s", kv->key); + OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { + /* the value can only be on one list at a time, so replicate it */ + kv2 = OBJ_NEW(opal_value_t); + opal_value_xfer(kv2, kv); + opal_list_append(&app->info, &kv2->super); + } + } + } + } + } + if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &myjobid))) { opal_output(0, "Job failed to spawn: %s", opal_strerror(rc)); goto DONE; diff --git a/orte/util/attr.c b/orte/util/attr.c index 19d644bf6a..9e8716f092 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -34,12 +34,6 @@ typedef struct { /* all default to NULL */ static orte_attr_converter_t converters[MAX_CONVERTERS]; -static int orte_attr_unload(orte_attribute_t *kv, - void **data, opal_data_type_t type); - -static int orte_attr_load(orte_attribute_t *kv, - void *data, opal_data_type_t type); - bool orte_get_attribute(opal_list_t *attributes, orte_attribute_key_t key, void **data, opal_data_type_t type) @@ -95,6 +89,81 @@ int orte_set_attribute(opal_list_t *attributes, return ORTE_SUCCESS; } +orte_attribute_t* orte_fetch_attribute(opal_list_t *attributes, + orte_attribute_t *prev, + orte_attribute_key_t key) +{ + orte_attribute_t *kv, *end, *next; + + /* if prev is NULL, then find the first attr on the list + * that matches the key */ + if (NULL == prev) { + OPAL_LIST_FOREACH(kv, attributes, orte_attribute_t) { + if (key == kv->key) { + return kv; + } + } + /* if we get here, then the key isn't on the list */ + return NULL; + } + + /* if we are at the end of the list, then nothing to do */ + end = (orte_attribute_t*)opal_list_get_end(attributes); + if (prev == end || end == (orte_attribute_t*)opal_list_get_next(&prev->super) || + NULL == opal_list_get_next(&prev->super)) { + return NULL; + } + + /* starting with the next item on the list, search + * for the next attr with the matching key */ + next = (orte_attribute_t*)opal_list_get_next(&prev->super); + while (NULL != next && end != next) { + if (next->key == key) { + return next; + } + next = (orte_attribute_t*)opal_list_get_next(&next->super); + } + + /* if we get here, then no matching key was found */ + return NULL; +} + +int orte_add_attribute(opal_list_t *attributes, + orte_attribute_key_t key, bool local, + void *data, opal_data_type_t type) +{ + orte_attribute_t *kv; + int rc; + + kv = OBJ_NEW(orte_attribute_t); + kv->key = key; + kv->local = local; + if (OPAL_SUCCESS != (rc = orte_attr_load(kv, data, type))) { + OBJ_RELEASE(kv); + return rc; + } + opal_list_append(attributes, &kv->super); + return 
ORTE_SUCCESS; +} + +int orte_prepend_attribute(opal_list_t *attributes, + orte_attribute_key_t key, bool local, + void *data, opal_data_type_t type) +{ + orte_attribute_t *kv; + int rc; + + kv = OBJ_NEW(orte_attribute_t); + kv->key = key; + kv->local = local; + if (OPAL_SUCCESS != (rc = orte_attr_load(kv, data, type))) { + OBJ_RELEASE(kv); + return rc; + } + opal_list_prepend(attributes, &kv->super); + return ORTE_SUCCESS; +} + void orte_remove_attribute(opal_list_t *attributes, orte_attribute_key_t key) { orte_attribute_t *kv; @@ -170,6 +239,16 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "APP-PREFIX-DIR"; case ORTE_APP_NO_CACHEDIR: return "ORTE_APP_NO_CACHEDIR"; + case ORTE_APP_SET_ENVAR: + return "ORTE_APP_SET_ENVAR"; + case ORTE_APP_UNSET_ENVAR: + return "ORTE_APP_UNSET_ENVAR"; + case ORTE_APP_PREPEND_ENVAR: + return "ORTE_APP_PREPEND_ENVAR"; + case ORTE_APP_APPEND_ENVAR: + return "ORTE_APP_APPEND_ENVAR"; + case ORTE_APP_ADD_ENVAR: + return "ORTE_APP_ADD_ENVAR"; case ORTE_NODE_USERNAME: return "NODE-USERNAME"; @@ -290,6 +369,18 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "ORTE_JOB_FULLY_DESCRIBED"; case ORTE_JOB_SILENT_TERMINATION: return "ORTE_JOB_SILENT_TERMINATION"; + case ORTE_JOB_SET_ENVAR: + return "ORTE_JOB_SET_ENVAR"; + case ORTE_JOB_UNSET_ENVAR: + return "ORTE_JOB_UNSET_ENVAR"; + case ORTE_JOB_PREPEND_ENVAR: + return "ORTE_JOB_PREPEND_ENVAR"; + case ORTE_JOB_APPEND_ENVAR: + return "ORTE_JOB_APPEND_ENVAR"; + case ORTE_JOB_ADD_ENVAR: + return "ORTE_JOB_ADD_ENVAR"; + case ORTE_JOB_APP_SETUP_DATA: + return "ORTE_JOB_APP_SETUP_DATA"; case ORTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; @@ -360,11 +451,12 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) } -static int orte_attr_load(orte_attribute_t *kv, - void *data, opal_data_type_t type) +int orte_attr_load(orte_attribute_t *kv, + void *data, opal_data_type_t type) { opal_byte_object_t *boptr; struct timeval *tv; + opal_envar_t *envar; kv->type = 
type; if (NULL == data) { @@ -485,6 +577,18 @@ static int orte_attr_load(orte_attribute_t *kv, kv->data.name = *(opal_process_name_t *)data; break; + case OPAL_ENVAR: + OBJ_CONSTRUCT(&kv->data.envar, opal_envar_t); + envar = (opal_envar_t*)data; + if (NULL != envar->envar) { + kv->data.envar.envar = strdup(envar->envar); + } + if (NULL != envar->value) { + kv->data.envar.value = strdup(envar->value); + } + kv->data.envar.separator = envar->separator; + break; + default: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); return OPAL_ERR_NOT_SUPPORTED; @@ -492,10 +596,11 @@ static int orte_attr_load(orte_attribute_t *kv, return OPAL_SUCCESS; } -static int orte_attr_unload(orte_attribute_t *kv, - void **data, opal_data_type_t type) +int orte_attr_unload(orte_attribute_t *kv, + void **data, opal_data_type_t type) { opal_byte_object_t *boptr; + opal_envar_t *envar; if (type != kv->type) { return OPAL_ERR_TYPE_MISMATCH; @@ -603,6 +708,18 @@ static int orte_attr_unload(orte_attribute_t *kv, memcpy(*data, &kv->data.name, sizeof(orte_process_name_t)); break; + case OPAL_ENVAR: + envar = OBJ_NEW(opal_envar_t); + if (NULL != kv->data.envar.envar) { + envar->envar = strdup(kv->data.envar.envar); + } + if (NULL != kv->data.envar.value) { + envar->value = strdup(kv->data.envar.value); + } + envar->separator = kv->data.envar.separator; + *data = envar; + break; + default: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); return OPAL_ERR_NOT_SUPPORTED; diff --git a/orte/util/attr.h b/orte/util/attr.h index b1b9b224ea..8393dc9a2d 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -47,6 +47,11 @@ typedef uint8_t orte_app_context_flags_t; #define ORTE_APP_MAX_PPN 14 // uint32 - maximum number of procs/node for this app #define ORTE_APP_PREFIX_DIR 15 // string - prefix directory for this app, if override necessary #define ORTE_APP_NO_CACHEDIR 16 // bool - flag that a cache dir is not to be specified for a Singularity container +#define ORTE_APP_SET_ENVAR 17 // opal_envar_t - set the given envar to the specified value +#define ORTE_APP_UNSET_ENVAR 18 // string - name of envar to unset, if present +#define ORTE_APP_PREPEND_ENVAR 19 // opal_envar_t - prepend the specified value to the given envar +#define ORTE_APP_APPEND_ENVAR 20 // opal_envar_t - append the specified value to the given envar +#define ORTE_APP_ADD_ENVAR 21 // opal_envar_t - add envar, do not override pre-existing one #define ORTE_APP_MAX_KEY 100 @@ -146,6 +151,12 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_FULLY_DESCRIBED (ORTE_JOB_START_KEY + 53) // bool - job is fully described in launch msg #define ORTE_JOB_SILENT_TERMINATION (ORTE_JOB_START_KEY + 54) // bool - do not generate an event notification when job // normally terminates +#define ORTE_JOB_SET_ENVAR (ORTE_JOB_START_KEY + 55) // opal_envar_t - set the given envar to the specified value +#define ORTE_JOB_UNSET_ENVAR (ORTE_JOB_START_KEY + 56) // string - name of envar to unset, if present +#define ORTE_JOB_PREPEND_ENVAR (ORTE_JOB_START_KEY + 57) // opal_envar_t - prepend the specified value to the given envar +#define ORTE_JOB_APPEND_ENVAR (ORTE_JOB_START_KEY + 58) // opal_envar_t - append the specified value to the given envar +#define ORTE_JOB_ADD_ENVAR (ORTE_JOB_START_KEY + 59) // opal_envar_t - add envar, do not override pre-existing one +#define ORTE_JOB_APP_SETUP_DATA (ORTE_JOB_START_KEY + 60) // opal_byte_object_t - blob containing app setup data #define ORTE_JOB_MAX_KEY 300 @@ -221,6 +232,24 @@ ORTE_DECLSPEC int orte_set_attribute(opal_list_t *attributes, orte_attribute_key /* Remove 
the named attribute from a list */ ORTE_DECLSPEC void orte_remove_attribute(opal_list_t *attributes, orte_attribute_key_t key); +ORTE_DECLSPEC orte_attribute_t* orte_fetch_attribute(opal_list_t *attributes, + orte_attribute_t *prev, + orte_attribute_key_t key); + +ORTE_DECLSPEC int orte_add_attribute(opal_list_t *attributes, + orte_attribute_key_t key, bool local, + void *data, opal_data_type_t type); + +ORTE_DECLSPEC int orte_prepend_attribute(opal_list_t *attributes, + orte_attribute_key_t key, bool local, + void *data, opal_data_type_t type); + +ORTE_DECLSPEC int orte_attr_load(orte_attribute_t *kv, + void *data, opal_data_type_t type); + +ORTE_DECLSPEC int orte_attr_unload(orte_attribute_t *kv, + void **data, opal_data_type_t type); + /* * Register a handler for converting attr keys to strings * diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index 30fc3c5182..a2acad8339 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -253,6 +253,8 @@ const char *orte_job_state_to_str(orte_job_state_t state) return "VM READY"; case ORTE_JOB_STATE_LAUNCH_APPS: return "PENDING APP LAUNCH"; + case ORTE_JOB_STATE_SEND_LAUNCH_MSG: + return "SENDING LAUNCH MSG"; case ORTE_JOB_STATE_RUNNING: return "RUNNING"; case ORTE_JOB_STATE_SUSPENDED: From 17c40f4cea96b2b94eb1d6d50b2ccf747f1341fc Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 28 Feb 2018 07:52:11 -0800 Subject: [PATCH 2/4] Implement support for proctable queries Signed-off-by: Ralph Castain --- .../pmix/pmix3x/pmix/include/pmix_common.h.in | 38 ++-- .../pmix/pmix3x/pmix/src/common/pmix_query.c | 3 + .../pmix3x/pmix/src/common/pmix_strings.c | 4 + .../src/mca/bfrops/base/bfrop_base_copy.c | 5 +- opal/mca/pmix/pmix3x/pmix3x.c | 163 +++++++++++++++++- opal/mca/pmix/pmix3x/pmix3x.h | 5 + opal/mca/pmix/pmix3x/pmix3x_server_north.c | 19 +- opal/mca/pmix/pmix_types.h | 2 +- orte/orted/pmix/pmix_server_gen.c | 107 +++++++++++- 9 files changed, 320 insertions(+), 26 deletions(-) diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index 8ec8d8db13..26271d8b29 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -573,11 +573,13 @@ typedef uint8_t pmix_proc_state_t; #define PMIX_PROC_STATE_ABORTED_BY_SIG (PMIX_PROC_STATE_ERROR + 4) /* process aborted by signal */ #define PMIX_PROC_STATE_TERM_WO_SYNC (PMIX_PROC_STATE_ERROR + 5) /* process exit'd w/o calling PMIx_Finalize */ #define PMIX_PROC_STATE_COMM_FAILED (PMIX_PROC_STATE_ERROR + 6) /* process communication has failed */ -#define PMIX_PROC_STATE_CALLED_ABORT (PMIX_PROC_STATE_ERROR + 7) /* process called "PMIx_Abort" */ -#define PMIX_PROC_STATE_MIGRATING (PMIX_PROC_STATE_ERROR + 8) /* process failed and is waiting for resources before restarting */ -#define PMIX_PROC_STATE_CANNOT_RESTART (PMIX_PROC_STATE_ERROR + 
9) /* process failed and cannot be restarted */ -#define PMIX_PROC_STATE_TERM_NON_ZERO (PMIX_PROC_STATE_ERROR + 10) /* process exited with a non-zero status, indicating abnormal */ -#define PMIX_PROC_STATE_FAILED_TO_LAUNCH (PMIX_PROC_STATE_ERROR + 11) /* unable to launch process */ +#define PMIX_PROC_STATE_SENSOR_BOUND_EXCEEDED (PMIX_PROC_STATE_ERROR + 7) /* process exceeded a sensor limit */ +#define PMIX_PROC_STATE_CALLED_ABORT (PMIX_PROC_STATE_ERROR + 8) /* process called "PMIx_Abort" */ +#define PMIX_PROC_STATE_HEARTBEAT_FAILED (PMIX_PROC_STATE_ERROR + 9) /* process failed to send heartbeat w/in time limit */ +#define PMIX_PROC_STATE_MIGRATING (PMIX_PROC_STATE_ERROR + 10) /* process failed and is waiting for resources before restarting */ +#define PMIX_PROC_STATE_CANNOT_RESTART (PMIX_PROC_STATE_ERROR + 11) /* process failed and cannot be restarted */ +#define PMIX_PROC_STATE_TERM_NON_ZERO (PMIX_PROC_STATE_ERROR + 12) /* process exited with a non-zero status, indicating abnormal */ +#define PMIX_PROC_STATE_FAILED_TO_LAUNCH (PMIX_PROC_STATE_ERROR + 13) /* unable to launch process */ /**** PMIX ERROR CONSTANTS ****/ @@ -1356,16 +1358,20 @@ struct pmix_info_t { } \ } while (0) -#define PMIX_INFO_LOAD(m, k, v, t) \ - do { \ - (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ - pmix_value_load(&((m)->value), (v), (t)); \ - } while (0) -#define PMIX_INFO_XFER(d, s) \ +#define PMIX_INFO_LOAD(m, k, v, t) \ do { \ - (void)strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ - (d)->flags = (s)->flags; \ - pmix_value_xfer(&(d)->value, &(s)->value); \ + if (NULL != (k)) { \ + (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ + } \ + pmix_value_load(&((m)->value), (v), (t)); \ + } while (0) +#define PMIX_INFO_XFER(d, s) \ + do { \ + if (NULL != (s)->key) { \ + (void)strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + } \ + (d)->flags = (s)->flags; \ + pmix_value_xfer(&(d)->value, &(s)->value); \ } while(0) #define PMIX_INFO_REQUIRED(m) \ @@ -1386,7 +1392,9 @@ struct pmix_info_t { (r) 
= PMIX_ERR_NOMEM; \ break; \ } \ - _kv->key = strdup(_info[_n].key); \ + if (NULL != _info[_n].key) { \ + _kv->key = strdup(_info[_n].key); \ + } \ PMIX_VALUE_XFER((r), _kv->value, &_info[_n].value);\ if (PMIX_SUCCESS != (r)) { \ PMIX_RELEASE(_kv); \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c index 5aa14a532a..77082041d4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c @@ -63,6 +63,7 @@ static void query_cbfunc(struct pmix_peer_t *peer, PMIX_BFROPS_UNPACK(rc, peer, buf, &results->status, &cnt, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + results->status = rc; goto complete; } if (PMIX_SUCCESS != results->status) { @@ -74,6 +75,7 @@ static void query_cbfunc(struct pmix_peer_t *peer, PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + results->status = rc; goto complete; } if (0 < results->ninfo) { @@ -82,6 +84,7 @@ static void query_cbfunc(struct pmix_peer_t *peer, PMIX_BFROPS_UNPACK(rc, peer, buf, results->info, &cnt, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + results->status = rc; goto complete; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c index 18ae2a34d1..7d8d81fd23 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c @@ -71,8 +71,12 @@ PMIX_EXPORT const char* PMIx_Proc_state_string(pmix_proc_state_t state) return "PROC TERMINATED WITHOUT CALLING PMIx_Finalize"; case PMIX_PROC_STATE_COMM_FAILED: return "PROC LOST COMMUNICATION"; + case PMIX_PROC_STATE_SENSOR_BOUND_EXCEEDED: + return "PROC SENSOR BOUND EXCEEDED"; case PMIX_PROC_STATE_CALLED_ABORT: return "PROC CALLED PMIx_Abort"; + case PMIX_PROC_STATE_HEARTBEAT_FAILED: + return "PROC FAILED TO REPORT HEARTBEAT"; case 
PMIX_PROC_STATE_MIGRATING: return "PROC WAITING TO MIGRATE"; case PMIX_PROC_STATE_CANNOT_RESTART: diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c index f2a9e9df81..08913f968d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c @@ -373,6 +373,7 @@ pmix_status_t pmix_bfrops_base_copy_pinfo(pmix_proc_info_t **dest, if (NULL == p) { return PMIX_ERR_NOMEM; } + memcpy(&p->proc, &src->proc, sizeof(pmix_proc_t)); if (NULL != src->hostname) { p->hostname = strdup(src->hostname); } @@ -623,7 +624,7 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, p1 = (pmix_info_t*)p->array; s1 = (pmix_info_t*)src->array; for (n=0; n < src->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + PMIX_INFO_XFER(&p1[n], &s1[n]); } break; case PMIX_PDATA: @@ -635,7 +636,7 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, pd = (pmix_pdata_t*)p->array; sd = (pmix_pdata_t*)src->array; for (n=0; n < src->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data.flag, sd[n].value.type); + PMIX_PDATA_XFER(&pd[n], &sd[n]); } break; case PMIX_BUFFER: diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index 1f3b5fbff9..b928770c71 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -909,7 +909,9 @@ void pmix3x_value_load(pmix_value_t *v, v->data.darray->array = info; n=0; OPAL_LIST_FOREACH(val, list, opal_value_t) { - (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); + if (NULL != val->key) { + (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); + } pmix3x_value_load(&info[n].value, val); ++n; } @@ -917,6 +919,32 @@ void pmix3x_value_load(pmix_value_t *v, v->data.darray->array = NULL; } break; + case OPAL_PROC_INFO: + v->type = PMIX_PROC_INFO; + 
PMIX_PROC_INFO_CREATE(v->data.pinfo, 1); + /* see if this job is in our list of known nspaces */ + found = false; + OPAL_LIST_FOREACH(job, &mca_pmix_pmix3x_component.jobids, opal_pmix3x_jobid_trkr_t) { + if (job->jobid == kv->data.pinfo.name.jobid) { + (void)strncpy(v->data.pinfo->proc.nspace, job->nspace, PMIX_MAX_NSLEN); + found = true; + break; + } + } + if (!found) { + (void)opal_snprintf_jobid(v->data.pinfo->proc.nspace, PMIX_MAX_NSLEN, kv->data.pinfo.name.jobid); + } + v->data.pinfo->proc.rank = pmix3x_convert_opalrank(kv->data.pinfo.name.vpid); + if (NULL != kv->data.pinfo.hostname) { + v->data.pinfo->hostname = strdup(kv->data.pinfo.hostname); + } + if (NULL != kv->data.pinfo.executable_name) { + v->data.pinfo->executable_name = strdup(kv->data.pinfo.executable_name); + } + v->data.pinfo->pid = kv->data.pinfo.pid; + v->data.pinfo->exit_code = kv->data.pinfo.exit_code; + v->data.pinfo->state = pmix3x_convert_opalstate(kv->data.pinfo.state); + break; case OPAL_ENVAR: v->type = PMIX_ENVAR; PMIX_ENVAR_CONSTRUCT(&v->data.envar); @@ -1099,7 +1127,9 @@ int pmix3x_value_unload(opal_value_t *kv, /* handle the various types */ if (PMIX_INFO == v->data.darray->type) { pmix_info_t *iptr = (pmix_info_t*)v->data.darray->array; - ival->key = strdup(iptr[n].key); + if (NULL != iptr[n].key) { + ival->key = strdup(iptr[n].key); + } rc = pmix3x_value_unload(ival, &iptr[n].value); if (OPAL_SUCCESS != rc) { OPAL_LIST_RELEASE(lt); @@ -1110,6 +1140,37 @@ int pmix3x_value_unload(opal_value_t *kv, } } break; + case PMIX_PROC_INFO: + kv->type = OPAL_PROC_INFO; + if (NULL == v->data.pinfo) { + rc = OPAL_ERR_BAD_PARAM; + break; + } + /* see if this job is in our list of known nspaces */ + found = false; + OPAL_LIST_FOREACH(job, &mca_pmix_pmix3x_component.jobids, opal_pmix3x_jobid_trkr_t) { + if (0 == strncmp(job->nspace, v->data.pinfo->proc.nspace, PMIX_MAX_NSLEN)) { + kv->data.pinfo.name.jobid = job->jobid; + found = true; + break; + } + } + if (!found) { + if (OPAL_SUCCESS != (rc = 
opal_convert_string_to_jobid(&kv->data.pinfo.name.jobid, v->data.pinfo->proc.nspace))) { + return pmix3x_convert_opalrc(rc); + } + } + kv->data.pinfo.name.vpid = pmix3x_convert_rank(v->data.pinfo->proc.rank); + if (NULL != v->data.pinfo->hostname) { + kv->data.pinfo.hostname = strdup(v->data.pinfo->hostname); + } + if (NULL != v->data.pinfo->executable_name) { + kv->data.pinfo.executable_name = strdup(v->data.pinfo->executable_name); + } + kv->data.pinfo.pid = v->data.pinfo->pid; + kv->data.pinfo.exit_code = v->data.pinfo->exit_code; + kv->data.pinfo.state = pmix3x_convert_state(v->data.pinfo->state); + break; case PMIX_ENVAR: kv->type = OPAL_ENVAR; OBJ_CONSTRUCT(&kv->data.envar, opal_envar_t); @@ -1347,6 +1408,7 @@ static void infocbfunc(pmix_status_t status, opal_list_append(results, &iptr->super); iptr->key = strdup(info[n].key); if (OPAL_SUCCESS != (rc = pmix3x_value_unload(iptr, &info[n].value))) { + OPAL_ERROR_LOG(rc); OPAL_LIST_RELEASE(results); results = NULL; break; @@ -1510,6 +1572,103 @@ opal_pmix_alloc_directive_t pmix3x_convert_allocdir(pmix_alloc_directive_t dir) } } +int pmix3x_convert_state(pmix_proc_state_t state) +{ + switch(state) { + case PMIX_PROC_STATE_UNDEF: + return 0; + case PMIX_PROC_STATE_PREPPED: + case PMIX_PROC_STATE_LAUNCH_UNDERWAY: + return 1; + case PMIX_PROC_STATE_RESTART: + return 2; + case PMIX_PROC_STATE_TERMINATE: + return 3; + case PMIX_PROC_STATE_RUNNING: + return 4; + case PMIX_PROC_STATE_CONNECTED: + return 5; + case PMIX_PROC_STATE_UNTERMINATED: + return 15; + case PMIX_PROC_STATE_TERMINATED: + return 20; + case PMIX_PROC_STATE_KILLED_BY_CMD: + return 51; + case PMIX_PROC_STATE_ABORTED: + return 52; + case PMIX_PROC_STATE_FAILED_TO_START: + return 53; + case PMIX_PROC_STATE_ABORTED_BY_SIG: + return 54; + case PMIX_PROC_STATE_TERM_WO_SYNC: + return 55; + case PMIX_PROC_STATE_COMM_FAILED: + return 56; + case PMIX_PROC_STATE_SENSOR_BOUND_EXCEEDED: + return 57; + case PMIX_PROC_STATE_CALLED_ABORT: + return 58; + case 
PMIX_PROC_STATE_HEARTBEAT_FAILED: + return 59; + case PMIX_PROC_STATE_MIGRATING: + return 60; + case PMIX_PROC_STATE_CANNOT_RESTART: + return 61; + case PMIX_PROC_STATE_TERM_NON_ZERO: + return 62; + case PMIX_PROC_STATE_FAILED_TO_LAUNCH: + return 63; + default: + return 0; // undef + } +} + +pmix_proc_state_t pmix3x_convert_opalstate(int state) +{ + switch(state) { + case 0: + return PMIX_PROC_STATE_UNDEF; + case 1: + return PMIX_PROC_STATE_LAUNCH_UNDERWAY; + case 2: + return PMIX_PROC_STATE_RESTART; + case 3: + return PMIX_PROC_STATE_TERMINATE; + case 4: + return PMIX_PROC_STATE_RUNNING; + case 5: + return PMIX_PROC_STATE_CONNECTED; + case 51: + return PMIX_PROC_STATE_KILLED_BY_CMD; + case 52: + return PMIX_PROC_STATE_ABORTED; + case 53: + return PMIX_PROC_STATE_FAILED_TO_START; + case 54: + return PMIX_PROC_STATE_ABORTED_BY_SIG; + case 55: + return PMIX_PROC_STATE_TERM_WO_SYNC; + case 56: + return PMIX_PROC_STATE_COMM_FAILED; + case 57: + return PMIX_PROC_STATE_SENSOR_BOUND_EXCEEDED; + case 58: + return PMIX_PROC_STATE_CALLED_ABORT; + case 59: + return PMIX_PROC_STATE_HEARTBEAT_FAILED; + case 60: + return PMIX_PROC_STATE_MIGRATING; + case 61: + return PMIX_PROC_STATE_CANNOT_RESTART; + case 62: + return PMIX_PROC_STATE_TERM_NON_ZERO; + case 63: + return PMIX_PROC_STATE_FAILED_TO_LAUNCH; + default: + return PMIX_PROC_STATE_UNDEF; + } +} + /**** INSTANTIATE INTERNAL CLASSES ****/ OBJ_CLASS_INSTANCE(opal_pmix3x_jobid_trkr_t, opal_list_item_t, diff --git a/opal/mca/pmix/pmix3x/pmix3x.h b/opal/mca/pmix/pmix3x/pmix3x.h index 4c56afc7cf..b3cf4de76c 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.h +++ b/opal/mca/pmix/pmix3x/pmix3x.h @@ -342,6 +342,11 @@ OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix3x_convert_allocdir(pmix_al OPAL_MODULE_DECLSPEC char* pmix3x_convert_jobid(opal_jobid_t jobid); +OPAL_MODULE_DECLSPEC int pmix3x_convert_state(pmix_proc_state_t state); + +OPAL_MODULE_DECLSPEC pmix_proc_state_t pmix3x_convert_opalstate(int state); + + END_C_DECLS #endif /* 
MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix3x/pmix3x_server_north.c b/opal/mca/pmix/pmix3x/pmix3x_server_north.c index c86aa30b36..75261c7c46 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_server_north.c +++ b/opal/mca/pmix/pmix3x/pmix3x_server_north.c @@ -954,6 +954,7 @@ static void info_cbfunc(int status, OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix3x_value_load(&pcaddy->info[n].value, kv); + ++n; } } /* we are done with the incoming data */ @@ -1012,10 +1013,20 @@ static pmix_status_t server_query(pmix_proc_t *proct, for (m=0; m < queries[n].nqual; m++) { oinfo = OBJ_NEW(opal_value_t); opal_list_append(&q->qualifiers, &oinfo->super); - oinfo->key = strdup(queries[n].qualifiers[m].key); - if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { - OBJ_RELEASE(opalcaddy); - return pmix3x_convert_opalrc(rc); + + if (0 == strcmp(queries[n].qualifiers[m].key, PMIX_NSPACE)) { + /* must convert this to jobid */ + oinfo->key = strdup(OPAL_PMIX_PROCID); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&oinfo->data.name.jobid, queries[n].qualifiers[m].value.data.string))) { + OBJ_RELEASE(opalcaddy); + return pmix3x_convert_opalrc(rc); + } + } else { + oinfo->key = strdup(queries[n].qualifiers[m].key); + if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { + OBJ_RELEASE(opalcaddy); + return pmix3x_convert_opalrc(rc); + } } } } diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 456a462032..d2643b2e54 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -534,7 +534,7 @@ OBJ_CLASS_DECLARATION(opal_pmix_modex_data_t); typedef struct { opal_list_item_t super; char **keys; - opal_list_t qualifiers; + opal_list_t qualifiers; // list of opal_value_t } opal_pmix_query_t; OBJ_CLASS_DECLARATION(opal_pmix_query_t); diff --git a/orte/orted/pmix/pmix_server_gen.c 
b/orte/orted/pmix/pmix_server_gen.c index 13f0a6c2d5..c5ec718484 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -465,9 +465,11 @@ static void _query(int sd, short args, void *cbdata) orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; opal_pmix_query_t *q; opal_value_t *kv; + orte_jobid_t jobid; orte_job_t *jdata; orte_proc_t *proct; - int rc, i, num_replies; + orte_app_context_t *app; + int rc, i, k, num_replies; opal_list_t *results, targets, *array; size_t n; uint32_t key; @@ -683,10 +685,111 @@ static void _query(int sd, short args, void *cbdata) kv->type = OPAL_STRING; kv->data.string = strdup(orte_process_info.my_hnp_uri); opal_list_append(results, &kv->super); + } else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_PROC_TABLE)) { + /* the job they are asking about is in the qualifiers */ + jobid = ORTE_JOBID_INVALID; + OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) { + if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) { + /* save the id */ + jobid = kv->data.name.jobid; + break; + } + } + if (ORTE_JOBID_INVALID == jobid) { + rc = ORTE_ERR_BAD_PARAM; + goto done; + } + /* construct a list of values with opal_proc_info_t + * entries for each proc in the indicated job */ + jdata = orte_get_job_data_object(jobid); + if (NULL == jdata) { + rc = ORTE_ERR_NOT_FOUND; + goto done; + } + /* setup the reply */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_QUERY_PROC_TABLE); + kv->type = OPAL_PTR; + array = OBJ_NEW(opal_list_t); + kv->data.ptr = array; + 
opal_list_append(results, &kv->super); + /* cycle thru the job and create an entry for each proc */ + for (k=0; k < jdata->procs->size; k++) { + if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, k))) { + continue; + } + kv = OBJ_NEW(opal_value_t); + kv->type = OPAL_PROC_INFO; + kv->data.pinfo.name.jobid = jobid; + kv->data.pinfo.name.vpid = proct->name.vpid; + if (NULL != proct->node && NULL != proct->node->name) { + kv->data.pinfo.hostname = strdup(proct->node->name); + } + app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proct->app_idx); + if (NULL != app && NULL != app->app) { + kv->data.pinfo.executable_name = strdup(app->app); + } + kv->data.pinfo.pid = proct->pid; + kv->data.pinfo.exit_code = proct->exit_code; + kv->data.pinfo.state = proct->state; + opal_list_append(array, &kv->super); + } + } else if (0 == strcmp(q->keys[n], OPAL_PMIX_QUERY_LOCAL_PROC_TABLE)) { + /* the job they are asking about is in the qualifiers */ + jobid = ORTE_JOBID_INVALID; + OPAL_LIST_FOREACH(kv, &q->qualifiers, opal_value_t) { + if (0 == strcmp(kv->key, OPAL_PMIX_PROCID)) { + /* save the id */ + jobid = kv->data.name.jobid; + break; + } + } + if (ORTE_JOBID_INVALID == jobid) { + rc = ORTE_ERR_BAD_PARAM; + goto done; + } + /* construct a list of values with opal_proc_info_t + * entries for each LOCAL proc in the indicated job */ + jdata = orte_get_job_data_object(jobid); + if (NULL == jdata) { + rc = ORTE_ERR_NOT_FOUND; + goto done; + } + /* setup the reply */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_QUERY_LOCAL_PROC_TABLE); + kv->type = OPAL_PTR; + array = OBJ_NEW(opal_list_t); + kv->data.ptr = array; + opal_list_append(results, &kv->super); + /* cycle thru the job and create an entry for each proc */ + for (k=0; k < jdata->procs->size; k++) { + if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, k))) { + continue; + } + if (ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_LOCAL)) { + kv = 
OBJ_NEW(opal_value_t); + kv->type = OPAL_PROC_INFO; + kv->data.pinfo.name.jobid = jobid; + kv->data.pinfo.name.vpid = proct->name.vpid; + if (NULL != proct->node && NULL != proct->node->name) { + kv->data.pinfo.hostname = strdup(proct->node->name); + } + app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proct->app_idx); + if (NULL != app && NULL != app->app) { + kv->data.pinfo.executable_name = strdup(app->app); + } + kv->data.pinfo.pid = proct->pid; + kv->data.pinfo.exit_code = proct->exit_code; + kv->data.pinfo.state = proct->state; + opal_list_append(array, &kv->super); + } + } } } } + done: if (0 == opal_list_get_size(results)) { rc = ORTE_ERR_NOT_FOUND; } else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) { From 72410438090bf5aff9ce678c45dcb8141a8e86e3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 1 Mar 2018 19:35:16 -0800 Subject: [PATCH 3/4] Modify the internal logic for resolve nodes/peers The current code path for PMIx_Resolve_peers and PMIx_Resolve_nodes executes a threadshift in the preg components themselves. This is done to ensure thread safety when called from the user level. However, it causes thread-stall when someone attempts to call the regex functions from _inside_ the PMIx code base should the call occur from within an event. Accordingly, move the threadshift to the client-level functions and make the preg components just execute their algorithms. Create a new pnet/test component to verify that the preg code can be safely accessed - set that component to be selected only when the user directly specifies it. The new component will be used to validate various logical extensions during development, and can then be discarded.
Signed-off-by: Ralph Castain (cherry picked from commit 456ac7f7af3d9ba09888e3c899eb001daaa24aef) --- .../pmix/pmix3x/pmix/src/client/pmix_client.c | 112 +++++++- .../pmix3x/pmix/src/mca/gds/hash/gds_hash.c | 16 +- .../pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c | 6 + .../pmix3x/pmix/src/mca/pnet/test/Makefile.am | 52 ++++ .../pmix3x/pmix/src/mca/pnet/test/pnet_test.c | 221 ++++++++++++++++ .../pmix3x/pmix/src/mca/pnet/test/pnet_test.h | 36 +++ .../src/mca/pnet/test/pnet_test_component.c | 101 +++++++ .../pmix/src/mca/preg/native/preg_native.c | 246 ++++++------------ orte/mca/odls/base/odls_base_default_fns.c | 38 +-- 9 files changed, 642 insertions(+), 186 deletions(-) create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index 808f52e5c6..e17aac7720 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -1175,11 +1175,27 @@ static void _commitfn(int sd, short args, void *cbdata) return rc; } +static void _resolve_peers(int sd, short args, void *cbdata) +{ + pmix_cb_t *cb = (pmix_cb_t*)cbdata; + pmix_status_t rc; + + cb->status = pmix_preg.resolve_peers(cb->key, cb->pname.nspace, + &cb->procs, &cb->nprocs); + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); +} + /* need to thread-shift this request */ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, pmix_proc_t **procs, size_t *nprocs) { + pmix_cb_t *cb; + pmix_status_t rc; + pmix_proc_t proc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { 
PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1187,16 +1203,71 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* set default */ - *procs = NULL; - *nprocs = 0; - return pmix_preg.resolve_peers(nodename, nspace, procs, nprocs); + cb = PMIX_NEW(pmix_cb_t); + cb->key = (char*)nodename; + cb->pname.nspace = strdup(nspace); + + PMIX_THREADSHIFT(cb, _resolve_peers); + + /* wait for the result */ + PMIX_WAIT_THREAD(&cb->lock); + + /* if the nspace wasn't found, then we need to + * ask the server for that info */ + if (PMIX_ERR_INVALID_NAMESPACE == cb->status) { + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + /* any key will suffice as it will bring down + * the entire data blob */ + rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cb); + return rc; + } + /* retry the fetch */ + cb->lock.active = true; + PMIX_THREADSHIFT(cb, _resolve_peers); + PMIX_WAIT_THREAD(&cb->lock); + } + *procs = cb->procs; + *nprocs = cb->nprocs; + + rc = cb->status; + PMIX_RELEASE(cb); + return rc; +} + +static void _resolve_nodes(int fd, short args, void *cbdata) +{ + pmix_cb_t *cb = (pmix_cb_t*)cbdata; + char *regex, **names; + + /* get a regular expression describing the PMIX_NODE_MAP */ + cb->status = pmix_preg.resolve_nodes(cb->pname.nspace, &regex); + if (PMIX_SUCCESS == cb->status) { + /* parse it into an argv array of names */ + cb->status = pmix_preg.parse_nodes(regex, &names); + if (PMIX_SUCCESS == cb->status) { + /* assemble it into a comma-delimited list */ + cb->key = pmix_argv_join(names, ','); + pmix_argv_free(names); + } else { + free(regex); + } + } + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } /* need to thread-shift this request */ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) { + pmix_cb_t *cb; + pmix_status_t rc; +
pmix_proc_t proc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1204,8 +1275,35 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* set default */ - *nodelist = NULL; + cb = PMIX_NEW(pmix_cb_t); + cb->pname.nspace = strdup(nspace); - return pmix_preg.resolve_nodes(nspace, nodelist); + PMIX_THREADSHIFT(cb, _resolve_nodes); + + /* wait for the result */ + PMIX_WAIT_THREAD(&cb->lock); + + /* if the nspace wasn't found, then we need to + * ask the server for that info */ + if (PMIX_ERR_INVALID_NAMESPACE == cb->status) { + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + /* any key will suffice as it will bring down + * the entire data blob */ + rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cb); + return rc; + } + /* retry the fetch */ + cb->lock.active = true; + PMIX_THREADSHIFT(cb, _resolve_nodes); + PMIX_WAIT_THREAD(&cb->lock); + } + /* the string we want is in the key field */ + *nodelist = cb->key; + + rc = cb->status; + PMIX_RELEASE(cb); + return rc; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index 95bcec0a08..d005d47203 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -333,7 +333,8 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, } /* store the comma-delimited list of nodes hosting - * procs in this nspace */ + * procs in this nspace in case someone using PMIx v2 + * requests it */ kp2 = PMIX_NEW(pmix_kval_t); kp2->key = strdup(PMIX_NODE_LIST); kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); @@ -397,6 +398,19 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, ht = &trk->internal; for (n=0; n < ninfo; n++) { if (0 == strcmp(info[n].key, 
PMIX_NODE_MAP)) { + /* store the node map itself since that is + * what v3 uses */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_MAP); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(info[n].value.data.string); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + /* parse the regex to get the argv array of node names */ if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(info[n].value.data.string, &nodes))) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c index 53ad2e5973..d482832c9a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c @@ -37,6 +37,7 @@ #include "src/util/error.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" +#include "src/mca/preg/preg.h" #include "src/mca/pnet/pnet.h" #include "src/mca/pnet/base/base.h" @@ -298,6 +299,10 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, size_t n; pmix_status_t rc; pmix_kval_t *kv; + char *nodestring, **nodes; + pmix_proc_t *procs; + size_t nprocs; + if (NULL != info) { for (n=0; n < ninfo; n++) { @@ -321,6 +326,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, } } } + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am new file mode 100644 index 0000000000..3faf68a32c --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am @@ -0,0 +1,52 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers = pnet_test.h +sources = \ + pnet_test_component.c \ + pnet_test.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_pmix_pnet_test_DSO +lib = +lib_sources = +component = mca_pnet_test.la +component_sources = $(headers) $(sources) +else +lib = libmca_pnet_test.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_pnet_test_la_SOURCES = $(component_sources) +mca_pnet_test_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(lib) +libmca_pnet_test_la_SOURCES = $(lib_sources) +libmca_pnet_test_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c new file mode 100644 index 0000000000..2beb521b0b --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved.
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#include + +#include + +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/include/pmix_socket_errno.h" +#include "src/include/pmix_globals.h" +#include "src/class/pmix_list.h" +#include "src/util/alfg.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/pmix_environ.h" +#include "src/mca/preg/preg.h" + +#include "src/mca/pnet/pnet.h" +#include "src/mca/pnet/base/base.h" +#include "pnet_test.h" + +static pmix_status_t test_init(void); +static void test_finalize(void); +static pmix_status_t setup_app(pmix_nspace_t *nptr, + pmix_info_t info[], size_t ninfo, + pmix_list_t *ilist); +static pmix_status_t setup_local_network(pmix_nspace_t *nptr, + pmix_info_t info[], + size_t ninfo); +static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env); +static void child_finalized(pmix_peer_t *peer); +static void local_app_finalized(char *nspace); + +pmix_pnet_module_t pmix_test_module = { + .init = test_init, + .finalize = test_finalize, + .setup_app = setup_app, + .setup_local_network = setup_local_network, + .setup_fork = setup_fork, + .child_finalized = child_finalized, + .local_app_finalized = local_app_finalized +}; + +static pmix_status_t test_init(void) +{ + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: test init"); + return PMIX_SUCCESS; +} + +static void test_finalize(void) +{ + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: test finalize"); +} + +/* NOTE: if there is any binary data to be transferred, then + * this function MUST pack it for transport as the host will + * not know how to do so */ +static pmix_status_t setup_app(pmix_nspace_t *nptr, + pmix_info_t info[], size_t 
ninfo, + pmix_list_t *ilist) +{ + uint64_t unique_key[2]; + char *string_key, *cs_env; + int fd_rand; + size_t n, bytes_read, len; + pmix_kval_t *kv, *next; + int i, j; + bool envars, seckeys; + + if (NULL == info) { + envars = true; + seckeys = true; + } else { + envars = false; + seckeys = false; + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ENVARS, PMIX_MAX_KEYLEN)) { + envars = PMIX_INFO_TRUE(&info[n]); + } else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ALL, PMIX_MAX_KEYLEN)) { + envars = PMIX_INFO_TRUE(&info[n]); + seckeys = PMIX_INFO_TRUE(&info[n]); + } else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_NONENVARS, PMIX_MAX_KEYLEN)) { + seckeys = PMIX_INFO_TRUE(&info[n]); + } + } + } + + if (seckeys) { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + return PMIX_ERR_NOMEM; + } + kv->key = strdup(PMIX_SET_ENVAR); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + kv->value->type = PMIX_ENVAR; + PMIX_ENVAR_LOAD(&kv->value->data.envar, "PMIX_TEST_SECKEY", "1", ':'); + pmix_list_append(ilist, &kv->super); + } + + if (envars) { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + return PMIX_ERR_NOMEM; + } + kv->key = strdup(PMIX_SET_ENVAR); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + kv->value->type = PMIX_ENVAR; + PMIX_ENVAR_LOAD(&kv->value->data.envar, "PMIX_TEST_ENVAR", "1", ':'); + pmix_list_append(ilist, &kv->super); + } + + /* provide a blob so setup_local_network will get called */ + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + return PMIX_ERR_NOMEM; + } + kv->key = strdup("pmix-pnet-test-blob"); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + kv->value->type = PMIX_STRING; + kv->value->data.string = strdup("foobar"); + pmix_list_append(ilist, 
&kv->super); + + + return PMIX_SUCCESS; +} + +static pmix_status_t setup_local_network(pmix_nspace_t *nptr, + pmix_info_t info[], + size_t ninfo) +{ + size_t n, m; + pmix_status_t rc; + pmix_kval_t *kv; + char *nodestring, **nodes; + pmix_proc_t *procs; + size_t nprocs; + + /* get the list of nodes in this job - returns a regex */ + pmix_output(0, "pnet:setup_local_network NSPACE %s", (NULL == nptr) ? "NULL" : nptr->nspace); + pmix_preg.resolve_nodes(nptr->nspace, &nodestring); + if (NULL == nodestring) { + return PMIX_SUCCESS; + } + pmix_preg.parse_nodes(nodestring, &nodes); // get an argv array of node names + pmix_output(0, "pnet:setup_local_network NODES %s", (NULL == nodes) ? "NULL" : "NON-NULL"); + if (NULL == nodes) { + free(nodestring); + return PMIX_SUCCESS; + } + for (n=0; NULL != nodes[n]; n++) { + pmix_output(0, "pnet:setup_local_network NODE: %s", nodes[n]); + } + + for (n=0; NULL != nodes[n]; n++) { + /* get an array of pmix_proc_t containing the names of the procs on that node */ + pmix_preg.resolve_peers(nodes[n], nptr->nspace, &procs, &nprocs); + if (NULL == procs) { + continue; + } + for (m=0; m < nprocs; m++) { + pmix_output(0, "pnet:setup_local_network NODE %s: peer %s:%d", nodes[n], procs[m].nspace, procs[m].rank); + } + /* do stuff */ + free(procs); + } + + return PMIX_SUCCESS; +} + +static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env) +{ + return PMIX_SUCCESS; +} + +static void child_finalized(pmix_peer_t *peer) +{ + +} + +static void local_app_finalized(char *nspace) +{ + +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h new file mode 100644 index 0000000000..8601bc355b --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_PNET_test_H +#define PMIX_PNET_test_H + +#include + + +#include "src/mca/pnet/pnet.h" + +BEGIN_C_DECLS + +typedef struct { + pmix_pnet_base_component_t super; + char **include; + char **exclude; +} pmix_pnet_test_component_t; + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_pnet_test_component_t mca_pnet_test_component; +extern pmix_pnet_module_t pmix_test_module; + +/* define a key for any blob we need to send in a launch msg */ +#define PMIX_PNET_TEST_BLOB "pmix.pnet.test.blob" + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c new file mode 100644 index 0000000000..06e360163f --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c @@ -0,0 +1,101 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include +#include "pmix_common.h" + +#include "src/util/argv.h" +#include "src/mca/pnet/pnet.h" +#include "pnet_test.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_pnet_test_component_t mca_pnet_test_component = { + .super = { + .base = { + PMIX_PNET_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "test", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } + }, + .include = NULL, + .exclude = NULL +}; + +static pmix_status_t component_open(void) +{ + int index; + const pmix_mca_base_var_storage_t *value=NULL; + + /* we only allow ourselves to be considered IF the user + * specifically requested so */ + if (0 > (index = pmix_mca_base_var_find("pmix", "pnet", NULL, NULL))) { + return PMIX_ERROR; + } + pmix_mca_base_var_get_value(index, &value, NULL, NULL); + if (NULL != value && NULL != value->stringval && '\0' != value->stringval[0]) { + if (NULL != strstr(value->stringval, "test")) { + return PMIX_SUCCESS; + } + } + return PMIX_ERROR; +} + + +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 0; + *module = 
(pmix_mca_base_module_t *)&pmix_test_module; + return PMIX_SUCCESS; +} + + +static pmix_status_t component_close(void) +{ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c index 5535296b3d..dd9767dacf 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c @@ -501,27 +501,32 @@ static pmix_status_t parse_procs(const char *regexp, return rc; } -static void _resolve_peers(int sd, short args, void *cbdata) +static pmix_status_t resolve_peers(const char *nodename, + const char *nspace, + pmix_proc_t **procs, size_t *nprocs) { - pmix_cb_t *cb = (pmix_cb_t*)cbdata; + pmix_cb_t cb; pmix_status_t rc; pmix_kval_t *kv; pmix_proc_t proc; char **ptr; pmix_info_t *info; - pmix_proc_t *procs; - size_t ninfo, nprocs, n, j; + pmix_proc_t *p=NULL; + size_t ninfo, np=0, n, j; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + + cb.key = strdup(nodename); /* this data isn't going anywhere, so we don't require a copy */ - cb->copy = false; + cb.copy = false; /* scope is irrelevant as the info we seek must be local */ - cb->scope = PMIX_SCOPE_UNDEF; + cb.scope = PMIX_SCOPE_UNDEF; /* let the proc point to the nspace */ - (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - cb->proc = &proc; + cb.proc = &proc; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, &cb); if (PMIX_SUCCESS != rc) { if (PMIX_ERR_INVALID_NAMESPACE != rc) { PMIX_ERROR_LOG(rc); @@ -529,12 +534,12 @@ static void _resolve_peers(int sd, short args, void *cbdata) goto complete; } /* should just be the one value on the list */ - if (1 != pmix_list_get_size(&cb->kvs)) { + if (1 != pmix_list_get_size(&cb.kvs)) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); rc = PMIX_ERR_BAD_PARAM; goto complete; } - 
kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs); + kv = (pmix_kval_t*)pmix_list_get_first(&cb.kvs); /* the hostname used as a key with wildcard rank will return * a pmix_data_array_t of pmix_info_t structs */ if (NULL == kv->value || @@ -552,184 +557,103 @@ static void _resolve_peers(int sd, short args, void *cbdata) if (0 == strncmp(info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) { /* split the string */ ptr = pmix_argv_split(info[n].value.data.string, ','); - nprocs = pmix_argv_count(ptr); - PMIX_PROC_CREATE(procs, nprocs); - if (NULL == procs) { + np = pmix_argv_count(ptr); + PMIX_PROC_CREATE(p, np); + if (NULL == p) { rc = PMIX_ERR_NOMEM; pmix_argv_free(ptr); goto complete; } - for (j=0; j < nprocs; j++) { - (void)strncpy(procs[j].nspace, cb->pname.nspace, PMIX_MAX_NSLEN); - procs[j].rank = strtoul(ptr[j], NULL, 10); + for (j=0; j < np; j++) { + (void)strncpy(p[j].nspace, nspace, PMIX_MAX_NSLEN); + p[j].rank = strtoul(ptr[j], NULL, 10); } - cb->procs = procs; - cb->nprocs = nprocs; rc = PMIX_SUCCESS; pmix_argv_free(ptr); - goto complete; + break; } } complete: - cb->status = rc; - if (NULL != cb->info) { - PMIX_INFO_FREE(cb->info, cb->ninfo); + if (NULL != cb.info) { + PMIX_INFO_FREE(cb.info, cb.ninfo); } - cb->pstatus = rc; - /* post the data so the receiving thread can acquire it */ - PMIX_POST_OBJECT(cb); - PMIX_WAKEUP_THREAD(&cb->lock); - return; -} - -static pmix_status_t resolve_peers(const char *nodename, - const char *nspace, - pmix_proc_t **procs, size_t *nprocs) -{ - pmix_cb_t *cb; - pmix_status_t rc; - pmix_proc_t proc; - - cb = PMIX_NEW(pmix_cb_t); - cb->key = (char*)nodename; - cb->pname.nspace = strdup(nspace); - - PMIX_THREADSHIFT(cb, _resolve_peers); - - /* wait for the result */ - PMIX_WAIT_THREAD(&cb->lock); - - /* if the nspace wasn't found, then we need to - * ask the server for that info */ - if (PMIX_ERR_INVALID_NAMESPACE == cb->status) { - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - /* any key 
will suffice as it will bring down - * the entire data blob */ - rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL); - if (PMIX_SUCCESS != rc) { - PMIX_RELEASE(cb); - return rc; - } - /* retry the fetch */ - cb->lock.active = true; - PMIX_THREADSHIFT(cb, _resolve_peers); - PMIX_WAIT_THREAD(&cb->lock); + if (NULL != cb.key) { + free(cb.key); + cb.key = NULL; } - *procs = cb->procs; - *nprocs = cb->nprocs; + PMIX_DESTRUCT(&cb); + *procs = p; + *nprocs = np; - rc = cb->status; - PMIX_RELEASE(cb); return rc; } -static void _resolve_nodes(int sd, short args, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - pmix_status_t rc; - pmix_kval_t *kv; - pmix_proc_t proc; - - /* create a pmix_info_t so we can pass the nspace - * into the fetch as a qualifier */ - PMIX_INFO_CREATE(cb->info, 1); - if (NULL == cb->info) { - cb->status = PMIX_ERR_NOMEM; - PMIX_POST_OBJECT(cb); - PMIX_WAKEUP_THREAD(&cb->lock); - return; - } - cb->ninfo = 1; - PMIX_INFO_LOAD(&cb->info[0], PMIX_NSPACE, cb->pname.nspace, PMIX_STRING); - /* tell the GDS what we want */ - cb->key = PMIX_NODE_LIST; - /* this data isn't going anywhere, so we don't require a copy */ - cb->copy = false; - /* scope is irrelevant as the info we seek must be local */ - cb->scope = PMIX_SCOPE_UNDEF; - /* put the nspace in the proc field */ - (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); - /* the info will be associated with PMIX_RANK_WILDCARD */ - proc.rank = PMIX_RANK_WILDCARD; - cb->proc = &proc; - - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto complete; - } - /* should just be the one value on the list */ - if (1 != pmix_list_get_size(&cb->kvs)) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - rc = PMIX_ERR_BAD_PARAM; - goto complete; - } - kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs); - /* the PMIX_NODE_LIST key is supposed to return a comma-delimited - * string of nodes in this - check that it did */ - if (NULL == kv->value || - 
PMIX_STRING != kv->value->type) { - PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND); - rc = PMIX_ERR_DATA_VALUE_NOT_FOUND; - goto complete; - } - /* return the string */ - if (NULL != kv->value->data.string) { - cb->key = strdup(kv->value->data.string); - } - - complete: - cb->status = rc; - if (NULL != cb->info) { - PMIX_INFO_FREE(cb->info, cb->ninfo); - } - /* post the data so the receiving thread can acquire it */ - PMIX_POST_OBJECT(cb); - PMIX_WAKEUP_THREAD(&cb->lock); - return; -} - static pmix_status_t resolve_nodes(const char *nspace, char **nodelist) { - pmix_cb_t *cb; + pmix_cb_t cb; pmix_status_t rc; + pmix_kval_t *kv; pmix_proc_t proc; - cb = PMIX_NEW(pmix_cb_t); - cb->pname.nspace = strdup(nspace); + PMIX_CONSTRUCT(&cb, pmix_cb_t); - PMIX_THREADSHIFT(cb, _resolve_nodes); + /* setup default answer */ + *nodelist = NULL; - /* wait for the result */ - PMIX_WAIT_THREAD(&cb->lock); - - /* if the nspace wasn't found, then we need to - * ask the server for that info */ - if (PMIX_ERR_INVALID_NAMESPACE == cb->status) { - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - /* any key will suffice as it will bring down - * the entire data blob */ - rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL); - if (PMIX_SUCCESS != rc) { - PMIX_RELEASE(cb); - return rc; - } - /* retry the fetch */ - cb->lock.active = true; - PMIX_THREADSHIFT(cb, _resolve_nodes); - PMIX_WAIT_THREAD(&cb->lock); + /* create a pmix_info_t so we can pass the nspace + * into the fetch as a qualifier */ + PMIX_INFO_CREATE(cb.info, 1); + if (NULL == cb.info) { + PMIX_DESTRUCT(&cb); + return PMIX_ERR_NOMEM; } - /* the string we want is in the key field */ - *nodelist = cb->key; + cb.ninfo = 1; + PMIX_INFO_LOAD(&cb.info[0], PMIX_NSPACE, nspace, PMIX_STRING); - rc = cb->status; - PMIX_RELEASE(cb); + /* tell the GDS what we want */ + cb.key = PMIX_NODE_MAP; + /* this data isn't going anywhere, so we don't require a copy */ + cb.copy = false; + /* scope is irrelevant 
as the info we seek must be local */ + cb.scope = PMIX_SCOPE_UNDEF; + /* put the nspace in the proc field */ + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + /* the info will be associated with PMIX_RANK_WILDCARD */ + proc.rank = PMIX_RANK_WILDCARD; + cb.proc = &proc; + + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + /* should just be the one value on the list */ + if (1 != pmix_list_get_size(&cb.kvs)) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + rc = PMIX_ERR_BAD_PARAM; + goto complete; + } + kv = (pmix_kval_t*)pmix_list_get_first(&cb.kvs); + /* the PMIX_NODE_MAP key is supposed to return + * a regex string - check that it did */ + if (NULL == kv->value || + PMIX_STRING != kv->value->type) { + PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND); + rc = PMIX_ERR_DATA_VALUE_NOT_FOUND; + goto complete; + } + /* return the string */ + if (NULL != kv->value->data.string) { + *nodelist = strdup(kv->value->data.string); + } + + complete: + if (NULL != cb.info) { + PMIX_INFO_FREE(cb.info, cb.ninfo); + } return rc; - } static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index e1ddba3e6b..ff9426bba6 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -646,16 +646,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } OPAL_LIST_DESTRUCT(&cache); } - if (0 < opal_list_get_size(&local_support) && - NULL != opal_pmix.server_setup_local_support) { - if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support, - ls_cbunc, &lock))) { - ORTE_ERROR_LOG(rc); - goto REPORT_ERROR; - } - } else { - lock.active = false; // we won't get a callback - } /* now that the node array in the job map and jdata are completely filled out,. 
* we need to "wireup" the procs to their nodes so other utilities can @@ -751,6 +741,27 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, orte_rmaps_base_display_map(jdata); } + /* register this job with the PMIx server - need to wait until after we + * have computed the #local_procs before calling the function */ + if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + + /* if we have local support setup info, then execute it here - we + * have to do so AFTER we register the nspace so the PMIx server + * has the nspace info it needs */ + if (0 < opal_list_get_size(&local_support) && + NULL != opal_pmix.server_setup_local_support) { + if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support, + ls_cbunc, &lock))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + } else { + lock.active = false; // we won't get a callback + } + /* if we have a file map, then we need to load it */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) { if (NULL != orte_dfs.load_file_maps) { @@ -763,13 +774,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* load any controls into the job */ orte_rtc.assign(jdata); - /* register this job with the PMIx server - need to wait until after we - * have computed the #local_procs before calling the function */ - if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) { - ORTE_ERROR_LOG(rc); - goto REPORT_ERROR; - } - /* spin up the spawn threads */ orte_odls_base_start_threads(jdata); From 2f85db9791141b0bf23a8d09a10be9084a22e9ed Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 2 Mar 2018 01:59:37 -0800 Subject: [PATCH 4/4] Always register the nspace for jobs Signed-off-by: Ralph Castain --- orte/orted/pmix/pmix_server_register_fns.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git 
a/orte/orted/pmix/pmix_server_register_fns.c b/orte/orted/pmix/pmix_server_register_fns.c index 25fb264759..b6bbebb25c 100644 --- a/orte/orted/pmix/pmix_server_register_fns.c +++ b/orte/orted/pmix/pmix_server_register_fns.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -77,12 +77,6 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid)); - /* if this job has no local procs, then no need to register - * it unless the job info is needed by connecting jobs */ - if (!force && 0 == jdata->num_local_procs) { - return ORTE_SUCCESS; - } - /* setup the info list */ info = OBJ_NEW(opal_list_t); uid = geteuid();