From cfdd08d309d9ebc48229f0ca68ceec64a7e6389f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 2 Oct 2018 11:54:57 -0700 Subject: [PATCH] Remove stale ORTE code Functionality moved to PMIx Signed-off-by: Ralph Castain --- orte/mca/dfs/Makefile.am | 27 - orte/mca/dfs/app/Makefile.am | 36 - orte/mca/dfs/app/dfs_app.c | 1315 --------- orte/mca/dfs/app/dfs_app.h | 35 - orte/mca/dfs/app/dfs_app_component.c | 85 - orte/mca/dfs/app/owner.txt | 7 - orte/mca/dfs/base/Makefile.am | 15 - orte/mca/dfs/base/base.h | 82 - orte/mca/dfs/base/dfs_base_frame.c | 163 -- orte/mca/dfs/base/dfs_base_select.c | 56 - orte/mca/dfs/base/owner.txt | 7 - orte/mca/dfs/dfs.h | 184 -- orte/mca/dfs/dfs_types.h | 76 - orte/mca/dfs/orted/Makefile.am | 36 - orte/mca/dfs/orted/dfs_orted.c | 2388 ----------------- orte/mca/dfs/orted/dfs_orted.h | 38 - orte/mca/dfs/orted/dfs_orted_component.c | 101 - orte/mca/dfs/orted/owner.txt | 7 - orte/mca/dfs/test/Makefile.am | 36 - orte/mca/dfs/test/dfs_test.c | 1149 -------- orte/mca/dfs/test/dfs_test.h | 35 - orte/mca/dfs/test/dfs_test_component.c | 100 - orte/mca/ess/alps/ess_alps_module.c | 2 +- orte/mca/ess/base/base.h | 3 +- orte/mca/ess/base/ess_base_std_app.c | 169 +- orte/mca/ess/base/ess_base_std_orted.c | 15 - orte/mca/ess/env/ess_env_module.c | 2 +- orte/mca/ess/hnp/ess_hnp_module.c | 14 - orte/mca/ess/lsf/ess_lsf_module.c | 2 +- orte/mca/ess/singleton/ess_singleton_module.c | 27 +- orte/mca/ess/slurm/ess_slurm_module.c | 2 +- orte/mca/ess/tm/ess_tm_module.c | 2 +- orte/mca/notifier/Makefile.am | 39 - orte/mca/notifier/base/Makefile.am | 27 - orte/mca/notifier/base/base.h | 87 - orte/mca/notifier/base/notifier_base_fns.c | 200 -- orte/mca/notifier/base/notifier_base_frame.c | 266 -- orte/mca/notifier/base/notifier_base_select.c | 127 - orte/mca/notifier/notifier.h | 234 -- orte/mca/notifier/smtp/.opal_ignore | 0 orte/mca/notifier/smtp/Makefile.am | 54 - orte/mca/notifier/smtp/configure.m4 | 39 - .../notifier/smtp/help-orte-notifier-smtp.txt | 
33 - orte/mca/notifier/smtp/notifier_smtp.h | 68 - .../notifier/smtp/notifier_smtp_component.c | 197 -- orte/mca/notifier/smtp/notifier_smtp_module.c | 316 --- orte/mca/notifier/syslog/Makefile.am | 47 - orte/mca/notifier/syslog/configure.m4 | 31 - orte/mca/notifier/syslog/notifier_syslog.h | 40 - .../syslog/notifier_syslog_component.c | 60 - .../notifier/syslog/notifier_syslog_module.c | 132 - orte/mca/odls/base/odls_base_default_fns.c | 17 - orte/tools/orterun/orterun.c | 3 +- orte/util/attr.c | 2 - orte/util/attr.h | 1 - 55 files changed, 33 insertions(+), 8203 deletions(-) delete mode 100644 orte/mca/dfs/Makefile.am delete mode 100644 orte/mca/dfs/app/Makefile.am delete mode 100644 orte/mca/dfs/app/dfs_app.c delete mode 100644 orte/mca/dfs/app/dfs_app.h delete mode 100644 orte/mca/dfs/app/dfs_app_component.c delete mode 100644 orte/mca/dfs/app/owner.txt delete mode 100644 orte/mca/dfs/base/Makefile.am delete mode 100644 orte/mca/dfs/base/base.h delete mode 100644 orte/mca/dfs/base/dfs_base_frame.c delete mode 100644 orte/mca/dfs/base/dfs_base_select.c delete mode 100644 orte/mca/dfs/base/owner.txt delete mode 100644 orte/mca/dfs/dfs.h delete mode 100644 orte/mca/dfs/dfs_types.h delete mode 100644 orte/mca/dfs/orted/Makefile.am delete mode 100644 orte/mca/dfs/orted/dfs_orted.c delete mode 100644 orte/mca/dfs/orted/dfs_orted.h delete mode 100644 orte/mca/dfs/orted/dfs_orted_component.c delete mode 100644 orte/mca/dfs/orted/owner.txt delete mode 100644 orte/mca/dfs/test/Makefile.am delete mode 100644 orte/mca/dfs/test/dfs_test.c delete mode 100644 orte/mca/dfs/test/dfs_test.h delete mode 100644 orte/mca/dfs/test/dfs_test_component.c delete mode 100644 orte/mca/notifier/Makefile.am delete mode 100644 orte/mca/notifier/base/Makefile.am delete mode 100644 orte/mca/notifier/base/base.h delete mode 100644 orte/mca/notifier/base/notifier_base_fns.c delete mode 100644 orte/mca/notifier/base/notifier_base_frame.c delete mode 100644 
orte/mca/notifier/base/notifier_base_select.c delete mode 100644 orte/mca/notifier/notifier.h delete mode 100644 orte/mca/notifier/smtp/.opal_ignore delete mode 100644 orte/mca/notifier/smtp/Makefile.am delete mode 100644 orte/mca/notifier/smtp/configure.m4 delete mode 100644 orte/mca/notifier/smtp/help-orte-notifier-smtp.txt delete mode 100644 orte/mca/notifier/smtp/notifier_smtp.h delete mode 100644 orte/mca/notifier/smtp/notifier_smtp_component.c delete mode 100644 orte/mca/notifier/smtp/notifier_smtp_module.c delete mode 100644 orte/mca/notifier/syslog/Makefile.am delete mode 100644 orte/mca/notifier/syslog/configure.m4 delete mode 100644 orte/mca/notifier/syslog/notifier_syslog.h delete mode 100644 orte/mca/notifier/syslog/notifier_syslog_component.c delete mode 100644 orte/mca/notifier/syslog/notifier_syslog_module.c diff --git a/orte/mca/dfs/Makefile.am b/orte/mca/dfs/Makefile.am deleted file mode 100644 index c374dfcff8..0000000000 --- a/orte/mca/dfs/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_dfs.la -libmca_dfs_la_SOURCES = - -# local files -headers = dfs.h dfs_types.h -libmca_dfs_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(orteincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/dfs/app/Makefile.am b/orte/mca/dfs/app/Makefile.am deleted file mode 100644 index 7c86273e46..0000000000 --- a/orte/mca/dfs/app/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_app.h \ - dfs_app_component.c \ - dfs_app.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). - -if MCA_BUILD_orte_dfs_app_DSO -component_noinst = -component_install = mca_dfs_app.la -else -component_noinst = libmca_dfs_app.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_app_la_SOURCES = $(sources) -mca_dfs_app_la_LDFLAGS = -module -avoid-version -mca_dfs_app_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_app_la_SOURCES =$(sources) -libmca_dfs_app_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c deleted file mode 100644 index 33676f5095..0000000000 --- a/orte/mca/dfs/app/dfs_app.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/dfs/base/base.h" -#include "dfs_app.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/****************** - * APP module - ******************/ -orte_dfs_base_module_t orte_dfs_app_module = { - init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - 
dfs_get_file_map, - dfs_load_file_maps, - dfs_purge_file_maps -}; - -static opal_list_t requests, active_files; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -static int init(void) -{ - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs, - NULL); - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - return ORTE_SUCCESS; -} - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if 
(OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->uri = strdup(dfs->uri); - /* break the uri down into scheme and filename */ - trk->scheme = opal_uri_get_scheme(dfs->uri); - trk->filename = opal_filename_from_uri(dfs->uri, NULL); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s recvd open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->size_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was 
fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd seek - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the returned offset/status */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->seek_cbfunc) { - dfs->seek_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - 
dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_POST_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd post - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd getfm - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* return it to caller */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(buffer, dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, 
"APP:DFS:RECV WTF"); - break; - } -} - -static void open_local_file(orte_dfs_request_t *dfs) -{ - char *filename; - orte_dfs_tracker_t *trk; - - /* extract the filename from the uri */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, NULL))) { - /* something wrong - error was reported, so just get out */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening local file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - /* attempt to open the file */ - if (0 > (dfs->remote_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(dfs->remote_fd, dfs->cbdata); - } - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->uri = strdup(dfs->uri); - /* break the uri down into scheme and filename */ - trk->scheme = opal_uri_get_scheme(dfs->uri); - trk->filename = strdup(filename); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = dfs->remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* the file is locally hosted */ - trk->host_daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_DAEMON->vpid; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s mapped localfd %d to remotefd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, trk->local_fd, trk->remote_fd); - /* let the caller know */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* request will be released by the calling routing */ -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t 
*dfs = (orte_dfs_request_t*)cbdata; - int rc; - opal_buffer_t *buffer; - char *scheme, *host, *filename; - orte_process_name_t daemon; - opal_list_t lt; - opal_namelist_t *nm; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - goto complete; - } - - if (0 == strcmp(scheme, "nfs")) { - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - goto complete; - } - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - if (NULL == host) { - host = strdup(orte_process_info.nodename); - } - - /* if the host is our own, then treat it as a local file */ - if (orte_ifislocal(host)) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on local host", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - /* ident the daemon on that host */ - daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - /* fetch the daemon for this hostname */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s looking for daemon on host %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host); - OBJ_CONSTRUCT(&lt, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, &lt))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&lt); - goto complete; - } - nm = (opal_namelist_t*)opal_list_get_first(&lt); - daemon.vpid = nm->name.vpid; - OPAL_LIST_DESTRUCT(&lt); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, 
host, ORTE_NAME_PRINT(&daemon)); - - /* double-check: if it is our local daemon, then we - * treat this as local - */ - if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s on same daemon", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&daemon), - filename); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - return; - - complete: - /* we get here if an error occurred - execute any - * pending callback so the proc doesn't hang - */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - 
- -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* if the file is local, close it */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - close(trk->remote_fd); - goto complete; - } - - /* setup a message for the daemon telling - * them what file to close - */ - buffer = OBJ_NEW(opal_buffer_t); - if 
(OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - 
item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - } else { - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(buf.st_size, size_dfs->cbdata); - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - size_dfs->id = req_id++; - opal_list_append(&requests, &size_dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - 
ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &size_dfs->super); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - /* leave the request there */ - return; - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(seek_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd 
- */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < seek_dfs->read_length && - SEEK_SET == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < (off_t)(trk->location + seek_dfs->read_length) && - SEEK_CUR == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else { - lseek(trk->remote_fd, seek_dfs->read_length, seek_dfs->remote_fd); - if (SEEK_SET == seek_dfs->remote_fd) { - trk->location = seek_dfs->read_length; - } else { - trk->location += seek_dfs->read_length; - } - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(seek_dfs->read_length, seek_dfs->cbdata); - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto 
complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - /* leave the request */ - return; - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - long nbytes; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = 
opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* if the file is local, read the desired bytes */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - nbytes = read(trk->remote_fd, read_dfs->read_buffer, read_dfs->read_length); - if (0 < nbytes) { - /* update our location */ - trk->location += nbytes; - } - /* pass them back to the caller */ - if (NULL != read_dfs->read_cbfunc) { - read_dfs->read_cbfunc(nbytes, read_dfs->read_buffer, read_dfs->cbdata); - } - /* request is complete */ - OBJ_RELEASE(read_dfs); - return; - } - /* add this request to our pending list */ - read_dfs->id = req_id++; - opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request 
*/ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the buffer's contents to our local daemon for storage */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* add my name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* pack the payload */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->post_cbfunc) { - 
dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->bptr = bo; - dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the request to our local daemon */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* and the target */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->target, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = 
cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} diff --git a/orte/mca/dfs/app/dfs_app.h b/orte/mca/dfs/app/dfs_app.h deleted file mode 100644 index fef69fdf58..0000000000 --- a/orte/mca/dfs/app/dfs_app.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_app_EXPORT_H -#define MCA_dfs_app_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_app_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_app_module; - -END_C_DECLS - -#endif /* MCA_dfs_app_EXPORT_H */ diff --git a/orte/mca/dfs/app/dfs_app_component.c b/orte/mca/dfs/app/dfs_app_component.c deleted file mode 100644 index 1479007ac0..0000000000 --- a/orte/mca/dfs/app/dfs_app_component.c +++ /dev/null @@ -1,85 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_app.h" - -/* - * Public string for version number - */ -const char *orte_dfs_app_component_version_string = - "ORTE DFS app MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dfs_app_open(void); -static int dfs_app_close(void); -static int dfs_app_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_app_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "app", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_app_open, - .mca_close_component = dfs_app_close, - .mca_query_component = dfs_app_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int dfs_app_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_app_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_app_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_APP) { - /* set our priority high as we are the default for apps */ - *priority = 1000; - *module = (mca_base_module_t *)&orte_dfs_app_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} diff --git a/orte/mca/dfs/app/owner.txt b/orte/mca/dfs/app/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- 
a/orte/mca/dfs/app/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/base/Makefile.am b/orte/mca/dfs/base/Makefile.am deleted file mode 100644 index eb03638718..0000000000 --- a/orte/mca/dfs/base/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_dfs_la_SOURCES += \ - base/dfs_base_select.c \ - base/dfs_base_frame.c diff --git a/orte/mca/dfs/base/base.h b/orte/mca/dfs/base/base.h deleted file mode 100644 index 8356b488cd..0000000000 --- a/orte/mca/dfs/base/base.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef ORTE_MCA_DFS_BASE_H -#define ORTE_MCA_DFS_BASE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/types.h" -#include "orte/constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/mca.h" -#include "orte/mca/dfs/dfs.h" - - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_dfs_base_framework; -/* select a component */ -ORTE_DECLSPEC int orte_dfs_base_select(void); - -/* tracker for active files */ -typedef struct { - opal_list_item_t super; - orte_process_name_t requestor; - orte_process_name_t host_daemon; - char *uri; - char *scheme; - char *filename; - int local_fd; - int remote_fd; - size_t location; -} orte_dfs_tracker_t; -OBJ_CLASS_DECLARATION(orte_dfs_tracker_t); - -/* requests */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - uint64_t id; - orte_dfs_cmd_t cmd; - orte_process_name_t target; - char *uri; - int local_fd; - int remote_fd; - uint8_t *read_buffer; - long read_length; - opal_buffer_t *bptr; - opal_buffer_t bucket; - orte_dfs_open_callback_fn_t open_cbfunc; - orte_dfs_close_callback_fn_t close_cbfunc; - orte_dfs_size_callback_fn_t size_cbfunc; - orte_dfs_seek_callback_fn_t seek_cbfunc; - orte_dfs_read_callback_fn_t read_cbfunc; - orte_dfs_post_callback_fn_t post_cbfunc; - orte_dfs_fm_callback_fn_t fm_cbfunc; - orte_dfs_load_callback_fn_t load_cbfunc; - orte_dfs_purge_callback_fn_t purge_cbfunc; - void *cbdata; -} orte_dfs_request_t; -OBJ_CLASS_DECLARATION(orte_dfs_request_t); - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/base/dfs_base_frame.c b/orte/mca/dfs/base/dfs_base_frame.c deleted file mode 100644 index 77ce617142..0000000000 --- a/orte/mca/dfs/base/dfs_base_frame.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. 
- * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" - -#include "orte/util/show_help.h" -#include "orte/mca/dfs/base/base.h" - -#include "orte/mca/dfs/base/static-components.h" - -/* - * Globals - */ -orte_dfs_base_module_t orte_dfs = { - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -static int orte_dfs_base_close(void) -{ - /* Close selected component */ - if (NULL != orte_dfs.finalize) { - orte_dfs.finalize(); - } - - return mca_base_framework_components_close(&orte_dfs_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. 
- */ -static int orte_dfs_base_open(mca_base_open_flag_t flags) -{ - /* Open up all available components */ - return mca_base_framework_components_open(&orte_dfs_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, dfs, "ORTE Distributed File System", - NULL, orte_dfs_base_open, orte_dfs_base_close, - mca_dfs_base_static_components, 0); - - -/* instantiate classes */ -static void trk_con(orte_dfs_tracker_t *trk) -{ - trk->host_daemon.jobid = ORTE_JOBID_INVALID; - trk->host_daemon.vpid = ORTE_VPID_INVALID; - trk->uri = NULL; - trk->scheme = NULL; - trk->filename = NULL; - trk->location = 0; -} -static void trk_des(orte_dfs_tracker_t *trk) -{ - if (NULL != trk->uri) { - free(trk->uri); - } - if (NULL != trk->scheme) { - free(trk->scheme); - } - if (NULL != trk->filename) { - free(trk->filename); - } -} -OBJ_CLASS_INSTANCE(orte_dfs_tracker_t, - opal_list_item_t, - trk_con, trk_des); -static void req_const(orte_dfs_request_t *dfs) -{ - dfs->id = 0; - dfs->uri = NULL; - dfs->local_fd = -1; - dfs->remote_fd = -1; - dfs->read_length = -1; - dfs->bptr = NULL; - OBJ_CONSTRUCT(&dfs->bucket, opal_buffer_t); - dfs->read_buffer = NULL; - dfs->open_cbfunc = NULL; - dfs->close_cbfunc = NULL; - dfs->size_cbfunc = NULL; - dfs->seek_cbfunc = NULL; - dfs->read_cbfunc = NULL; - dfs->post_cbfunc = NULL; - dfs->fm_cbfunc = NULL; - dfs->load_cbfunc = NULL; - dfs->purge_cbfunc = NULL; - dfs->cbdata = NULL; -} -static void req_dest(orte_dfs_request_t *dfs) -{ - if (NULL != dfs->uri) { - free(dfs->uri); - } - OBJ_DESTRUCT(&dfs->bucket); -} -OBJ_CLASS_INSTANCE(orte_dfs_request_t, - opal_list_item_t, - req_const, req_dest); - -static void jobfm_const(orte_dfs_jobfm_t *fm) -{ - OBJ_CONSTRUCT(&fm->maps, opal_list_t); -} -static void jobfm_dest(orte_dfs_jobfm_t *fm) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&fm->maps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&fm->maps); -} -OBJ_CLASS_INSTANCE(orte_dfs_jobfm_t, - opal_list_item_t, - jobfm_const, 
jobfm_dest); - -static void vpidfm_const(orte_dfs_vpidfm_t *fm) -{ - OBJ_CONSTRUCT(&fm->data, opal_buffer_t); - fm->num_entries = 0; -} -static void vpidfm_dest(orte_dfs_vpidfm_t *fm) -{ - OBJ_DESTRUCT(&fm->data); -} -OBJ_CLASS_INSTANCE(orte_dfs_vpidfm_t, - opal_list_item_t, - vpidfm_const, vpidfm_dest); diff --git a/orte/mca/dfs/base/dfs_base_select.c b/orte/mca/dfs/base/dfs_base_select.c deleted file mode 100644 index bf0a7c2d67..0000000000 --- a/orte/mca/dfs/base/dfs_base_select.c +++ /dev/null @@ -1,56 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/output.h" - -#include "orte/mca/dfs/base/base.h" - -int orte_dfs_base_select(void) -{ - int exit_status = ORTE_SUCCESS; - orte_dfs_base_component_t *best_component = NULL; - orte_dfs_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if (OPAL_SUCCESS != mca_base_select("dfs", orte_dfs_base_framework.framework_output, - &orte_dfs_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component, NULL)) { - /* This will only happen if no component was selected, which - * is okay - we don't have to select anything - */ - return ORTE_SUCCESS; - } - - /* Save the winner */ - orte_dfs = *best_module; - - /* Initialize the winner */ - if (NULL != best_module && NULL != orte_dfs.init) { - if (ORTE_SUCCESS != orte_dfs.init()) { - exit_status = ORTE_ERROR; - goto cleanup; - } - } - - cleanup: - return exit_status; -} diff --git a/orte/mca/dfs/base/owner.txt b/orte/mca/dfs/base/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/orte/mca/dfs/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ 
-# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/dfs.h b/orte/mca/dfs/dfs.h deleted file mode 100644 index 136c0d76b6..0000000000 --- a/orte/mca/dfs/dfs.h +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_MCA_DFS_H -#define ORTE_MCA_DFS_H - -#include "orte_config.h" -#include "orte/types.h" - -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/dfs/dfs_types.h" - -BEGIN_C_DECLS - -/* - * Framework Interfaces - */ -/** - * Module initialization function. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_dfs_base_module_init_fn_t)(void); - -/** - * Module finalization function. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_dfs_base_module_finalize_fn_t)(void); - -/* Open a file - * - * Open a possibly remote file for reading. The uri can include file - * system descriptions (e.g., file:///, nfs:///, or hdfs:///). Note - * that this is a full uri - i.e., it may include a hostname to - * indicate where the file is located - * - * The file descriptor will be returned in the cbfunc. 
It - * represents the number by which the file can be referenced, - * and will be an ORTE error code upon failure - */ -typedef void (*orte_dfs_base_module_open_fn_t)(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); - -/* Close a file - * - * Closes and invalidates the file descriptor - */ -typedef void (*orte_dfs_base_module_close_fn_t)(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); - -/* Get the size of a file - * - */ -typedef void (*orte_dfs_base_module_get_file_size_fn_t)(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); - -/* Position a file - * - * Move the read position in the file to the specified byte number - * relative to the location specified by whence: - * SEEK_SET => from beginning of file - * SEEK_CUR => from current location - * - * The callback will return the offset, or a negative value if - * the requested seek would take the pointer past the end of the - * file. This is contrary to standard lseek behavior, but is consistent - * with the read-only nature of this framework - */ -typedef void (*orte_dfs_base_module_seek_fn_t)(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); - -/* Read bytes from a possibly remote file - * - * Read the specified number of bytes from the given file, using the - * specified offset (in bytes). The status returned in cbfunc is the actual number - * of bytes read, which should match the request unless the requested - * length/offset would read past the end of file. 
An ORTE error code - * will be returned upon error - * - * Note: the caller is responsible for ensuring the buffer is at least - * length bytes in size - */ -typedef void (*orte_dfs_base_module_read_fn_t)(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); - - -/* Post a file map so others may access it */ -typedef void (*orte_dfs_base_module_post_file_map_fn_t)(opal_buffer_t *buf, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); - -/* Get the file map for a process - * - * Returns the file map associated with the specified process name. If - * NULL is provided, then all known process maps will be returned in the - * byte object. It is the responsibility of the caller to unpack it, so - * applications are free to specify whatever constitutes a "file map" that - * suits their purposes - */ -typedef void (*orte_dfs_base_module_get_file_map_fn_t)(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); - - -/* Load file maps for a job - */ -typedef void (*orte_dfs_base_module_load_file_maps_fn_t)(orte_jobid_t jobid, - opal_buffer_t *buf, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); - -/* Purge file maps for a job */ -typedef void (*orte_dfs_base_module_purge_file_maps_fn_t)(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/* - * Module Structure - */ -struct orte_dfs_base_module_1_0_0_t { - /** Initialization Function */ - orte_dfs_base_module_init_fn_t init; - /** Finalization Function */ - orte_dfs_base_module_finalize_fn_t finalize; - - orte_dfs_base_module_open_fn_t open; - orte_dfs_base_module_close_fn_t close; - orte_dfs_base_module_get_file_size_fn_t get_file_size; - orte_dfs_base_module_seek_fn_t seek; - orte_dfs_base_module_read_fn_t read; - orte_dfs_base_module_post_file_map_fn_t post_file_map; - orte_dfs_base_module_get_file_map_fn_t get_file_map; - orte_dfs_base_module_load_file_maps_fn_t load_file_maps; - 
orte_dfs_base_module_purge_file_maps_fn_t purge_file_maps; -}; -typedef struct orte_dfs_base_module_1_0_0_t orte_dfs_base_module_1_0_0_t; -typedef orte_dfs_base_module_1_0_0_t orte_dfs_base_module_t; -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs; - -/* - * DFS Component - */ -struct orte_dfs_base_component_1_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct orte_dfs_base_component_1_0_0_t orte_dfs_base_component_1_0_0_t; -typedef orte_dfs_base_component_1_0_0_t orte_dfs_base_component_t; - -/* - * Macro for use in components that are of type errmgr - */ -#define ORTE_DFS_BASE_VERSION_1_0_0 \ - ORTE_MCA_BASE_VERSION_2_1_0("dfs", 1, 0, 0) - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/dfs_types.h b/orte/mca/dfs/dfs_types.h deleted file mode 100644 index 1f3e088e1d..0000000000 --- a/orte/mca/dfs/dfs_types.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_MCA_DFS_TYPES_H -#define ORTE_MCA_DFS_TYPES_H - -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/dss/dss_types.h" -#include "opal/util/proc.h" - -BEGIN_C_DECLS - -typedef uint8_t orte_dfs_cmd_t; -#define ORTE_DFS_CMD_T OPAL_UINT8 - -#define ORTE_DFS_OPEN_CMD 1 -#define ORTE_DFS_CLOSE_CMD 2 -#define ORTE_DFS_SIZE_CMD 3 -#define ORTE_DFS_SEEK_CMD 4 -#define ORTE_DFS_READ_CMD 5 -#define ORTE_DFS_POST_CMD 6 -#define ORTE_DFS_GETFM_CMD 7 -#define ORTE_DFS_LOAD_CMD 8 -#define ORTE_DFS_PURGE_CMD 9 -#define ORTE_DFS_RELAY_POSTS_CMD 10 - -/* file maps */ -typedef struct { - opal_list_item_t super; - orte_jobid_t jobid; - opal_list_t maps; -} orte_dfs_jobfm_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dfs_jobfm_t); - -typedef struct { - opal_list_item_t super; - orte_vpid_t vpid; - int num_entries; - opal_buffer_t data; -} orte_dfs_vpidfm_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dfs_vpidfm_t); - -typedef void (*orte_dfs_open_callback_fn_t)(int fd, void *cbdata); - -typedef void (*orte_dfs_close_callback_fn_t)(int fd, void *cbdata); - -typedef void (*orte_dfs_size_callback_fn_t)(long size, void *cbdata); - -typedef void (*orte_dfs_seek_callback_fn_t)(long offset, void *cbdata); - -typedef void (*orte_dfs_read_callback_fn_t)(long status, - uint8_t *buffer, - void *cbdata); - -typedef void (*orte_dfs_post_callback_fn_t)(void *cbdata); - -typedef void (*orte_dfs_fm_callback_fn_t)(opal_buffer_t *fmaps, void *cbdata); - -typedef void (*orte_dfs_load_callback_fn_t)(void *cbdata); - -typedef void (*orte_dfs_purge_callback_fn_t)(void *cbdata); - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/orted/Makefile.am b/orte/mca/dfs/orted/Makefile.am deleted file mode 100644 index 90946f6f4c..0000000000 --- a/orte/mca/dfs/orted/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
-# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_orted.h \ - dfs_orted_component.c \ - dfs_orted.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_dfs_orted_DSO -component_noinst = -component_install = mca_dfs_orted.la -else -component_noinst = libmca_dfs_orted.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_orted_la_SOURCES = $(sources) -mca_dfs_orted_la_LDFLAGS = -module -avoid-version -mca_dfs_orted_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_orted_la_SOURCES =$(sources) -libmca_dfs_orted_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/orted/dfs_orted.c b/orte/mca/dfs/orted/dfs_orted.c deleted file mode 100644 index ee3c9d5a33..0000000000 --- a/orte/mca/dfs/orted/dfs_orted.c +++ /dev/null @@ -1,2388 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" - -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_orted.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); -/****************** - * Daemon/HNP module - ******************/ -orte_dfs_base_module_t orte_dfs_orted_module = { 
- init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - dfs_get_file_map, - dfs_load_file_maps, - dfs_purge_file_maps -}; - -static void* worker_thread_engine(opal_object_t *obj); - -typedef struct { - opal_object_t super; - int idx; - opal_event_base_t *event_base; - bool active; - opal_thread_t thread; -} worker_thread_t; -static void wt_const(worker_thread_t *ptr) -{ - /* create an event base for this thread */ - ptr->event_base = opal_event_base_create(); - /* construct the thread object */ - OBJ_CONSTRUCT(&ptr->thread, opal_thread_t); - /* fork off a thread to progress it */ - ptr->active = true; - ptr->thread.t_run = worker_thread_engine; - ptr->thread.t_arg = ptr; - opal_thread_start(&ptr->thread); -} -static void wt_dest(worker_thread_t *ptr) -{ - /* stop the thread */ - ptr->active = false; - /* break the loop */ - opal_event_base_loopbreak(ptr->event_base); - /* wait for thread to exit */ - opal_thread_join(&ptr->thread, NULL); - OBJ_DESTRUCT(&ptr->thread); - /* release the event base */ - opal_event_base_free(ptr->event_base); -} -OBJ_CLASS_INSTANCE(worker_thread_t, - opal_object_t, - wt_const, wt_dest); - -typedef struct { - opal_object_t super; - opal_event_t ev; - uint64_t rid; - orte_dfs_tracker_t *trk; - int64_t nbytes; - int whence; -} worker_req_t; -OBJ_CLASS_INSTANCE(worker_req_t, - opal_object_t, - NULL, NULL); -#define ORTE_DFS_POST_WORKER(r, cb) \ - do { \ - worker_thread_t *wt; \ - wt = (worker_thread_t*)opal_pointer_array_get_item(&worker_threads, wt_cntr); \ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, \ - "%s assigning req to worker thread %d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - wt->idx); \ - opal_event_set(wt->event_base, &((r)->ev), \ - -1, OPAL_EV_WRITE, (cb), (r)); \ - opal_event_active(&((r)->ev), OPAL_EV_WRITE, 1); \ - /* move to the next thread */ \ - wt_cntr++; \ - if (wt_cntr == orte_dfs_orted_num_worker_threads) { \ - wt_cntr = 0; \ - } \ - } 
while(0); - -static opal_list_t requests, active_files, file_maps; -static opal_pointer_array_t worker_threads; -static int wt_cntr = 0; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs_cmd(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static void recv_dfs_data(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static void remote_read(int fd, short args, void *cbata); -static void remote_open(int fd, short args, void *cbdata); -static void remote_size(int fd, short args, void *cbdata); -static void remote_seek(int fd, short args, void *cbdata); - -static int init(void) -{ - int i; - worker_thread_t *wt; - - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - OBJ_CONSTRUCT(&file_maps, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_CMD, - ORTE_RML_PERSISTENT, - recv_dfs_cmd, - NULL); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs_data, - NULL); - OBJ_CONSTRUCT(&worker_threads, opal_pointer_array_t); - opal_pointer_array_init(&worker_threads, 1, INT_MAX, 1); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s starting %d worker threads", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_dfs_orted_num_worker_threads); - for (i=0; i < orte_dfs_orted_num_worker_threads; i++) { - wt = OBJ_NEW(worker_thread_t); - wt->idx = i; - opal_pointer_array_add(&worker_threads, wt); - } - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - int i; - worker_thread_t *wt; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_CMD); - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = 
opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - while (NULL != (item = opal_list_remove_first(&file_maps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&file_maps); - for (i=0; i < worker_threads.size; i++) { - if (NULL != (wt = (worker_thread_t*)opal_pointer_array_get_item(&worker_threads, i))) { - OBJ_RELEASE(wt); - } - } - OBJ_DESTRUCT(&worker_threads); - - return ORTE_SUCCESS; -} - -static void open_local_file(orte_dfs_request_t *dfs) -{ - char *filename; - orte_dfs_tracker_t *trk; - - /* extract the filename from the uri */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, NULL))) { - /* something wrong - error was reported, so just get out */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening local file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - /* attempt to open the file */ - if (0 > (dfs->remote_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(dfs->remote_fd, dfs->cbdata); - } - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = dfs->remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* the file is locally hosted */ - trk->host_daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_DAEMON->vpid; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s mapped localfd %d to remotefd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, trk->local_fd, trk->remote_fd); - /* let the 
caller know */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* request will be released by the calling routing */ -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - int rc; - opal_buffer_t *buffer = NULL; - char *scheme = NULL, *host = NULL, *filename = NULL; - int v; - orte_node_t *node, *nptr; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - OBJ_RELEASE(dfs); - return; - } - - if (0 == strcmp(scheme, "nfs")) { - open_local_file(dfs); - goto complete; - } - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - goto complete; - } - - free(scheme); - scheme = NULL; - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - /* if the host is our own, then treat it as a local file */ - if (NULL == host || orte_ifislocal(host)) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on local host", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - goto complete; - } - - /* ident the daemon on that host */ - node = NULL; - for (v=0; v < orte_node_pool->size; v++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, v))) { - continue; - } - if (NULL == nptr->daemon) { - continue; - } - if (0 == strcmp(host, nptr->name)) { - node = nptr; - break; - } - } - if (NULL == node) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, host, 
ORTE_NAME_PRINT(&node->daemon->name)); - - free(host); - host = NULL; - /* double-check: if it is our local daemon, then we - * treat this as local - */ - if (node->daemon->name.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s on same daemon", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&node->daemon->name), - filename); - - free(filename); - filename = NULL; - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &node->daemon->name, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - return; - - complete: - if (NULL != buffer) { - OBJ_RELEASE(buffer); - } - if (NULL != scheme) { - free(scheme); - } - if (NULL != host) { - free(host); - } - if (NULL != filename) { - free(filename); - } 
- OBJ_RELEASE(dfs); -} - - -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* if the file is local, close it */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - close(trk->remote_fd); - goto complete; - } - - /* setup a message for the daemon telling - * them what file to close - */ - buffer = 
OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = 
opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - } - goto complete; - } - - /* setup a message for the daemon telling - * them what file to get the size of - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(seek_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < seek_dfs->read_length && - SEEK_SET == seek_dfs->remote_fd) { - /* seek 
would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < (off_t)(trk->location + seek_dfs->read_length) && - SEEK_CUR == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else { - lseek(trk->remote_fd, seek_dfs->read_length, seek_dfs->remote_fd); - if (SEEK_SET == seek_dfs->remote_fd) { - trk->location = seek_dfs->read_length; - } else { - trk->location += seek_dfs->read_length; - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - 
orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - long nbytes; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* if the file is local, read the desired bytes */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - nbytes = read(trk->remote_fd, read_dfs->read_buffer, read_dfs->read_length); - if (0 < nbytes) { - /* update our location */ - trk->location += nbytes; - } - /* pass them back to the caller */ - if (NULL != read_dfs->read_cbfunc) { - read_dfs->read_cbfunc(nbytes, read_dfs->read_buffer, read_dfs->cbdata); - } - /* request is complete */ - OBJ_RELEASE(read_dfs); - return; - } - /* add this request to our pending list */ - read_dfs->id = req_id++; - 
opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request */ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_jobfm_t *jptr, *jfm; - orte_dfs_vpidfm_t *vptr, *vfm; - 
opal_list_item_t *item; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s posting file map containing %d bytes for target %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)dfs->bptr->bytes_used, ORTE_NAME_PRINT(&dfs->target)); - - /* lookup the job map */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - /* add it */ - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = dfs->target.jobid; - opal_list_append(&file_maps, &jfm->super); - } - /* see if we already have an entry for this source */ - vfm = NULL; - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vptr = (orte_dfs_vpidfm_t*)item; - if (vptr->vpid == dfs->target.vpid) { - vfm = vptr; - break; - } - } - if (NULL == vfm) { - /* add it */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = dfs->target.vpid; - opal_list_append(&jfm->maps, &vfm->super); - } - - /* add this entry to our collection */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - vfm->num_entries++; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s target %s now has %d entries", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&dfs->target), - vfm->num_entries); - - cleanup: - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *buffer, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->target.jobid = ORTE_PROC_MY_NAME->jobid; - dfs->target.vpid = ORTE_PROC_MY_NAME->vpid; - dfs->bptr = buffer; - 
dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static int get_job_maps(orte_dfs_jobfm_t *jfm, - orte_vpid_t vpid, - opal_buffer_t *buf) -{ - orte_dfs_vpidfm_t *vfm; - opal_list_item_t *item; - int rc; - int entries=0; - - /* if the target vpid is WILDCARD, then process - * data for all vpids - else, find the one - */ - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vfm = (orte_dfs_vpidfm_t*)item; - if (ORTE_VPID_WILDCARD == vpid || - vfm->vpid == vpid) { - entries++; - /* indicate data from this vpid */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &vfm->vpid, 1, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return -1; - } - /* pack the number of posts we received from it */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &vfm->num_entries, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return -1; - } - /* copy the data across */ - opal_dss.copy_payload(buf, &vfm->data); - } - } - return entries; -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_jobfm_t *jfm; - opal_list_item_t *item; - opal_buffer_t xfer; - int32_t n, ntotal; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* if the target job is WILDCARD, then process - * data for all jobids - else, find the one - */ - ntotal = 0; - n = -1; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jfm = (orte_dfs_jobfm_t*)item; - if (ORTE_JOBID_WILDCARD == dfs->target.jobid || - jfm->jobid == dfs->target.jobid) { - n = get_job_maps(jfm, dfs->target.vpid, &dfs->bucket); - if (n < 0) { - break; - } - ntotal += n; - } - } - - if (n < 0) { - /* indicates an error */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - } else { - OBJ_CONSTRUCT(&xfer, opal_buffer_t); - 
if (OPAL_SUCCESS != (rc = opal_dss.pack(&xfer, &ntotal, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - return; - } - opal_dss.copy_payload(&xfer, &dfs->bucket); - /* pass it back to caller */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(&xfer, dfs->cbdata); - } - OBJ_DESTRUCT(&xfer); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get file map for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void process_load(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_list_item_t *item; - orte_dfs_jobfm_t *jfm, *jptr; - orte_dfs_vpidfm_t *vfm; - orte_vpid_t vpid; - int32_t entries, nvpids; - int cnt, i, j; - int rc; - opal_buffer_t *xfer; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* see if we already have a tracker for this job */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL != jfm) { - /* need to purge it first */ - while (NULL != (item = opal_list_remove_first(&jfm->maps))) { - OBJ_RELEASE(item); - } - } else { - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = dfs->target.jobid; - opal_list_append(&file_maps, &jfm->super); - } - - /* retrieve the number of vpids in the map */ - cnt = 1; - if 
(OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &nvpids, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading file maps from %d vpids", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nvpids); - - /* unpack the buffer */ - for (i=0; i < nvpids; i++) { - /* unpack this vpid */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &vpid, &cnt, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* unpack the number of file maps in this entry */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &entries, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading %d entries in file map for vpid %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - entries, ORTE_VPID_PRINT(vpid)); - /* create the entry */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = vpid; - vfm->num_entries = entries; - /* copy the data */ - for (j=0; j < entries; j++) { - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &xfer, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &xfer, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - OBJ_RELEASE(xfer); - } - opal_list_append(&jfm->maps, &vfm->super); - } - - complete: - if (NULL != dfs->load_cbfunc) { - dfs->load_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *buf, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading file maps for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_LOAD_CMD; - dfs->target.jobid = jobid; - dfs->bptr = buf; - dfs->load_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - 
/* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_load, ORTE_SYS_PRI); -} - -static void process_purge(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_list_item_t *item; - orte_dfs_jobfm_t *jfm, *jptr; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* find the job tracker */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - } else { - /* remove it from the list */ - opal_list_remove_item(&file_maps, &jfm->super); - /* the destructor will release the list of maps - * in the jobfm object - */ - OBJ_RELEASE(jfm); - } - - if (NULL != dfs->purge_cbfunc) { - dfs->purge_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s purging file maps for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_PURGE_CMD; - dfs->target.jobid = jobid; - dfs->purge_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_purge, ORTE_SYS_PRI); -} - - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs_cmd(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - opal_list_item_t *item; - int my_fd; - int32_t rc, nmaps; - char *filename; - orte_dfs_tracker_t *trk; - int64_t i64, bytes_read; - uint8_t *read_buf; - uint64_t rid; - int 
whence; - struct stat buf; - orte_process_name_t source; - opal_buffer_t *bptr, *xfer; - orte_dfs_request_t *dfs; - orte_dfs_jobfm_t *jfm, *jptr; - orte_dfs_vpidfm_t *vfm, *vptr; - opal_buffer_t *answer, bucket; - int i, j; - orte_vpid_t vpid; - int32_t nentries, ncontributors; - worker_req_t *wrkr; - - /* unpack the command */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received command %d from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the filename */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &filename, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - /* create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = sender->jobid; - trk->requestor.vpid = sender->vpid; - trk->host_daemon.jobid = ORTE_PROC_MY_NAME->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_NAME->vpid; - trk->filename = strdup(filename); - opal_list_append(&active_files, &trk->super); - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = rid; - ORTE_DFS_POST_WORKER(wrkr, remote_open); - return; - } - /* no worker threads, so attempt to open the file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - if (0 > (my_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - goto answer_open; - } - trk->local_fd = my_fd; - answer_open: - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS 
!= (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &my_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_CLOSE_CMD: - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* find the corresponding tracker */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* remove it */ - opal_list_remove_item(&active_files, item); - OBJ_RELEASE(item); - /* close the file */ - close(my_fd); - break; - } - } - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* find the corresponding tracker */ - i64 = -1; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = rid; - ORTE_DFS_POST_WORKER(wrkr, remote_size); - return; - } - /* no worker threads, so stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* 
cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - } else { - i64 = buf.st_size; - } - break; - } - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the offset */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the whence */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &whence, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* set default error */ - bytes_read = -1; - /* find the corresponding tracker - we do this to ensure - * that the local fd we were sent is actually open - */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = 
rid; - wrkr->nbytes = i64; - wrkr->whence = whence; - ORTE_DFS_POST_WORKER(wrkr, remote_seek); - return; - } - /* no worker threads, so stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - } else if (buf.st_size < i64 && SEEK_SET == whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek SET past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - bytes_read = -2; - } else if (buf.st_size < (off_t)(trk->location + i64) && - SEEK_CUR == whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek CUR past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - bytes_read = -3; - } else { - lseek(my_fd, i64, whence); - if (SEEK_SET == whence) { - trk->location = i64; - } else { - trk->location += i64; - } - bytes_read = i64; - } - break; - } - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* return the offset/status */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld offset back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case 
ORTE_DFS_READ_CMD: - /* set default error */ - my_fd = -1; - bytes_read = -1; - read_buf = NULL; - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto answer_read; - } - /* unpack the number of bytes to read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto answer_read; - } - /* find the corresponding tracker - we do this to ensure - * that the local fd we were sent is actually open - */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->rid = rid; - wrkr->trk = trk; - wrkr->nbytes = i64; - /* dispatch to the currently indexed thread */ - ORTE_DFS_POST_WORKER(wrkr, remote_read); - return; - } else { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s reading %ld bytes from local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)i64, my_fd); - /* do the read */ - read_buf = (uint8_t*)malloc(i64); - if (NULL == read_buf) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto answer_read; - } - bytes_read = read(my_fd, read_buf, (long)i64); - if (0 < bytes_read) { - /* update our location */ - trk->location += bytes_read; - } - } - break; - } - } - answer_read: - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - 
ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - /* include the number of bytes read */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - /* include the bytes read */ - if (0 < bytes_read) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, read_buf, bytes_read, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - free(read_buf); - return; - } - } - if (NULL != read_buf) { - free(read_buf); - } - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld bytes back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_POST_CMD: - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received post command from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender)); - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* unpack the name of the source of this data */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* unpack their buffer object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* add the contents to the storage for this process */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->target.jobid = source.jobid; - dfs->target.vpid = source.vpid; - dfs->bptr = bptr; - dfs->post_cbfunc = NULL; - process_posts(0, 0, (void*)dfs); - 
OBJ_RELEASE(bptr); - answer_post: - if (UINT64_MAX != rid) { - /* return an ack */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } - } - break; - - case ORTE_DFS_RELAY_POSTS_CMD: - /* unpack the name of the source of this data */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received relayed posts from sender %s for source %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender), - ORTE_NAME_PRINT(&source)); - /* lookup the job map */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == source.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - /* add it */ - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = source.jobid; - opal_list_append(&file_maps, &jfm->super); - } - /* see if we already have an entry for this source */ - vfm = NULL; - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vptr = (orte_dfs_vpidfm_t*)item; - if (vptr->vpid == source.vpid) { - vfm = vptr; - break; - } - } - if (NULL == vfm) { - /* add it */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = source.vpid; - opal_list_append(&jfm->maps, &vfm->super); - } - /* unpack their buffer object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - 
return; - } - /* the buffer object came from a call to get_file_maps, so it isn't quite - * the same as when someone posts directly to us. So process it here by - * starting with getting the number of vpids that contributed. This - * should always be one, but leave it open for flexibility - */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &ncontributors, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - /* loop thru the number of contributors */ - for (i=0; i < ncontributors; i++) { - /* unpack the vpid of the contributor */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &vpid, &cnt, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the number of entries */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &nentries, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - for (j=0; j < nentries; j++) { - /* get the entry */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &xfer, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - return; - } - /* store it */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &xfer, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - return; - } - OBJ_RELEASE(xfer); - vfm->num_entries++; - } - } - OBJ_RELEASE(bptr); - /* no reply required */ - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the target */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return; - } - /* construct the response */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our data tree for matches, assembling them - * into a byte object - */ - /* if 
the target job is WILDCARD, then process - * data for all jobids - else, find the one - */ - OBJ_CONSTRUCT(&bucket, opal_buffer_t); - nmaps = 0; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jfm = (orte_dfs_jobfm_t*)item; - if (ORTE_JOBID_WILDCARD == source.jobid || - jfm->jobid == source.jobid) { - rc = get_job_maps(jfm, source.vpid, &bucket); - if (rc < 0) { - break; - } else { - nmaps += rc; - } - } - } - if (rc < 0) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - } else { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &nmaps, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - if (0 < nmaps) { - opal_dss.copy_payload(answer, &bucket); - } - } - OBJ_DESTRUCT(&bucket); - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s getf-cmd: returning %d maps with %d bytes to sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nmaps, - (int)answer->bytes_used, ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } - break; - - default: - opal_output(0, "ORTED:DFS:RECV_DFS WTF"); - break; - } -} - -static void recv_dfs_data(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - 
ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - 
opal_list_append(&active_files, &trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - 
item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, "ORTED:DFS:RECV:DATA WTF"); - break; - } -} - -static void* worker_thread_engine(opal_object_t *obj) -{ - opal_thread_t *thread = (opal_thread_t*)obj; - worker_thread_t *ptr = (worker_thread_t*)thread->t_arg; - - while (ptr->active) { - opal_event_loop(ptr->event_base, OPAL_EVLOOP_ONCE); - } - return OPAL_THREAD_CANCELLED; -} - -static void remote_open(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_OPEN_CMD; - int rc; - - /* attempt to open the file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - if (0 > (req->trk->local_fd = open(req->trk->filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if 
(OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->trk->local_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_size(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - int rc; - struct stat buf; - int64_t i64; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_SIZE_CMD; - - if (0 > stat(req->trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - } else { - i64 = buf.st_size; - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_seek(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_SEEK_CMD; - int rc; - struct stat buf; - int64_t i64; - - /* stat the file and get its size */ - if (0 > stat(req->trk->filename, &buf)) { - /* 
cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - } else if (buf.st_size < req->nbytes && SEEK_SET == req->whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek SET past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - i64 = -2; - } else if (buf.st_size < (off_t)(req->trk->location + req->nbytes) && - SEEK_CUR == req->whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek CUR past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - i64 = -3; - } else { - lseek(req->trk->local_fd, req->nbytes, req->whence); - if (SEEK_SET == req->whence) { - req->trk->location = req->nbytes; - } else { - req->trk->location += req->nbytes; - } - i64 = req->nbytes; - } - - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_read(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - uint8_t *read_buf; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_READ_CMD; - int64_t bytes_read; - int rc; - - /* do the read */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s issuing read", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - read_buf = (uint8_t*)malloc(req->nbytes); - if (NULL == read_buf) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return; - } - bytes_read = read(req->trk->local_fd, read_buf, (long)req->nbytes); - if (0 < bytes_read) { - /* update our location */ - req->trk->location += bytes_read; - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - /* include the number of bytes read */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - /* include the bytes read */ - if (0 < bytes_read) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, read_buf, bytes_read, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - } - free(read_buf); - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld bytes back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(&req->trk->requestor)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - OBJ_RELEASE(req); -} diff --git a/orte/mca/dfs/orted/dfs_orted.h b/orte/mca/dfs/orted/dfs_orted.h deleted file mode 100644 index b2b2f44096..0000000000 --- a/orte/mca/dfs/orted/dfs_orted.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_orted_EXPORT_H -#define MCA_dfs_orted_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_orted_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_orted_module; - -extern int orte_dfs_orted_num_worker_threads; - -END_C_DECLS - -#endif /* MCA_dfs_orted_EXPORT_H */ diff --git a/orte/mca/dfs/orted/dfs_orted_component.c b/orte/mca/dfs/orted/dfs_orted_component.c deleted file mode 100644 index f102b898b1..0000000000 --- a/orte/mca/dfs/orted/dfs_orted_component.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_orted.h" - -/* - * Public string for version number - */ -const char *orte_dfs_orted_component_version_string = - "ORTE DFS orted MCA component version " ORTE_VERSION; - -int orte_dfs_orted_num_worker_threads = 0; - -/* - * Local functionality - */ -static int dfs_orted_register(void); -static int dfs_orted_open(void); -static int dfs_orted_close(void); -static int dfs_orted_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_orted_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component itdefault_orted - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - 
/* Component name and version */ - .mca_component_name = "orted", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_orted_open, - .mca_close_component = dfs_orted_close, - .mca_query_component = dfs_orted_component_query, - .mca_register_component_params = dfs_orted_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int dfs_orted_register(void) -{ - orte_dfs_orted_num_worker_threads = 0; - (void) mca_base_component_var_register(&mca_dfs_orted_component.base_version, "num_worker_threads", - "Number of worker threads to use for processing file requests", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &orte_dfs_orted_num_worker_threads); - - return ORTE_SUCCESS; -} - -static int dfs_orted_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_orted_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_orted_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { - /* we are the default component for daemons and HNP */ - *priority = 1000; - *module = (mca_base_module_t *)&orte_dfs_orted_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} - diff --git a/orte/mca/dfs/orted/owner.txt b/orte/mca/dfs/orted/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/orte/mca/dfs/orted/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/test/Makefile.am b/orte/mca/dfs/test/Makefile.am deleted file mode 100644 index 1abd1f6dbc..0000000000 --- a/orte/mca/dfs/test/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_test.h \ - dfs_test_component.c \ - dfs_test.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_dfs_test_DSO -component_noinst = -component_install = mca_dfs_test.la -else -component_noinst = libmca_dfs_test.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_test_la_SOURCES = $(sources) -mca_dfs_test_la_LDFLAGS = -module -avoid-version -mca_dfs_test_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_test_la_SOURCES =$(sources) -libmca_dfs_test_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/test/dfs_test.c b/orte/mca/dfs/test/dfs_test.c deleted file mode 100644 index 24392e013d..0000000000 --- a/orte/mca/dfs/test/dfs_test.c +++ /dev/null @@ -1,1149 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/dfs/base/base.h" -#include "dfs_test.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/****************** - * TEST module - ******************/ -orte_dfs_base_module_t orte_dfs_test_module = { - init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - dfs_get_file_map, - 
dfs_load_file_maps, - dfs_purge_file_maps -}; - -static opal_list_t requests, active_files; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -static int init(void) -{ - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs, - NULL); - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - return ORTE_SUCCESS; -} - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = 
opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, 
trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->size_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s recvd seek - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the returned offset/status */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->seek_cbfunc) { - dfs->seek_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_POST_CMD: - /* unpack 
the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd post - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd getfm - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* return it to caller */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(buffer, dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, "TEST:DFS:RECV WTF"); - break; - } -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; 
- int rc; - opal_buffer_t *buffer; - char *scheme, *host=NULL, *filename=NULL; - orte_process_name_t daemon; - opal_list_t lt; - opal_namelist_t *nm; - - ORTE_ACQUIRE_OBJECT(dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s GOT SCHEME", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - free(scheme); - goto complete; - } - free(scheme); - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s GOT FILENAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filename); - if (NULL == host) { - host = strdup(orte_process_info.nodename); - } - - /* ident the daemon on that host */ - daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - OBJ_CONSTRUCT(<, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, <))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(<); - goto complete; - } - nm = (opal_namelist_t*)opal_list_get_first(<); - daemon.vpid = nm->name.vpid; - OPAL_LIST_DESTRUCT(<); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, host, ORTE_NAME_PRINT(&daemon)); - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ 
- buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&daemon), - filename); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - free(host); - free(filename); - return; - - complete: - /* we get here if an error occurred - execute any - * pending callback so the proc doesn't hang - */ - if (NULL != host) { - free(host); - } - if (NULL != filename) { - free(filename); - } - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - - -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - 
dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* setup a message for the daemon telling - * them what file to close - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - 
opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - size_dfs->id = req_id++; - opal_list_append(&requests, &size_dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - 
opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &size_dfs->super); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - /* leave the request there */ - return; - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(seek_dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - 
OBJ_RELEASE(buffer); - goto complete; - } - /* leave the request */ - return; - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* add this request to our pending list */ - read_dfs->id = req_id++; - opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) 
{ - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request */ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the buffer's contents to our local daemon for storage */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; 
- } - /* add my name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* pack the payload */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->bptr = bo; - dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the request to our local daemon */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* and the target */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->target, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = 
orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} diff --git a/orte/mca/dfs/test/dfs_test.h b/orte/mca/dfs/test/dfs_test.h deleted file mode 100644 index d9ef7b301b..0000000000 --- a/orte/mca/dfs/test/dfs_test.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_test_EXPORT_H -#define MCA_dfs_test_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_test_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_test_module; - -END_C_DECLS - -#endif /* MCA_dfs_test_EXPORT_H */ diff --git a/orte/mca/dfs/test/dfs_test_component.c b/orte/mca/dfs/test/dfs_test_component.c deleted file mode 100644 index 11ec09ced4..0000000000 --- a/orte/mca/dfs/test/dfs_test_component.c +++ /dev/null @@ -1,100 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_test.h" - -/* - * Public string for version number - */ -const char *orte_dfs_test_component_version_string = - "ORTE DFS test MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dfs_test_register(void); -static int dfs_test_open(void); -static int dfs_test_close(void); -static int dfs_test_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_test_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "test", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, 
ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_test_open, - .mca_close_component = dfs_test_close, - .mca_query_component = dfs_test_component_query, - .mca_register_component_params = dfs_test_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static bool select_me = false; - -static int dfs_test_register(void) -{ - select_me = false; - (void) mca_base_component_var_register(&mca_dfs_test_component.base_version, "select", - "Apps select the test plug-in for the DFS framework", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, &select_me); - - return ORTE_SUCCESS; -} - -static int dfs_test_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_test_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_test_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_APP && select_me) { - /* set our priority high so apps use us */ - *priority = 10000; - *module = (mca_base_module_t *)&orte_dfs_test_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 533a054e8b..bfbbc3d9f6 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -45,7 +45,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_alps_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 679bac4b19..d8706b7bab 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
- * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -63,7 +63,6 @@ ORTE_DECLSPEC int orte_ess_base_std_prolog(void); ORTE_DECLSPEC int orte_ess_base_app_setup(bool db_restrict_local); ORTE_DECLSPEC int orte_ess_base_app_finalize(void); -ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report); ORTE_DECLSPEC int orte_ess_base_tool_setup(opal_list_t *flags); ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index a02711f5f4..f0a7b848d2 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -48,17 +48,10 @@ #include "opal/util/proc.h" #include "opal/runtime/opal.h" -#include "orte/mca/rml/base/base.h" -#include "orte/mca/routed/base/base.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/dfs/base/base.h" -#include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/filem/base/base.h" #include "orte/mca/errmgr/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/state/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -75,7 +68,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) { int ret; char *error = NULL; - opal_list_t transports; OPAL_TIMING_ENV_INIT(ess_base_setup); /* @@ -166,48 +158,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) } OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs"); - /* Setup the communication infrastructure */ - /* Routed system */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_routed_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = 
orte_routed_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_routed_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open"); - - /* - * OOB Layer - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_oob_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_oob_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open"); - - /* Runtime Messaging Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_rml_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_rml_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open"); - /* if we have info on the HNP and local daemon, process it */ if (NULL != orte_process_info.my_hnp_uri) { /* we have to set the HNP's name, even though we won't route messages directly @@ -260,56 +210,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) } OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select"); - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = 
"orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit"); - - /* - * Group communications - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_grpcomm_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_grpcomm_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open"); - - /* open the distributed file system */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open"); - return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", @@ -320,25 +220,14 @@ int orte_ess_base_app_setup(bool db_restrict_local) int orte_ess_base_app_finalize(void) { - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - /* close frameworks */ (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); - /* now can close the rml and its friendly group comm */ - (void) mca_base_framework_close(&orte_grpcomm_base_framework); - (void) mca_base_framework_close(&orte_dfs_base_framework); - (void) mca_base_framework_close(&orte_routed_base_framework); - - (void) mca_base_framework_close(&orte_rml_base_framework); if (NULL != opal_pmix.finalize) { opal_pmix.finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); } - (void) mca_base_framework_close(&orte_oob_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); if (NULL == opal_pmix.register_cleanup) { @@ 
-349,59 +238,3 @@ int orte_ess_base_app_finalize(void) return ORTE_SUCCESS; } - -/* - * We do NOT call the regular C-library "abort" function, even - * though that would have alerted us to the fact that this is - * an abnormal termination, because it would automatically cause - * a core file to be generated. On large systems, that can be - * overwhelming (imagine a few thousand Gbyte-sized files hitting - * a shared file system simultaneously...ouch!). - * - * However, this causes a problem for OpenRTE as the system truly - * needs to know that this actually IS an abnormal termination. - * To get around the problem, we drop a marker in the proc-level - * session dir. If session dir's were not allowed, then we just - * ignore this question. - * - * In some cases, however, we DON'T want to create that alert. For - * example, if an orted detects that the HNP has died, then there - * is truly nobody to alert! In these cases, we pass report=false - * to indicate that we don't want the marker dropped. - */ -void orte_ess_base_app_abort(int status, bool report) -{ - int fd; - char *myfile; - struct timespec tp = {0, 100000}; - - /* Exit - do NOT do a normal finalize as this will very likely - * hang the process. We are aborting due to an abnormal condition - * that precludes normal cleanup - * - * We do need to do the following bits to make sure we leave a - * clean environment. Taken from orte_finalize(): - * - Assume errmgr cleans up child processes before we exit. - */ - - /* If we were asked to report this termination, do so. 
- * Since singletons don't start an HNP unless necessary, and - * direct-launched procs don't have daemons at all, only send - * the message if routing is enabled as this indicates we - * have someone to send to - */ - if (report && orte_routing_is_enabled && orte_create_session_dirs) { - myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL); - fd = open(myfile, O_CREAT, S_IRUSR); - close(fd); - /* now introduce a short delay to allow any pending - * messages (e.g., from a call to "show_help") to - * have a chance to be sent */ - nanosleep(&tp, NULL); - } - /* - Clean out the global structures - * (not really necessary, but good practice) */ - orte_proc_info_finalize(); - /* Now Exit */ - _exit(status); -} diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 7f50533880..9711bd64fd 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -52,7 +52,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/dfs/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" @@ -621,18 +620,6 @@ int orte_ess_base_orted_setup(void) goto error; } - /* setup the DFS framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_select"; - goto error; - } - return ORTE_SUCCESS; error: @@ -684,8 +671,6 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); - /* close the dfs so its threads can exit */ - (void) 
mca_base_framework_close(&orte_dfs_base_framework); /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); (void) mca_base_framework_close(&orte_rtc_base_framework); diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index df55650e4a..ac107d08b1 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -77,7 +77,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_env_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL }; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 70f79e67bf..e8e811645e 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -62,7 +62,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/rtc/base/base.h" -#include "orte/mca/dfs/base/base.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" @@ -699,18 +698,6 @@ static int rte_init(void) goto error; } - /* setup the dfs framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_select"; - goto error; - } - /* setup to support debugging */ orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS, orte_debugger_init_after_spawn, @@ -795,7 +782,6 @@ static int rte_finalize(void) /* shutdown the pmix server */ pmix_server_finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); - (void) mca_base_framework_close(&orte_dfs_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); /* output any lingering stdout/err data */ fflush(stdout); diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index 18d3ddc554..ec5fbfe724 100644 
--- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -53,7 +53,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_lsf_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 7729b9bc0f..b6b5262a89 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science @@ -63,11 +63,12 @@ static int rte_init(void); static int rte_finalize(void); +static void rte_abort(int status, bool report); orte_ess_base_module_t orte_ess_singleton_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + rte_abort, NULL /* ft_event */ }; @@ -584,3 +585,25 @@ static int fork_hnp(void) return ORTE_SUCCESS; } } + +static void rte_abort(int status, bool report) +{ + struct timespec tp = {0, 100000}; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, + "%s ess:singleton:abort: abort with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + status)); + + /* PMI doesn't like NULL messages, but our interface + * doesn't provide one - so rig one up here + */ + opal_pmix.abort(status, "N/A", NULL); + + /* provide a little delay for the PMIx thread to + * get the info out */ + nanosleep(&tp, NULL); + + /* Now Exit */ + _exit(status); +} diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 47dddbed49..de14bdc0e9 100644 --- 
a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -54,7 +54,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_slurm_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 20373798c4..16a6e74b1a 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -53,7 +53,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_tm_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; diff --git a/orte/mca/notifier/Makefile.am b/orte/mca/notifier/Makefile.am deleted file mode 100644 index 52444ea125..0000000000 --- a/orte/mca/notifier/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_notifier.la -libmca_notifier_la_SOURCES = - -# local files -headers = notifier.h - -libmca_notifier_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(includedir)/openmpi/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/notifier/base/Makefile.am b/orte/mca/notifier/base/Makefile.am deleted file mode 100644 index 89171605f5..0000000000 --- a/orte/mca/notifier/base/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_notifier_la_SOURCES += \ - base/notifier_base_frame.c \ - base/notifier_base_select.c \ - base/notifier_base_fns.c diff --git a/orte/mca/notifier/base/base.h b/orte/mca/notifier/base/base.h deleted file mode 100644 index 2f944dfd9d..0000000000 --- a/orte/mca/notifier/base/base.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_NOTIFIER_BASE_H -#define MCA_NOTIFIER_BASE_H - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/base/base.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_notifier_base_framework; - -typedef struct { - opal_event_base_t *ev_base; - bool ev_base_active; - opal_list_t modules; - orte_notifier_severity_t severity_level; - char *default_actions; - char *emerg_actions; - char *alert_actions; - char *crit_actions; - char *warn_actions; - char *notice_actions; - char *info_actions; - char *debug_actions; - char *error_actions; -} orte_notifier_base_t; - -/* - * Type for holding selected module / component pairs - */ -typedef struct { - opal_list_item_t super; - /* Component */ - orte_notifier_base_component_t *component; - /* Module */ - orte_notifier_base_module_t *module; -} orte_notifier_active_module_t; -OBJ_CLASS_DECLARATION(orte_notifier_active_module_t); - -ORTE_DECLSPEC extern orte_notifier_base_t orte_notifier_base; - -/* select a component */ -ORTE_DECLSPEC int orte_notifier_base_select(void); - -/* base functions */ -ORTE_DECLSPEC void orte_notifier_base_log(int sd, short args, void *cbdata); -ORTE_DECLSPEC void orte_notifier_base_event(int sd, short args, void *cbdata); -ORTE_DECLSPEC void 
orte_notifier_base_report(int sd, short args, void *cbdata); - -/* severity to string */ -ORTE_DECLSPEC const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity); -END_C_DECLS -#endif diff --git a/orte/mca/notifier/base/notifier_base_fns.c b/orte/mca/notifier/base/notifier_base_fns.c deleted file mode 100644 index 1a6751a208..0000000000 --- a/orte/mca/notifier/base/notifier_base_fns.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/util/argv.h" - -#include "orte/util/attr.h" -#include "orte/util/threads.h" -#include "orte/mca/notifier/base/base.h" - - -static void orte_notifier_base_identify_modules(char ***modules, - orte_notifier_request_t *req); - -void orte_notifier_base_log(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* check if the severity is >= severity level set for - * reporting - note that the severity enum value goes up - * as severity goes down */ - if (orte_notifier_base.severity_level < req->severity ) { - return; - } - - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->log(req); - } - } - opal_argv_free(modules); -} - -void orte_notifier_base_event(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* check if the severity is >= severity level set for - * reporting - note that the severity enum value goes up - * as severity goes down */ - if (orte_notifier_base.severity_level < 
req->severity ) { - return; - } - - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->event(req); - } - } - opal_argv_free(modules); -} - -void orte_notifier_base_report(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* see if the job requested any notifications */ - if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)modules, OPAL_STRING)) { - return; - } - - /* need to process the notification string to get the names of the modules */ - if (NULL == modules) { - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->report(req); - } - } - opal_argv_free(modules); -} - -const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity) -{ - switch (severity) { - case ORTE_NOTIFIER_EMERG: return "EMERGENCY"; break; - case ORTE_NOTIFIER_ALERT: return "ALERT"; break; - case ORTE_NOTIFIER_CRIT: return "CRITICAL"; break; - case ORTE_NOTIFIER_ERROR: return "ERROR"; break; - case ORTE_NOTIFIER_WARN: return "WARNING"; break; - case ORTE_NOTIFIER_NOTICE: return 
"NOTICE"; break; - case ORTE_NOTIFIER_INFO: return "INFO"; break; - case ORTE_NOTIFIER_DEBUG: return "DEBUG"; break; - default: return "UNKNOWN"; break; - } -} - -static void orte_notifier_base_identify_modules(char ***modules, - orte_notifier_request_t *req) -{ - if (NULL != req->action) { - *modules = opal_argv_split(req->action, ','); - } else { - if (ORTE_NOTIFIER_EMERG == req->severity && - (NULL != orte_notifier_base.emerg_actions)) { - *modules = opal_argv_split(orte_notifier_base.emerg_actions, ','); - } else if (ORTE_NOTIFIER_ALERT == req->severity && - (NULL != orte_notifier_base.alert_actions)) { - *modules = opal_argv_split(orte_notifier_base.alert_actions, ','); - } else if (ORTE_NOTIFIER_CRIT == req->severity && - (NULL != orte_notifier_base.crit_actions)) { - *modules = opal_argv_split(orte_notifier_base.crit_actions, ','); - } else if (ORTE_NOTIFIER_WARN == req->severity && - (NULL != orte_notifier_base.warn_actions)) { - *modules = opal_argv_split(orte_notifier_base.warn_actions, ','); - } else if (ORTE_NOTIFIER_NOTICE == req->severity && - (NULL != orte_notifier_base.notice_actions)) { - *modules = opal_argv_split(orte_notifier_base.notice_actions, ','); - } else if (ORTE_NOTIFIER_INFO == req->severity && - (NULL != orte_notifier_base.info_actions)) { - *modules = opal_argv_split(orte_notifier_base.info_actions, ','); - } else if (ORTE_NOTIFIER_DEBUG == req->severity && - (NULL != orte_notifier_base.debug_actions)) { - *modules = opal_argv_split(orte_notifier_base.debug_actions, ','); - } else if (ORTE_NOTIFIER_ERROR == req->severity && - (NULL != orte_notifier_base.error_actions)) { - *modules = opal_argv_split(orte_notifier_base.error_actions, ','); - } else if (NULL != orte_notifier_base.default_actions) { - *modules = opal_argv_split(orte_notifier_base.default_actions, ','); - } - } - return; -} diff --git a/orte/mca/notifier/base/notifier_base_frame.c b/orte/mca/notifier/base/notifier_base_frame.c deleted file mode 100644 index 
207998c473..0000000000 --- a/orte/mca/notifier/base/notifier_base_frame.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/fd.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/runtime/opal_progress_threads.h" -#include "orte/mca/notifier/base/base.h" - -/* default module to use for logging*/ -#define ORTE_NOTIFIER_DEFAULT_MODULE "syslog" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "orte/mca/notifier/base/static-components.h" - -/* - * Global variables - */ -opal_list_t orte_notifier_base_components_available = {{0}}; -int orte_notifier_debug_output = -1; - -orte_notifier_base_t orte_notifier_base = {0}; - -static char *notifier_severity = NULL; -static bool use_progress_thread = false; - -/** - * Function for selecting a set of components from all those that are - * available. 
- * - * Examples: - * 1) - * -mca notifier syslog,smtp - * --> syslog and smtp are selected for the loging - */ -static int orte_notifier_base_register(mca_base_register_flag_t flags) -{ - (void) mca_base_var_register("orte", "notifier", "base", "use_progress_thread", - "Use a dedicated progress thread for notifications [default: false]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &use_progress_thread); - - /* let the user define a base level of severity to report */ - (void) mca_base_var_register("orte", "notifier", "base", "severity_level", - "Report all events at or above this severity [default: error]", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &notifier_severity); - if (NULL == notifier_severity) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ERROR; - } else if (0 == strncasecmp(notifier_severity, "emerg", strlen("emerg"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_EMERG; - } else if (0 == strncasecmp(notifier_severity, "alert", strlen("alert"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ALERT; - } else if (0 == strncasecmp(notifier_severity, "crit", strlen("crit"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_CRIT; - } else if (0 == strncasecmp(notifier_severity, "warn", strlen("warn"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_WARN; - } else if (0 == strncasecmp(notifier_severity, "notice", strlen("notice"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_NOTICE; - } else if (0 == strncasecmp(notifier_severity, "info", strlen("info"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_INFO; - } else if (0 == strncasecmp(notifier_severity, "debug", strlen("debug"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_DEBUG; - } else { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ERROR; - } - - /* let the user define a base default actions */ - orte_notifier_base.default_actions = NULL; - (void)
mca_base_var_register("orte", "notifier", "base", "default_actions", - "Report all events to the default actions:NONE,syslog,smtp", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.default_actions); - - if (NULL == orte_notifier_base.default_actions) { - orte_notifier_base.default_actions = strdup(ORTE_NOTIFIER_DEFAULT_MODULE); - } - /* let the user define a action for emergency events */ - orte_notifier_base.emerg_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "emerg_event_actions", - "Report emergency events to the specified actions: example 'smtp'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.emerg_actions); - - /* let the user define a action for alert events */ - orte_notifier_base.alert_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "alert_event_actions", - "Report alert events to the specified actions: example 'smtp'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.alert_actions); - - /* let the user define a action for critical events */ - orte_notifier_base.crit_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "crit_event_actions", - "Report critical events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.crit_actions); - - /* let the user define a action for warning events */ - orte_notifier_base.warn_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "warn_event_actions", - "Report warning events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.warn_actions); - - /* let the user define a action for notice events */ - 
orte_notifier_base.notice_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "notice_event_actions", - "Report notice events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.notice_actions); - - /* let the user define a action for info events */ - orte_notifier_base.info_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "info_event_actions", - "Report info events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.info_actions); - - /* let the user define a action for debug events */ - orte_notifier_base.debug_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "debug_event_actions", - "Report debug events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.debug_actions); - - /* let the user define a action for error events */ - orte_notifier_base.error_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "error_event_actions", - "Report error events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.error_actions); - - return ORTE_SUCCESS; -} - -static int orte_notifier_base_close(void) -{ - orte_notifier_active_module_t *i_module; - - if (orte_notifier_base.ev_base_active) { - orte_notifier_base.ev_base_active = false; - opal_progress_thread_finalize("notifier"); - } - - OPAL_LIST_FOREACH(i_module, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != i_module->module->finalize) { - i_module->module->finalize(); - } - } - OPAL_LIST_DESTRUCT(&orte_notifier_base.modules); - - /* close all remaining available components */ - return 
mca_base_framework_components_close(&orte_notifier_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_notifier_base_open(mca_base_open_flag_t flags) -{ - int rc; - - /* construct the array of modules */ - OBJ_CONSTRUCT(&orte_notifier_base.modules, opal_list_t); - - /* if requested, create our own event base */ - if (use_progress_thread) { - orte_notifier_base.ev_base_active = true; - if (NULL == (orte_notifier_base.ev_base = - opal_progress_thread_init("notifier"))) { - orte_notifier_base.ev_base_active = false; - return ORTE_ERROR; - } - } else { - orte_notifier_base.ev_base = orte_event_base; - } - - /* Open up all available components */ - rc = mca_base_framework_components_open(&orte_notifier_base_framework, - flags); - orte_notifier_debug_output = orte_notifier_base_framework.framework_output; - return rc; -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, notifier, "ORTE Notifier Framework", - orte_notifier_base_register, - orte_notifier_base_open, orte_notifier_base_close, - mca_notifier_base_static_components, 0); - - -OBJ_CLASS_INSTANCE (orte_notifier_active_module_t, - opal_list_item_t, - NULL, NULL); - -static void req_cons (orte_notifier_request_t *r) -{ - r->jdata = NULL; - r->msg = NULL; - r->t = 0; -} -static void req_des(orte_notifier_request_t *r) -{ - if (NULL != r->jdata) { - OBJ_RELEASE(r->jdata); - } -} -OBJ_CLASS_INSTANCE (orte_notifier_request_t, - opal_object_t, - req_cons, req_des); diff --git a/orte/mca/notifier/base/notifier_base_select.c b/orte/mca/notifier/base/notifier_base_select.c deleted file mode 100644 index cdd9142ff2..0000000000 --- a/orte/mca/notifier/base/notifier_base_select.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/notifier/base/base.h" - -/* Global variables */ -/* - * orte_notifier_base_selected is set to true if at least 1 module has - * been selected for the notifier log API interface. - */ -static bool orte_notifier_base_selected = false; - -/** - * Function for weeding out notifier components that don't want to run. - * - * Call the init function on all available compoenent to find out if - * they want to run. Select all components that don't fail. Failing - * Components will be closed and unloaded. The selected modules will - * be returned to the called in a opal_list_t. - */ - -int orte_notifier_base_select(void) -{ - mca_base_component_list_item_t *cli = NULL; - orte_notifier_base_component_t *component = NULL; - mca_base_module_t *module = NULL; - int priority; - orte_notifier_active_module_t *tmp_module; - orte_notifier_base_module_t *bmod; - - if (orte_notifier_base_selected) { - return ORTE_SUCCESS; - } - orte_notifier_base_selected = true; - - opal_output_verbose(10, orte_notifier_base_framework.framework_output, - "notifier:base:select: Auto-selecting components"); - - /* - * Traverse the list of available components. 
- * For each call their 'query' functions to see if they are available. - */ - OPAL_LIST_FOREACH(cli, &orte_notifier_base_framework.framework_components, mca_base_component_list_item_t) { - component = (orte_notifier_base_component_t *) cli->cli_component; - - /* - * If there is a query function then use it. - */ - if (NULL == component->base_version.mca_query_component) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Skipping component [%s]. It does not implement a query function", - component->base_version.mca_component_name ); - continue; - } - - /* - * Query this component for the module and priority - */ - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Querying component [%s]", - component->base_version.mca_component_name); - - component->base_version.mca_query_component(&module, &priority); - - /* - * If no module was returned or negative priority, then skip component - */ - if (NULL == module || priority < 0) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Skipping component [%s]. 
Query failed to return a module", - component->base_version.mca_component_name ); - continue; - } - bmod = (orte_notifier_base_module_t*)module; - - /* see if it can be init'd */ - if (NULL != bmod->init) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:init module called with priority [%s] %d", - component->base_version.mca_component_name, priority); - if (ORTE_SUCCESS != bmod->init()) { - continue; - } - } - /* - * Append them to the list - */ - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select adding component [%s]", - component->base_version.mca_component_name); - tmp_module = OBJ_NEW(orte_notifier_active_module_t); - tmp_module->component = component; - tmp_module->module = (orte_notifier_base_module_t*)module; - - opal_list_append(&orte_notifier_base.modules, (void*)tmp_module); - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/notifier.h b/orte/mca/notifier/notifier.h deleted file mode 100644 index 8c7eb8529d..0000000000 --- a/orte/mca/notifier/notifier.h +++ /dev/null @@ -1,234 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The OpenRTE Notifier Framework - * - * The OpenRTE Notifier framework provides a mechanism for notifying - * system administrators or other fault monitoring systems that a - * problem with the underlying cluster has been detected - e.g., a - * failed connection in a network fabric - */ - -#ifndef MCA_NOTIFIER_H -#define MCA_NOTIFIER_H - -/* - * includes - */ - -#include "orte_config.h" - -#include -#include -#ifdef HAVE_SYSLOG_H -#include -#endif - -#include "orte/mca/mca.h" - -#include "orte/constants.h" -#include "orte/types.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/threads.h" - -BEGIN_C_DECLS - -/* make the verbose channel visible here so everyone - * doesn't have to include notifier/base/base.h */ -ORTE_DECLSPEC extern int orte_notifier_debug_output; - -/* The maximum size of any on-stack buffers used in the notifier - * so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions. - * The code has NOT been auditied for use of malloc, so this still - * may fail to get the "OUT_OF_RESOURCE" message out. Oh Well. 
- */ -#define ORTE_NOTIFIER_MAX_BUF 512 - -/* Severities */ -typedef enum { -#ifdef HAVE_SYSLOG_H - ORTE_NOTIFIER_EMERG = LOG_EMERG, - ORTE_NOTIFIER_ALERT = LOG_ALERT, - ORTE_NOTIFIER_CRIT = LOG_CRIT, - ORTE_NOTIFIER_ERROR = LOG_ERR, - ORTE_NOTIFIER_WARN = LOG_WARNING, - ORTE_NOTIFIER_NOTICE = LOG_NOTICE, - ORTE_NOTIFIER_INFO = LOG_INFO, - ORTE_NOTIFIER_DEBUG = LOG_DEBUG -#else - ORTE_NOTIFIER_EMERG, - ORTE_NOTIFIER_ALERT, - ORTE_NOTIFIER_CRIT, - ORTE_NOTIFIER_ERROR, - ORTE_NOTIFIER_WARN, - ORTE_NOTIFIER_NOTICE, - ORTE_NOTIFIER_INFO, - ORTE_NOTIFIER_DEBUG -#endif -} orte_notifier_severity_t; - -typedef struct { - opal_object_t super; - opal_event_t ev; - orte_job_t *jdata; - orte_job_state_t state; - orte_notifier_severity_t severity; - int errcode; - const char *msg; - const char *action; - time_t t; -} orte_notifier_request_t; -OBJ_CLASS_DECLARATION(orte_notifier_request_t); - -/* - * Component functions - all MUST be provided! - */ - -/* initialize the selected module */ -typedef int (*orte_notifier_base_module_init_fn_t)(void); - -/* finalize the selected module */ -typedef void (*orte_notifier_base_module_finalize_fn_t)(void); - -/* Log an internal error - this will include the job that caused the - * error to occur */ -typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_request_t *req); - -/* Report a system event - e.g., a temperature out-of-bound */ -typedef void (*orte_notifier_base_module_event_fn_t)(orte_notifier_request_t *req); - -/* Report a job state */ -typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *req); - - -#define ORTE_NOTIFIER_INTERNAL_ERROR(j, st, s, e, m) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier:internal:error[%s:%d] " \ - "job %s error %s severity %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_JOBID_PRINT((NULL == (j)) ? 
\ - ORTE_JOBID_INVALID : \ - (j)->jobid), \ - ORTE_ERROR_NAME((e)), \ - orte_notifier_base_sev2str(s)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (j); \ - _n->state = (st); \ - _n->severity = (s); \ - _n->errcode = (e); \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->action = (NULL); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define ORTE_NOTIFIER_JOB_STATE(j, st, m) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier[%s:%d] job %s state %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_JOBID_PRINT((NULL == (j)) ? \ - ORTE_JOBID_INVALID : \ - (j)->jobid), \ - orte_job_state_to_str(st)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (j); \ - _n->state = (st); \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->action = (NULL); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define ORTE_NOTIFIER_SYSTEM_EVENT(s, m, a) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier:sys:event[%s:%d] event %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - orte_notifier_base_sev2str(s)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (NULL); \ - _n->state = (NULL); \ - _n->jdata = NULL; \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->severity = (s); \ - _n->action = (a); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, 
orte_notifier_base_event, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -/* - * Ver 1.0 - */ -typedef struct { - orte_notifier_base_module_init_fn_t init; - orte_notifier_base_module_finalize_fn_t finalize; - orte_notifier_base_module_log_fn_t log; - orte_notifier_base_module_event_fn_t event; - orte_notifier_base_module_report_fn_t report; -} orte_notifier_base_module_t; - - -/* - * the standard component data structure - */ -typedef struct { - mca_base_component_t base_version; - mca_base_component_data_t base_data; -} orte_notifier_base_component_t; - - -/* - * Macro for use in components that are of type notifier v1.0.0 - */ -#define ORTE_NOTIFIER_BASE_VERSION_1_0_0 \ - /* notifier v1.0 is chained to MCA v2.0 */ \ - ORTE_MCA_BASE_VERSION_2_1_0("notifier", 1, 0, 0) - -END_C_DECLS - -#endif /* MCA_NOTIFIER_H */ diff --git a/orte/mca/notifier/smtp/.opal_ignore b/orte/mca/notifier/smtp/.opal_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/orte/mca/notifier/smtp/Makefile.am b/orte/mca/notifier/smtp/Makefile.am deleted file mode 100644 index 87e978e534..0000000000 --- a/orte/mca/notifier/smtp/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(notifier_smtp_CPPFLAGS) - -dist_ortedata_DATA = \ - help-orte-notifier-smtp.txt - -sources = \ - notifier_smtp.h \ - notifier_smtp_module.c \ - notifier_smtp_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_notifier_smtp_DSO -component_noinst = -component_install = mca_notifier_smtp.la -else -component_noinst = libmca_notifier_smtp.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_notifier_smtp_la_SOURCES = $(sources) -mca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS) -mca_notifier_smtp_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(notifier_smtp_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_notifier_smtp_la_SOURCES =$(sources) -libmca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS) -libmca_notifier_smtp_la_LIBADD = $(notifier_smtp_LIBS) diff --git a/orte/mca/notifier/smtp/configure.m4 b/orte/mca/notifier/smtp/configure.m4 deleted file mode 100644 index a4a4771050..0000000000 --- a/orte/mca/notifier/smtp/configure.m4 +++ /dev/null @@ -1,39 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_notifier_smtp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_notifier_smtp_CONFIG], [ - AC_CONFIG_FILES([orte/mca/notifier/smtp/Makefile]) - - AC_CHECK_TYPES( [include/libesmtp.h], - [libesmtp*], - [libesmtp.h], - [esmtp], - [smtp_create_session], - [], - [orte_notifier_want_smtp=1], - [orte_notifier_want_smtp=0]) - - AS_IF([test "$orte_notifier_want_smtp" = 1], - [$1], - [$2]) -])dnl diff --git a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt b/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt deleted file mode 100644 index 58b06bc81b..0000000000 --- a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's SMTP notifier support -# -[to/from not specified] -Error: the Open MPI SMTP notifier component had no "to" and/or "from" -email addresses specified. -# -[server not specified] -Error: the Open MPI SMTP notifier component had no SMTP server name or -IP address specified. -# -[unable to resolve server] -Sorry, Open MPI's SMTP notifier component was unable to resolve the IP -address of the server provided. - - Server: %s -# -[send_email failed] -Oops! Open MPI's SMTP notifier failed to send an email. - - Reason: %s - libESMTP function: %s - libESMTP message: %s - Message: %s -# diff --git a/orte/mca/notifier/smtp/notifier_smtp.h b/orte/mca/notifier/smtp/notifier_smtp.h deleted file mode 100644 index 5673254186..0000000000 --- a/orte/mca/notifier/smtp/notifier_smtp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NOTIFIER_SMTP_H -#define NOTIFIER_SMTP_H - -#include "orte_config.h" - -#include - -#include "libesmtp.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -typedef struct { - orte_notifier_base_component_t super; - - /* libesmtp version */ - char *version; - - /* SMTP server name and port */ - char *server; - int port; - - /* To, From, Subject */ - char *to, **to_argv, *from_name, *from_addr, *subject; - - /* Mail body prefix and suffix */ - char *body_prefix, *body_suffix; - - /* struct hostent from resolved SMTP server name */ - struct hostent *server_hostent; - - /* Priority of this component */ - int priority; -} orte_notifier_smtp_component_t; - - -/* - * Notifier interfaces - */ -ORTE_MODULE_DECLSPEC extern orte_notifier_smtp_component_t - mca_notifier_smtp_component; -extern orte_notifier_base_module_t orte_notifier_smtp_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/notifier/smtp/notifier_smtp_component.c b/orte/mca/notifier/smtp/notifier_smtp_component.c deleted file mode 100644 index ee37eda593..0000000000 --- a/orte/mca/notifier/smtp/notifier_smtp_component.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ -*/ - -/* - * Simple smtp notifier (using libesmtp) - */ - -#include "orte_config.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "orte/constants.h" -#include "orte/util/show_help.h" - -#include "notifier_smtp.h" - -static int smtp_component_query(mca_base_module_t **module, int *priority); -static int smtp_close(void); -static int smtp_register(void); - -/* - * Struct of function pointers that need to be initialized - */ -orte_notifier_smtp_component_t mca_notifier_smtp_component = { - { - .base_version = { - ORTE_NOTIFIER_BASE_VERSION_1_0_0, - - .mca_component_name = "smtp", - - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_close_component = smtp_close, - .mca_query_component = smtp_component_query, - .mca_register_component_params = smtp_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - }, -}; - -static int smtp_register(void) -{ - char version[256]; - - /* Server stuff */ - mca_notifier_smtp_component.server = strdup("localhost"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "server", - "SMTP server name or IP address", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.server); - - 
mca_notifier_smtp_component.port = 25; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "port", - "SMTP server port", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.port); - - /* Email stuff */ - mca_notifier_smtp_component.to = NULL; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "to", - "Comma-delimited list of email addresses to send to", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.to); - mca_notifier_smtp_component.from_addr = NULL; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "from_addr", - "Email address that messages will be from", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.from_addr); - mca_notifier_smtp_component.from_name = strdup("ORTE Notifier"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "from_name", - "Email name that messages will be from", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.from_name); - mca_notifier_smtp_component.subject = strdup("ORTE Notifier"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "subject", - "Email subject", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.subject); - - /* Mail body prefix and suffix */ - mca_notifier_smtp_component.body_prefix = strdup("The ORTE SMTP notifier wishes to inform you of the following message:\n\n"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "body_prefix", - "Text to put at the beginning of the mail message", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - 
OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.body_prefix); - mca_notifier_smtp_component.body_suffix = strdup("\n\nSincerely,\nOscar the ORTE Owl"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "body_prefix", - "Text to put at the end of the mail message", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.body_suffix); - - /* Priority */ - mca_notifier_smtp_component.priority = 10; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "priority", - "Priority of this component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.priority); - /* Libesmtp version */ - smtp_version(version, sizeof(version), 0); - version[sizeof(version) - 1] = '\0'; - mca_notifier_smtp_component.version = strdup(version); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "libesmtp_version", - "Version of libesmtp that this component is linked against", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.version); - - return ORTE_SUCCESS; -} - -static int smtp_close(void) -{ - return ORTE_SUCCESS; -} - -static int smtp_component_query(mca_base_module_t **module, - int *priority) -{ - *priority = 0; - *module = NULL; - - /* If there's no to or from, there's no love */ - if (NULL == mca_notifier_smtp_component.to || - '\0' == mca_notifier_smtp_component.to[0] || - NULL == mca_notifier_smtp_component.from_addr || - '\0' == mca_notifier_smtp_component.from_addr[0]) { - orte_show_help("help-orte-notifier-smtp.txt", - "to/from not specified", true); - return ORTE_ERR_NOT_FOUND; - } - - /* Sanity checks */ - if (NULL == mca_notifier_smtp_component.server || - '\0' == mca_notifier_smtp_component.server[0]) { - 
orte_show_help("help-orte-notifier-smtp.txt", - "server not specified", true); - return ORTE_ERR_NOT_FOUND; - } - - /* Since we have to open a socket later, try to resolve the IP - address of the server now. Save the result, or abort if we - can't resolve it. */ - mca_notifier_smtp_component.server_hostent = - gethostbyname(mca_notifier_smtp_component.server); - if (NULL == mca_notifier_smtp_component.server_hostent) { - orte_show_help("help-orte-notifier-smtp.txt", - "unable to resolve server", - true, mca_notifier_smtp_component.server); - return ORTE_ERR_NOT_FOUND; - } - - *priority = 10; - *module = (mca_base_module_t *)&orte_notifier_smtp_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/smtp/notifier_smtp_module.c b/orte/mca/notifier/smtp/notifier_smtp_module.c deleted file mode 100644 index 666fd08028..0000000000 --- a/orte/mca/notifier/smtp/notifier_smtp_module.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * Send an email upon notifier events. 
- */ - -#include "orte_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/show_help.h" -#include "opal/util/argv.h" - -#include "orte/constants.h" -#include "orte/mca/ess/ess.h" -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/notifier/base/base.h" - -#include "notifier_smtp.h" - - -/* Static API's */ -static void mylog(orte_notifier_base_severity_t severity, int errcode, - const char *msg, va_list ap); - -/* Module */ -orte_notifier_base_module_t orte_notifier_smtp_module = { - .log = mylog -}; - -typedef enum { - SENT_NONE, - SENT_HEADER, - SENT_BODY_PREFIX, - SENT_BODY, - SENT_BODY_SUFFIX, - SENT_ALL -} sent_flag_t; - -typedef struct { - sent_flag_t sent_flag; - char *msg; - char *prev_string; -} message_status_t; - -/* - * Convert lone \n's to \r\n - */ -static char *crnl(char *orig) -{ - int i, j, max, count; - char *str; - return strdup(orig); - - /* Count how much space we need */ - count = max = strlen(orig); - for (i = 0; i < max; ++i) { - if (orig[i] == '\n' && i > 0 && orig[i - 1] != '\r') { - ++count; - } - } - - /* Copy, changing \n to \r\n */ - str = malloc(count + 1); - for (j = i = 0; i < max; ++i) { - if (orig[i] == '\n' && i > 0 && orig[i - 1] != '\r') { - str[j++] = '\n'; - } - str[j++] = orig[i]; - } - str[j] = '\0'; - return str; -} - -/* - * Callback function invoked via smtp_start_session() - */ -static const char *message_cb(void **buf, int *len, void *arg) -{ - message_status_t *ms = (message_status_t*) arg; - - if (NULL == *buf) { - *buf = malloc(8192); - } - if (NULL == len) { - ms->sent_flag = SENT_NONE; - return NULL; - } - - /* Free the previous string */ - if (NULL != ms->prev_string) { - free(ms->prev_string); - ms->prev_string = NULL; - } - - switch (ms->sent_flag) { - case SENT_NONE: - /* Send a blank line to signify the end of the header */ - 
ms->sent_flag = SENT_HEADER; - ms->prev_string = NULL; - *len = 2; - return "\r\n"; - - case SENT_HEADER: - if (NULL != mca_notifier_smtp_component.body_prefix) { - ms->sent_flag = SENT_BODY_PREFIX; - ms->prev_string = crnl(mca_notifier_smtp_component.body_prefix); - *len = strlen(ms->prev_string); - return ms->prev_string; - } - - case SENT_BODY_PREFIX: - ms->sent_flag = SENT_BODY; - ms->prev_string = crnl(ms->msg); - *len = strlen(ms->prev_string); - return ms->prev_string; - - case SENT_BODY: - if (NULL != mca_notifier_smtp_component.body_suffix) { - ms->sent_flag = SENT_BODY_SUFFIX; - ms->prev_string = crnl(mca_notifier_smtp_component.body_suffix); - *len = strlen(ms->prev_string); - return ms->prev_string; - } - - case SENT_BODY_SUFFIX: - case SENT_ALL: - default: - ms->sent_flag = SENT_ALL; - *len = 0; - return NULL; - } -} - -/* - * Back-end function to actually send the email - */ -static int send_email(char *msg) -{ - int i, err = ORTE_SUCCESS; - char *str = NULL; - char *errmsg = NULL; - struct sigaction sig, oldsig; - bool set_oldsig = false; - smtp_session_t session = NULL; - smtp_message_t message = NULL; - message_status_t ms; - orte_notifier_smtp_component_t *c = &mca_notifier_smtp_component; - - if (NULL == c->to_argv) { - c->to_argv = opal_argv_split(c->to, ','); - if (NULL == c->to_argv || - NULL == c->to_argv[0]) { - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - - ms.sent_flag = SENT_NONE; - ms.prev_string = NULL; - ms.msg = msg; - - /* Temporarily disable SIGPIPE so that if remote servers timeout - or hang up on us, it doesn't kill this application. We'll - restore the original SIGPIPE handler when we're done. */ - sig.sa_handler = SIG_IGN; - sigemptyset(&sig.sa_mask); - sig.sa_flags = 0; - sigaction(SIGPIPE, &sig, &oldsig); - set_oldsig = true; - - /* Try to get a libesmtp session. 
If so, assume that libesmtp is - happy and proceeed */ - session = smtp_create_session(); - if (NULL == session) { - err = ORTE_ERR_NOT_SUPPORTED; - errmsg = "stmp_create_session"; - goto error; - } - - /* Create the message */ - message = smtp_add_message(session); - if (NULL == message) { - err = ORTE_ERROR; - errmsg = "stmp_add_message"; - goto error; - } - - /* Set the SMTP server (yes, it's a weird return status!) */ - asprintf(&str, "%s:%d", c->server, c->port); - if (0 == smtp_set_server(session, str)) { - err = ORTE_ERROR; - errmsg = "stmp_set_server"; - goto error; - } - free(str); - str = NULL; - - /* Add the sender */ - if (0 == smtp_set_reverse_path(message, c->from_addr)) { - err = ORTE_ERROR; - errmsg = "stmp_set_reverse_path"; - goto error; - } - - /* Set the subject and some headers */ - asprintf(&str, "Open MPI SMTP Notifier v%d.%d.%d", - c->super.base_version.mca_component_major_version, - c->super.base_version.mca_component_minor_version, - c->super.base_version.mca_component_release_version); - if (0 == smtp_set_header(message, "Subject", c->subject) || - 0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1) || - 0 == smtp_set_header(message, "To", NULL, NULL) || - 0 == smtp_set_header(message, "From", - (NULL != c->from_name ? - c->from_name : c->from_addr), - c->from_addr) || - 0 == smtp_set_header(message, "X-Mailer", str) || - 0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1)) { - err = ORTE_ERROR; - errmsg = "smtp_set_header"; - goto error; - } - free(str); - str = NULL; - - /* Add the recipients */ - for (i = 0; NULL != c->to_argv[i]; ++i) { - if (NULL == smtp_add_recipient(message, c->to_argv[i])) { - err = ORTE_ERR_OUT_OF_RESOURCE; - errmsg = "stmp_add_recipient"; - goto error; - } - } - - /* Set the callback to get the message */ - if (0 == smtp_set_messagecb(message, message_cb, &ms)) { - err = ORTE_ERROR; - errmsg = "smtp_set_messagecb"; - goto error; - } - - /* Send it! 
*/ - if (0 == smtp_start_session(session)) { - err = ORTE_ERROR; - errmsg = "smtp_start_session"; - goto error; - } - - /* Fall through */ - - error: - if (NULL != str) { - free(str); - } - if (NULL != session) { - smtp_destroy_session(session); - } - /* Restore the SIGPIPE handler */ - if (set_oldsig) { - sigaction(SIGPIPE, &oldsig, NULL); - } - if (ORTE_SUCCESS != err) { - int e; - char em[256]; - - e = smtp_errno(); - smtp_strerror(e, em, sizeof(em)); - orte_show_help("help-orte-notifier-smtp.txt", - "send_email failed", - true, "libesmtp library call failed", - errmsg, em, e, msg); - } - return err; -} - -static void mylog(orte_notifier_base_severity_t severity, int errcode, - const char *msg, va_list ap) -{ - char *output; - - /* If there was a message, output it */ - vasprintf(&output, msg, ap); - - if (NULL != output) { - send_email(output); - free(output); - } -} diff --git a/orte/mca/notifier/syslog/Makefile.am b/orte/mca/notifier/syslog/Makefile.am deleted file mode 100644 index b4f57089f7..0000000000 --- a/orte/mca/notifier/syslog/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - notifier_syslog.h \ - notifier_syslog_module.c \ - notifier_syslog_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_notifier_syslog_DSO -component_noinst = -component_install = mca_notifier_syslog.la -else -component_noinst = libmca_notifier_syslog.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_notifier_syslog_la_SOURCES = $(sources) -mca_notifier_syslog_la_LDFLAGS = -module -avoid-version -mca_notifier_syslog_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_notifier_syslog_la_SOURCES =$(sources) -libmca_notifier_syslog_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/notifier/syslog/configure.m4 b/orte/mca/notifier/syslog/configure.m4 deleted file mode 100644 index d9e54bb942..0000000000 --- a/orte/mca/notifier/syslog/configure.m4 +++ /dev/null @@ -1,31 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2017 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_notifier_syslog_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_notifier_syslog_CONFIG], [ - AC_CONFIG_FILES([orte/mca/notifier/syslog/Makefile]) - - OPAL_VAR_SCOPE_PUSH([orte_notifier_syslog_happy]) - - # Per https://github.com/open-mpi/ompi/issues/4373 and - # https://github.com/open-mpi/ompi/pull/4374, we need to check - # that syslog.h is compilable. If syslog.h is not compilable, - # disable this component. 
- AC_CHECK_HEADER([syslog.h], - [orte_notifier_syslog_happy=1], - [orte_notifier_syslog_happy=0]) - - AS_IF([test $orte_notifier_syslog_happy -eq 1], - [$1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/orte/mca/notifier/syslog/notifier_syslog.h b/orte/mca/notifier/syslog/notifier_syslog.h deleted file mode 100644 index a78bb915b7..0000000000 --- a/orte/mca/notifier/syslog/notifier_syslog.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NOTIFIER_SYSLOG_H -#define NOTIFIER_SYSLOG_H - -#include "orte_config.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -/* - * Notifier interfaces - */ - -ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_syslog_component; -extern orte_notifier_base_module_t orte_notifier_syslog_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/notifier/syslog/notifier_syslog_component.c b/orte/mca/notifier/syslog/notifier_syslog_component.c deleted file mode 100644 index 9908533816..0000000000 --- a/orte/mca/notifier/syslog/notifier_syslog_component.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ -*/ - -/* - * includes - */ -#include "orte_config.h" -#include "orte/constants.h" - -#include "notifier_syslog.h" - - -static int orte_notifier_syslog_component_query(mca_base_module_t **module, - int *priority); - -/* - * Struct of function pointers that need to be initialized - */ -orte_notifier_base_component_t mca_notifier_syslog_component = { - .base_version = { - ORTE_NOTIFIER_BASE_VERSION_1_0_0, - - .mca_component_name = "syslog", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_notifier_syslog_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int orte_notifier_syslog_component_query(mca_base_module_t **module, - int *priority) -{ - *priority = 1; - *module = (mca_base_module_t *)&orte_notifier_syslog_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/syslog/notifier_syslog_module.c b/orte/mca/notifier/syslog/notifier_syslog_module.c deleted file mode 100644 index d488ca392f..0000000000 --- a/orte/mca/notifier/syslog/notifier_syslog_module.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#ifdef HAVE_SYSLOG_H -#include -#endif -#include - -#include "opal/util/show_help.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/notifier/base/base.h" -#include "notifier_syslog.h" - - -/* Static API's */ -static int init(void); -static void finalize(void); -static void mylog(orte_notifier_request_t *req); -static void myevent(orte_notifier_request_t *req); -static void myreport(orte_notifier_request_t *req); - -/* Module def */ -orte_notifier_base_module_t orte_notifier_syslog_module = { - .init = init, - .finalize = finalize, - .log = mylog, - .event = myevent, - .report = myreport -}; - - -static int init(void) -{ - int opts; - - opts = LOG_CONS | LOG_PID; - openlog("OpenRTE Error Report:", opts, LOG_USER); - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - closelog(); -} - -static void mylog(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:mylog function called with severity %d errcode %d and messg %s", - (int)req->severity, req->errcode, req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s %s: JOBID %s 
REPORTS ERROR %s: %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_notifier_base_sev2str(req->severity), - ORTE_JOBID_PRINT((NULL == req->jdata) ? - ORTE_JOBID_INVALID : req->jdata->jobid), - orte_job_state_to_str(req->state), - (NULL == req->msg) ? "" : req->msg); -} - -static void myevent(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:myevent function called with severity %d and messg %s", - (int)req->severity, req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s %s SYSTEM EVENT : %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_notifier_base_sev2str(req->severity), - (NULL == req->msg) ? "" : req->msg); -} - -static void myreport(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:myreport function called with severity %d state %s and messg %s", - (int)req->severity, orte_job_state_to_str(req->state), - req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s JOBID %s REPORTS STATE %s: %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT((NULL == req->jdata) ? - ORTE_JOBID_INVALID : req->jdata->jobid), - orte_job_state_to_str(req->state), - (NULL == req->msg) ? 
"" : req->msg); -} diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index b2cf785712..60f1a033e1 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -75,7 +75,6 @@ #include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/mca/filem/filem.h" -#include "orte/mca/dfs/dfs.h" #include "orte/util/context_fns.h" #include "orte/util/name_fns.h" @@ -466,13 +465,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } -static void fm_release(void *cbdata) -{ - opal_buffer_t *bptr = (opal_buffer_t*)cbdata; - - OBJ_RELEASE(bptr); -} - static void ls_cbunc(int status, void *cbdata) { opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; @@ -831,15 +823,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, lock.active = false; // we won't get a callback } - /* if we have a file map, then we need to load it */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) { - if (NULL != orte_dfs.load_file_maps) { - orte_dfs.load_file_maps(jdata->jobid, bptr, fm_release, bptr); - } else { - OBJ_RELEASE(bptr); - } - } - /* load any controls into the job */ orte_rtc.assign(jdata); diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 85aba0a0f3..7f80b147ae 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -81,7 +81,6 @@ #include "opal/class/opal_pointer_array.h" #include "opal/dss/dss.h" -#include "orte/mca/dfs/dfs.h" #include "orte/mca/odls/odls.h" #include "orte/mca/rml/rml.h" #include "orte/mca/state/state.h" diff --git a/orte/util/attr.c b/orte/util/attr.c index 9e8716f092..87047db7f5 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -269,8 +269,6 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "JOB-LAUNCH-MSG-RECVD"; case ORTE_JOB_MAX_LAUNCH_MSG_RECVD: return "JOB-MAX-LAUNCH-MSG-RECVD"; - case ORTE_JOB_FILE_MAPS: - return "JOB-FILE-MAPS"; case ORTE_JOB_CKPT_STATE: return "JOB-CKPT-STATE"; case ORTE_JOB_SNAPSHOT_REF: diff --git a/orte/util/attr.h b/orte/util/attr.h index 73bb21192a..621b577f04 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -101,7 +101,6 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_LAUNCH_MSG_SENT (ORTE_JOB_START_KEY + 1) // timeval - time launch message was sent #define ORTE_JOB_LAUNCH_MSG_RECVD (ORTE_JOB_START_KEY + 2) // timeval - time launch message was recvd #define ORTE_JOB_MAX_LAUNCH_MSG_RECVD (ORTE_JOB_START_KEY + 3) // timeval - max time for launch msg to be received -#define ORTE_JOB_FILE_MAPS (ORTE_JOB_START_KEY + 4) // opal_buffer_t - file maps associates with this job #define ORTE_JOB_CKPT_STATE (ORTE_JOB_START_KEY + 5) // size_t - ckpt state #define ORTE_JOB_SNAPSHOT_REF (ORTE_JOB_START_KEY + 6) // string - snapshot reference #define ORTE_JOB_SNAPSHOT_LOC (ORTE_JOB_START_KEY + 7) // string - snapshot location