diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index f05e53b6cd..f8fd172dbe 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -94,7 +94,9 @@ enum { OPAL_ERR_PROC_RESTART = (OPAL_ERR_BASE - 63), OPAL_ERR_PROC_CHECKPOINT = (OPAL_ERR_BASE - 64), OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65), - OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66) + OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66), + OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), + OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index bb6d37d524..253276fca6 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. 
@@ -352,7 +352,7 @@ static void _event_hdlr(int sd, short args, void *cbdata) if (NULL != chain->final_cbfunc) { chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata); } - + OBJ_RELEASE(chain); return; diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix.h b/opal/mca/pmix/pmix2x/pmix/include/pmix.h index df43e348ba..cf89a160a3 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix.h @@ -473,6 +473,59 @@ pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t directive, pmix_info_t *info, size_t ninfo, pmix_info_cbfunc_t cbfunc, void *cbdata); +/* Request a job control action. The targets array identifies the + * processes to which the requested job control action is to be applied. + * A NULL value can be used to indicate all processes in the caller's + * nspace. PMIX_RANK_WILDCARD can also be used to indicate + * that all processes in the given nspace are to be included. + * + * The directives are provided as pmix_info_t structs in the directives + * array. The callback function provides a status to indicate whether or + * not the request was granted, and to provide some information as to + * the reason for any denial in the pmix_info_cbfunc_t array of pmix_info_t + * structures. If non-NULL, then the specified release_fn must be called + * when the callback function completes - this will be used to release + * any provided pmix_info_t array. + */ +pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Request that something be monitored - e.g., that the server monitor + * this process for periodic heartbeats as an indication that the process + * has not become "wedged". When a monitor detects the specified alarm + * condition, it will generate an event notification using the provided + * error code and passing along any available relevant information.
It is + * up to the caller to register a corresponding event handler. + * + * Params: + * + * monitor: attribute indicating the type of monitor being requested - e.g., + * PMIX_MONITOR_FILE to indicate that the requestor is asking that + * a file be monitored. + * + * error: the status code to be used when generating an event notification + * alerting that the monitor has been triggered. The range of the + * notification defaults to PMIX_RANGE_NAMESPACE - this can be + * changed by providing a PMIX_RANGE directive + * + * directives: characterize the monitoring request (e.g., monitor file size) + * and frequency of checking to be done + * + * cbfunc: provides a status to indicate whether or not the request was granted, + * and to provide some information as to the reason for any denial in + * the pmix_info_cbfunc_t array of pmix_info_t structures. + * + * Note: a process can send a heartbeat to the server using the PMIx_Heartbeat + * macro provided below */ +pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* define a special macro to simplify sending of a heartbeat; no alert status applies, so PMIX_SUCCESS fills the error slot. NOTE(review): monitor is declared as a pmix_info_t*, but the PMIX_SEND_HEARTBEAT key string is passed here - confirm the intended argument */ +#define PMIx_Heartbeat() \ + PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, PMIX_SUCCESS, NULL, 0, NULL, NULL) + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 82c4ce59cc..bab05ee155 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -123,6 +123,8 @@ typedef uint32_t pmix_rank_t; // a local system-level PMIx server #define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first #define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data +#define PMIX_SERVER_ENABLE_MONITORING
"pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server + /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id @@ -218,8 +220,9 @@ typedef uint32_t pmix_rank_t; #define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective #define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory #define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job -#define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish -#define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish +#define PMIX_RANGE "pmix.range" // (pmix_data_range_t) value for calls to publish/lookup/unpublish or for + // monitoring event notifications +#define PMIX_PERSISTENCE "pmix.persist" // (pmix_persistence_t) value for calls to publish #define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do // not request data from the server if not found #define PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the @@ -259,66 +262,72 @@ typedef uint32_t pmix_rank_t; #define PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response /* attributes used to describe "spawn" attributes */ -#define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use -#define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs -#define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs -#define PMIX_ADD_HOST "pmix.addhost" // (char*) comma-delimited list of hosts to add to allocation -#define PMIX_ADD_HOSTFILE "pmix.addhostfile" // (char*) hostfile to add to existing allocation -#define PMIX_PREFIX "pmix.prefix" // (char*) prefix to 
use for starting spawned procs -#define PMIX_WDIR "pmix.wdir" // (char*) working directory for spawned procs -#define PMIX_MAPPER "pmix.mapper" // (char*) mapper to use for placing spawned procs -#define PMIX_DISPLAY_MAP "pmix.dispmap" // (bool) display process map upon spawn -#define PMIX_PPR "pmix.ppr" // (char*) #procs to spawn on each identified resource -#define PMIX_MAPBY "pmix.mapby" // (char*) mapping policy -#define PMIX_RANKBY "pmix.rankby" // (char*) ranking policy -#define PMIX_BINDTO "pmix.bindto" // (char*) binding policy -#define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries -#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position -#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init -#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin -#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc -#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me -#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me -#define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons -#define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected - // job - i.e., not part of the "comm_world" of the job +#define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use +#define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs +#define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs +#define PMIX_ADD_HOST "pmix.addhost" // (char*) comma-delimited list of hosts to add to allocation +#define PMIX_ADD_HOSTFILE "pmix.addhostfile" // (char*) hostfile to add to existing allocation +#define PMIX_PREFIX "pmix.prefix" // (char*) prefix to use for starting spawned procs +#define PMIX_WDIR "pmix.wdir" 
// (char*) working directory for spawned procs +#define PMIX_MAPPER "pmix.mapper" // (char*) mapper to use for placing spawned procs +#define PMIX_DISPLAY_MAP "pmix.dispmap" // (bool) display process map upon spawn +#define PMIX_PPR "pmix.ppr" // (char*) #procs to spawn on each identified resource +#define PMIX_MAPBY "pmix.mapby" // (char*) mapping policy +#define PMIX_RANKBY "pmix.rankby" // (char*) ranking policy +#define PMIX_BINDTO "pmix.bindto" // (char*) binding policy +#define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries +#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position +#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init +#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin +#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc +#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me +#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me +#define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons +#define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected + // job - i.e., not part of the "comm_world" of the job /* query attributes */ -#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces -#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job -#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues -#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue -#define PMIX_QUERY_PROC_TABLE "pmix.qry.ptable" // (char*) input nspace of job whose info is being requested - // returns (pmix_data_array_t) an array of pmix_proc_info_t -#define 
PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable" // (char*) input nspace of job whose info is being requested - // returns (pmix_data_array_t) an array of pmix_proc_info_t for - // procs in job on same node -#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform -#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes -#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes -#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers -#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only -#define PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // report average values -#define PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // report minimum and maximum value -#define PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status - // is being requested +#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces +#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job +#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues +#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue +#define PMIX_QUERY_PROC_TABLE "pmix.qry.ptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t) an array of pmix_proc_info_t +#define PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t) an array of pmix_proc_info_t for + // procs in job on same node +#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // (bool) return operations tool is authorized to perform +#define PMIX_QUERY_SPAWN_SUPPORT 
"pmix.qry.spawn" // (bool) return a comma-delimited list of supported spawn attributes +#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // (bool) return a comma-delimited list of supported debug attributes +#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // (bool) return info on memory usage for the procs indicated in the qualifiers +#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // (bool) constrain the query to local information only +#define PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // (bool) report average values +#define PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // (bool) report minimum and maximum value +#define PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status + // is being requested +#define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation + // for the specified nspace /* log attributes */ -#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr -#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout -#define PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless -#define PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr +#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout +#define PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless +#define PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define PMIX_LOG_EMAIL "pmix.log.email" // (pmix_data_array_t) log via email based on pmix_info_t containing directives +#define PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr" // (char*) comma-delimited list of email addresses that are to recv msg +#define PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub" // (char*) subject line for email +#define PMIX_LOG_EMAIL_MSG "pmix.log.emmsg" // 
(char*) msg to be included in email /* debugger attributes */ -#define PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start -#define PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init -#define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification -#define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are -#define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start +#define PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init +#define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification +#define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are +#define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release /* Resource Manager identification */ -#define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager -#define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string +#define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager +#define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string /* attributes for setting envars */ #define PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment @@ -327,7 +336,6 @@ typedef uint32_t pmix_rank_t; /* attributes relating to allocations */ #define PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request // which can later be used to query status of the request -#define PMIX_TIME_REMAINING
"pmix.time.remaining" // (uint32_t) get number of seconds remaining in allocation #define PMIX_ALLOC_NUM_NODES "pmix.alloc.nnodes" // (uint64_t) number of nodes #define PMIX_ALLOC_NODE_LIST "pmix.alloc.nlist" // (char*) regex of specific nodes #define PMIX_ALLOC_NUM_CPUS "pmix.alloc.ncpus" // (uint64_t) number of cpus @@ -343,6 +351,38 @@ typedef uint32_t pmix_rank_t; #define PMIX_ALLOC_NETWORK_QOS "pmix.alloc.netqos" // (char*) quality of service level #define PMIX_ALLOC_TIME "pmix.alloc.time" // (uint32_t) time in seconds +/* job control attributes */ +#define PMIX_JOB_CTRL_ID "pmix.jctrl.id" // (char*) provide a string identifier for this request +#define PMIX_JOB_CTRL_PAUSE "pmix.jctrl.pause" // (bool) pause the specified processes +#define PMIX_JOB_CTRL_RESUME "pmix.jctrl.resume" // (bool) "un-pause" the specified processes +#define PMIX_JOB_CTRL_CANCEL "pmix.jctrl.cancel" // (char*) cancel the specified request + // (NULL => cancel all requests from this requestor) +#define PMIX_JOB_CTRL_KILL "pmix.jctrl.kill" // (bool) forcibly terminate the specified processes and cleanup +#define PMIX_JOB_CTRL_RESTART "pmix.jctrl.restart" // (char*) restart the specified processes using the given checkpoint ID +#define PMIX_JOB_CTRL_CHECKPOINT "pmix.jctrl.ckpt" // (char*) checkpoint the specified processes and assign the given ID to it +#define PMIX_JOB_CTRL_CHECKPOINT_EVENT "pmix.jctrl.ckptev" // (bool) use event notification to trigger process checkpoint +#define PMIX_JOB_CTRL_CHECKPOINT_SIGNAL "pmix.jctrl.ckptsig" // (int) use the given signal to trigger process checkpoint +#define PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT "pmix.jctrl.ckpttmo" // (int) time in seconds to wait for checkpoint to complete +#define PMIX_JOB_CTRL_SIGNAL "pmix.jctrl.sig" // (int) send given signal to specified processes +#define PMIX_JOB_CTRL_PROVISION "pmix.jctrl.pvn" // (char*) regex identifying nodes that are to be provisioned +#define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*)
name of the image that is to be provisioned +#define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted + +/* monitoring attributes */ +#define PMIX_MONITOR_HEARTBEAT "pmix.monitor.mbeat" // (void) register to have the server monitor the requestor for heartbeats +#define PMIX_SEND_HEARTBEAT "pmix.monitor.beat" // (void) send heartbeat to local server +#define PMIX_MONITOR_HEARTBEAT_TIME "pmix.monitor.btime" // (uint32_t) time in seconds before declaring heartbeat missed +#define PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before taking + // specified action +#define PMIX_MONITOR_FILE "pmix.monitor.fmon" // (char*) register to monitor file for signs of life +#define PMIX_MONITOR_FILE_SIZE "pmix.monitor.fsize" // (bool) monitor size of given file is growing to determine app is running +#define PMIX_MONITOR_FILE_ACCESS "pmix.monitor.faccess" // (char*) monitor time since last access of given file to determine app is running +#define PMIX_MONITOR_FILE_MODIFY "pmix.monitor.fmod" // (char*) monitor time since last modified of given file to determine app is running +#define PMIX_MONITOR_FILE_CHECK_TIME "pmix.monitor.ftime" // (uint32_t) time in seconds between checking file +#define PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before taking + // specified action + + /**** PROCESS STATE DEFINITIONS ****/ typedef uint8_t pmix_proc_state_t; #define PMIX_PROC_STATE_UNDEF 0 /* undefined process state */ @@ -455,7 +495,14 @@ typedef int pmix_status_t; #define PMIX_ERR_LOST_CONNECTION_TO_CLIENT (PMIX_ERR_V2X_BASE - 3) /* used by the query system */ #define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_V2X_BASE - 4) +/* request responses */ #define PMIX_NOTIFY_ALLOC_COMPLETE (PMIX_ERR_V2X_BASE - 5) +/* job control */ +#define PMIX_JCTRL_CHECKPOINT (PMIX_ERR_V2X_BASE - 6) +#define PMIX_JCTRL_PREEMPT_ALERT (PMIX_ERR_V2X_BASE - 7) +/* monitoring */ 
+#define PMIX_MONITOR_HEARTBEAT_ALERT (PMIX_ERR_V2X_BASE - 8) +#define PMIX_MONITOR_FILE_ALERT (PMIX_ERR_V2X_BASE - 9) /* define a starting point for operational error constants so * we avoid renumbering when making additions */ diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h index 531bc173d9..9f53dd1831 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h @@ -328,6 +328,17 @@ typedef pmix_status_t (*pmix_server_alloc_fn_t)(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, pmix_info_cbfunc_t cbfunc, void *cbdata); +/* Execute a job control action on behalf of a client */ +typedef pmix_status_t (*pmix_server_job_control_fn_t)(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Request that a client be monitored for activity */ +typedef pmix_status_t (*pmix_server_monitor_fn_t)(const pmix_proc_t *requestor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + typedef struct pmix_server_module_2_0_0_t { /* v1x interfaces */ pmix_server_client_connected_fn_t client_connected; @@ -350,12 +361,14 @@ typedef struct pmix_server_module_2_0_0_t { pmix_server_tool_connection_fn_t tool_connected; pmix_server_log_fn_t log; pmix_server_alloc_fn_t allocate; + pmix_server_job_control_fn_t job_control; + pmix_server_monitor_fn_t monitor; } pmix_server_module_t; /**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/ /* Initialize the server support library, and provide a - * pointer to a pmix_server_module_t structure + * pointer to a pmix_server_module_t structure * containing the caller's callback functions. 
The * array of pmix_info_t structs is used to pass * additional info that may be required by the server diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include index 4f29509b0f..6a566f58a4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # @@ -13,4 +13,5 @@ sources += \ common/pmix_query.c \ common/pmix_strings.c \ common/pmix_log.c \ - common/pmix_jobdata.c + common/pmix_jobdata.c \ + common/pmix_control.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c new file mode 100644 index 0000000000..9b3e6c59b0 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -0,0 +1,269 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include <src/include/pmix_config.h>
+
+#include <src/include/types.h>
+#include <src/include/pmix_stdint.h>
+#include <src/include/pmix_socket_errno.h>
+
+#include <pmix.h>
+#include <pmix_common.h>
+#include <pmix_server.h>
+#include <pmix_rename.h>
+
+#include "src/util/argv.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/buffer_ops/buffer_ops.h"
+#include "src/mca/ptl/ptl.h"
+
+#include "src/client/pmix_client_ops.h"
+#include "src/server/pmix_server_ops.h"
+#include "src/include/pmix_globals.h"
+
+/* Release function handed to the caller's pmix_info_cbfunc_t: frees the
+ * pmix_info_t array carried by the results caddy (if any) and then
+ * releases the caddy itself. */
+static void relcbfunc(void *cbdata)
+{
+    pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query release callback");
+
+    if (NULL != cd->info) {
+        PMIX_INFO_FREE(cd->info, cd->ninfo);
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Receive callback for the server's reply to a job-control or monitor
+ * request. Unpacks the returned status and any pmix_info_t array into a
+ * pmix_shift_caddy_t and passes them to the caller's callback together
+ * with relcbfunc, which the caller uses to give the results back.
+ * The query caddy (cbdata) is always released here. */
+static void query_cbfunc(struct pmix_peer_t *peer,
+                         pmix_ptl_hdr_t *hdr,
+                         pmix_buffer_t *buf, void *cbdata)
+{
+    pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
+    pmix_status_t rc;
+    pmix_shift_caddy_t *results;
+    int cnt;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query cback from server");
+
+    results = PMIX_NEW(pmix_shift_caddy_t);
+
+    /* unpack the status */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        /* make sure the caller sees the failure rather than whatever
+         * value the caddy happened to hold */
+        results->status = rc;
+        goto complete;
+    }
+    if (PMIX_SUCCESS != results->status) {
+        goto complete;
+    }
+
+    /* unpack any returned data */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        results->status = rc;
+        goto complete;
+    }
+    if (0 < results->ninfo) {
+        PMIX_INFO_CREATE(results->info, results->ninfo);
+        cnt = results->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            results->status = rc;
+            goto complete;
+        }
+    }
+
+  complete:
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query cback from server releasing");
+    /* release the caller */
+    if (NULL != cd->cbfunc) {
+        cd->cbfunc(results->status, results->info, results->ninfo, cd->cbdata, relcbfunc, results);
+    } else {
+        /* nobody will call the release function for us, so free the
+         * results here instead of leaking them */
+        relcbfunc(results);
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Request a job control action (see pmix.h for the public contract).
+ *
+ * On a server the request is handed directly to the host's job_control
+ * entry point (PMIX_ERR_NOT_SUPPORTED if the host provides none). On a
+ * client the command, targets and directives are packed and sent to the
+ * local server; the reply is delivered to cbfunc via query_cbfunc.
+ *
+ * Returns PMIX_ERR_INIT if the library is not initialized,
+ * PMIX_ERR_UNREACH if the client is not connected, PMIX_ERR_BAD_PARAM if
+ * a NULL array is given with a non-zero count on the client path, or the
+ * status of the pack/send operation. */
+PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets,
+                                              const pmix_info_t directives[], size_t ndirs,
+                                              pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_buffer_t *msg;
+    pmix_cmd_t cmd = PMIX_JOB_CONTROL_CMD;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cb;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix: job control called");
+
+    if (pmix_globals.init_cntr <= 0) {
+        return PMIX_ERR_INIT;
+    }
+
+    /* if we are the server, then we just issue the request and
+     * return the response */
+    if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
+        if (NULL == pmix_host_server.job_control) {
+            /* nothing we can do */
+            return PMIX_ERR_NOT_SUPPORTED;
+        }
+        pmix_output_verbose(2, pmix_globals.debug_output,
+                            "pmix:job_control handed to RM");
+        rc = pmix_host_server.job_control(&pmix_globals.myid,
+                                          targets, ntargets,
+                                          directives, ndirs,
+                                          cbfunc, cbdata);
+        return rc;
+    }
+
+    /* if we are a client, then relay this request to the server */
+
+    /* if we aren't connected, don't attempt to send */
+    if (!pmix_globals.connected) {
+        return PMIX_ERR_UNREACH;
+    }
+
+    /* we are about to read the arrays while packing - refuse a NULL
+     * array that claims to hold entries */
+    if ((NULL == targets && 0 < ntargets) ||
+        (NULL == directives && 0 < ndirs)) {
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    msg = PMIX_NEW(pmix_buffer_t);
+    /* pack the cmd */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the number of targets */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ntargets, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    /* remember, the targets can be NULL to indicate that the operation
+     * is to be done against all members of our nspace */
+    if (0 < ntargets) {
+        /* pack the targets */
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, targets, ntargets, PMIX_PROC))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* pack the directives */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    if (0 < ndirs) {
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* create a callback object as we need to pass it to the
+     * recv routine so we know which callback to use when
+     * the return message is recvd */
+    cb = PMIX_NEW(pmix_query_caddy_t);
+    cb->cbfunc = cbfunc;
+    cb->cbdata = cbdata;
+
+    /* push the message into our event base to send to the server */
+    if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
+        PMIX_RELEASE(msg);
+        PMIX_RELEASE(cb);
+    }
+
+    return rc;
+}
+
+/* Request that something be monitored (see pmix.h for the public contract).
+ *
+ * On a server the request is handed to the host's monitor entry point
+ * (PMIX_ERR_NOT_SUPPORTED if the host provides none). On a client the
+ * command, the error code and the directives are packed and sent to the
+ * local server; the reply is delivered to cbfunc via query_cbfunc.
+ *
+ * NOTE(review): the monitor argument is neither passed to the host nor
+ * packed into the client message in this function - confirm that the
+ * server side does not expect it.
+ *
+ * Returns PMIX_ERR_INIT if the library is not initialized,
+ * PMIX_ERR_UNREACH if the client is not connected, PMIX_ERR_BAD_PARAM if
+ * directives is NULL with a non-zero ndirs on the client path, or the
+ * status of the pack/send operation. */
+PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error,
+                                                  const pmix_info_t directives[], size_t ndirs,
+                                                  pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_buffer_t *msg;
+    pmix_cmd_t cmd = PMIX_MONITOR_CMD;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cb;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix: monitor called");
+
+    if (pmix_globals.init_cntr <= 0) {
+        return PMIX_ERR_INIT;
+    }
+
+    /* if we are the server, then we just issue the request and
+     * return the response */
+    if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
+        if (NULL == pmix_host_server.monitor) {
+            /* nothing we can do */
+            return PMIX_ERR_NOT_SUPPORTED;
+        }
+        pmix_output_verbose(2, pmix_globals.debug_output,
+                            "pmix:monitor handed to RM");
+        rc = pmix_host_server.monitor(&pmix_globals.myid, error,
+                                      directives, ndirs, cbfunc, cbdata);
+        return rc;
+    }
+
+    /* if we are a client, then relay this request to the server */
+
+    /* if we aren't connected, don't attempt to send */
+    if (!pmix_globals.connected) {
+        return PMIX_ERR_UNREACH;
+    }
+
+    /* we are about to read the array while packing - refuse a NULL
+     * array that claims to hold entries */
+    if (NULL == directives && 0 < ndirs) {
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    msg = PMIX_NEW(pmix_buffer_t);
+    /* pack the cmd */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the error */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &error, 1, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the directives */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    if (0 < ndirs) {
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* create a callback object as we need to pass it to the
+     * recv routine so we know which callback to use when
+     * the return message is recvd */
+    cb = PMIX_NEW(pmix_query_caddy_t);
+    cb->cbfunc = cbfunc;
+    cb->cbdata = cbdata;
+
+    /* push the message into our event base to send to the server */
+    if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
+        PMIX_RELEASE(msg);
+        PMIX_RELEASE(cb);
+    }
+
+    return rc;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
index 8cddeb5d44..bdfb143c9a 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
@@ -257,6 +257,8 @@ static void qcon(pmix_query_caddy_t *p)
 {
     p->queries = NULL;
     p->nqueries = 0;
+    p->targets = NULL;
+    p->ntargets = 0;
     p->info = NULL;
     p->ninfo = 0;
     p->cbfunc = NULL;
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
index 85560390d6..1333cb24f1 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
@@ -72,7 +72,9 @@ typedef enum {
     PMIX_DEREGEVENTS_CMD,
     PMIX_QUERY_CMD,
     PMIX_LOG_CMD,
-    PMIX_ALLOC_CMD
+    PMIX_ALLOC_CMD,
+    PMIX_JOB_CONTROL_CMD,
+    PMIX_MONITOR_CMD
 } pmix_cmd_t;
 /* provide a "pretty-print" function
for cmds */
@@ -214,6 +216,8 @@ typedef struct {
     pmix_status_t status;
     pmix_query_t *queries;
     size_t nqueries;
+    /* NOTE(review): presumably the target procs of a job-control request;
+     * this patch only initializes them to NULL/0 in qcon() - confirm use */
+    pmix_proc_t *targets;
+    size_t ntargets;
     pmix_info_t *info;
     size_t ninfo;
     pmix_info_cbfunc_t cbfunc;
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/types.h b/opal/mca/pmix/pmix2x/pmix/src/include/types.h
index d46df75ec8..7c073ccf4f 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/types.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/types.h
@@ -256,4 +256,13 @@ typedef struct event pmix_event_t;
 #define pmix_event_active(x, y, z) event_active((x), (y), (z))
+/* Timer-event helpers: each one forwards to the generic event call with
+ * no file descriptor (-1) and no event flags (0). */
+
+/* allocate a new timer event on base b that calls cb(…, arg) */
+#define pmix_event_evtimer_new(b, cb, arg) pmix_event_new((b), -1, 0, (cb), (arg))
+
+/* schedule timer event x using the timeout tv */
+#define pmix_event_evtimer_add(x, tv) pmix_event_add((x), (tv))
+
+/* initialize caller-provided event storage x as a timer on base b */
+#define pmix_event_evtimer_set(b, x, cb, arg) event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg))
+
+/* remove timer event x from its base */
+#define pmix_event_evtimer_del(x) pmix_event_del((x))
+
+
 #endif /* PMIX_TYPES_H */
diff --git a/orte/mca/sensor/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
similarity index 62%
rename from orte/mca/sensor/Makefile.am
rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
index 2e59fe28eb..81072424d0 100644
--- a/orte/mca/sensor/Makefile.am
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
@@ -3,26 +3,27 @@
 #
 # Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # +AM_CPPFLAGS = $(LTDLINCL) + # main library setup -noinst_LTLIBRARIES = libmca_sensor.la -libmca_sensor_la_SOURCES = +noinst_LTLIBRARIES = libmca_psensor.la +libmca_psensor_la_SOURCES = # local files -headers = sensor.h \ - sensor_types.h +headers = psensor.h -libmca_sensor_la_SOURCES += $(headers) +libmca_psensor_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -ortedir = $(ompiincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) endif include base/Makefile.am diff --git a/orte/mca/sensor/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am similarity index 60% rename from orte/mca/sensor/base/Makefile.am rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am index 7155261700..fe9c53ed55 100644 --- a/orte/mca/sensor/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. # # Copyright (c) 2017 Intel, Inc. All rights reserved. @@ -11,10 +11,9 @@ # headers += \ - base/base.h \ - base/sensor_private.h + base/base.h -libmca_sensor_la_SOURCES += \ - base/sensor_base_frame.c \ - base/sensor_base_select.c \ - base/sensor_base_fns.c +libmca_psensor_la_SOURCES += \ + base/psensor_base_frame.c \ + base/psensor_base_select.c \ + base/psensor_base_stubs.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h new file mode 100644 index 0000000000..a01437acff --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * Framework-level declarations for the psensor (monitoring sensor)
+ * framework: the framework object, component selection, the
+ * framework-global state and the start/stop entry points that fan a
+ * request out to the selected modules.
+ */
+
+#ifndef PMIX_PSENSOR_BASE_H_
+#define PMIX_PSENSOR_BASE_H_
+
+#include <src/include/pmix_config.h>
+
+#include "src/class/pmix_list.h"
+#include "src/mca/mca.h"
+#include "src/mca/base/pmix_mca_base_framework.h"
+
+#include "src/mca/psensor/psensor.h"
+
+BEGIN_C_DECLS
+
+/*
+ * MCA Framework
+ */
+PMIX_EXPORT extern pmix_mca_base_framework_t pmix_psensor_base_framework;
+
+/* Query every available component for a module and build the
+ * priority-ordered list of active modules. Safe to call more than
+ * once: later calls return PMIX_SUCCESS without doing anything. */
+PMIX_EXPORT int pmix_psensor_base_select(void);
+
+/* define a struct to hold framework-global values */
+typedef struct {
+    /* pmix_psensor_active_module_t items, highest priority first */
+    pmix_list_t actives;
+    /* event base the sensors run on: either a dedicated progress
+     * thread or pmix_globals.evbase, chosen when the framework opens */
+    pmix_event_base_t *evbase;
+} pmix_psensor_base_t;
+
+/* one entry in the list of selected modules */
+typedef struct {
+    pmix_list_item_t super;
+    /* component that supplied the module */
+    pmix_psensor_base_component_t *component;
+    /* module returned by the component's query function */
+    pmix_psensor_base_module_t *module;
+    /* priority reported by the component's query function */
+    int priority;
+} pmix_psensor_active_module_t;
+PMIX_CLASS_DECLARATION(pmix_psensor_active_module_t);
+
+PMIX_EXPORT extern pmix_psensor_base_t pmix_psensor_base;
+
+/* Offer a monitoring request to every active module in priority order.
+ * A module may return PMIX_ERR_TAKE_NEXT_OPTION to decline; any other
+ * non-success status stops the walk and is returned to the caller. */
+PMIX_EXPORT pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t error,
+                                                  const pmix_info_t *monitor,
+                                                  const pmix_info_t directives[], size_t ndirs);
+
+/* Ask every active module, in priority order, to stop the monitoring
+ * identified by id for the given requestor. Status handling matches
+ * pmix_psensor_base_start. */
+PMIX_EXPORT pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
+                                                 char *id);
+
+END_C_DECLS
+#endif
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c
new file mode 100644
index 0000000000..ffeda766db
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c
@@ -0,0 +1,103 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include <src/include/pmix_config.h>
+
+#include <pmix_common.h>
+
+#include <pthread.h>
+#include PMIX_EVENT_HEADER
+
+#include "src/mca/mca.h"
+#include "src/mca/base/base.h"
+#include "src/class/pmix_list.h"
+#include "src/runtime/pmix_progress_threads.h"
+#include "src/include/types.h"
+#include "src/include/pmix_globals.h"
+
+#include "src/mca/psensor/base/base.h"
+
+/*
+ * The following file was created by configure. It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+
+#include "src/mca/psensor/base/static-components.h"
+
+/*
+ * Global variables
+ */
+
+/* framework-level module: the public start/stop entry points simply
+ * fan out to the selected component modules */
+pmix_psensor_base_module_t pmix_psensor = {
+    pmix_psensor_base_start,
+    pmix_psensor_base_stop
+};
+pmix_psensor_base_t pmix_psensor_base = {{{0}}};
+
+/* MCA parameter: run the sensors on their own progress thread instead
+ * of the library's main event base */
+static bool use_separate_thread = false;
+
+/* Register the framework's MCA parameters. */
+static int pmix_psensor_register(pmix_mca_base_register_flag_t flags)
+{
+    (void) pmix_mca_base_var_register("pmix", "psensor", "base", "use_separate_thread",
+                                      "Use a separate thread for monitoring local procs",
+                                      PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                      PMIX_INFO_LVL_9,
+                                      PMIX_MCA_BASE_VAR_SCOPE_READONLY,
+                                      &use_separate_thread);
+    return PMIX_SUCCESS;
+}
+
+
+/* Tear down the framework: drop the list of active modules, stop the
+ * dedicated progress thread if one was started, and close all
+ * components. */
+static int pmix_psensor_base_close(void)
+{
+    PMIX_LIST_DESTRUCT(&pmix_psensor_base.actives);
+
+    if (use_separate_thread && NULL != pmix_psensor_base.evbase) {
+        (void)pmix_progress_thread_stop("PSENSOR");
+    }
+    /* whichever base we were using, we no longer own a reference to it */
+    pmix_psensor_base.evbase = NULL;
+
+    /* Close all remaining available components */
+    return pmix_mca_base_framework_components_close(&pmix_psensor_base_framework, NULL);
+}
+
+/**
+ * Function for finding and opening either all MCA components, or the one
+ * that was specifically requested via a MCA parameter.
+ *
+ * Also selects the event base the sensors will use: a dedicated
+ * "PSENSOR" progress thread when use_separate_thread is set, otherwise
+ * the library-wide pmix_globals.evbase.
+ */
+static int pmix_psensor_base_open(pmix_mca_base_open_flag_t flags)
+{
+    /* construct the list of modules */
+    PMIX_CONSTRUCT(&pmix_psensor_base.actives, pmix_list_t);
+
+    if (use_separate_thread) {
+        /* create an event base and progress thread for us */
+        if (NULL == (pmix_psensor_base.evbase = pmix_progress_thread_init("PSENSOR"))) {
+            return PMIX_ERROR;
+        }
+
+    } else {
+        pmix_psensor_base.evbase = pmix_globals.evbase;
+    }
+
+    /* Open up all available components */
+    return pmix_mca_base_framework_components_open(&pmix_psensor_base_framework, flags);
+}
+
+PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, psensor, "PMIx Monitoring Sensors",
+                                pmix_psensor_register,
+                                pmix_psensor_base_open, pmix_psensor_base_close,
+                                mca_psensor_base_static_components, 0);
+
+PMIX_CLASS_INSTANCE(pmix_psensor_active_module_t,
+                    pmix_list_item_t,
+                    NULL, NULL);
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c
new file mode 100644
index 0000000000..4a1f1f0c2a
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+#include <pmix_common.h>
+
+#include <string.h>
+
+#include "src/mca/mca.h"
+#include "src/mca/base/base.h"
+
+#include "src/mca/psensor/base/base.h"
+
+/* set once the prioritized module list has been built */
+static bool selected = false;
+
+/* Function for selecting a prioritized list of components
+ * from all those that are available. Every component whose query
+ * function succeeds and returns a module is added to
+ * pmix_psensor_base.actives, highest priority first. Always returns
+ * PMIX_SUCCESS; repeat calls are no-ops. */
+int pmix_psensor_base_select(void)
+{
+    pmix_mca_base_component_list_item_t *cli = NULL;
+    pmix_psensor_base_component_t *component = NULL;
+    pmix_psensor_active_module_t *newactive, *active;
+    pmix_mca_base_module_t *mod;
+    int pri;
+    bool inserted;
+
+    if (selected) {
+        /* ensure we don't do this twice */
+        return PMIX_SUCCESS;
+    }
+    selected = true;
+
+    /* Query all available components and ask if they have a module */
+    PMIX_LIST_FOREACH(cli, &pmix_psensor_base_framework.framework_components, pmix_mca_base_component_list_item_t) {
+        component = (pmix_psensor_base_component_t *) cli->cli_component;
+
+        pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                            "mca:psensor:select: checking available component %s",
+                            component->base.pmix_mca_component_name);
+
+        /* get the module for this component - skip it if the query
+         * fails or if it reports success without giving us a module,
+         * since start/stop dereference the module pointer */
+        mod = NULL;
+        if (PMIX_SUCCESS != component->base.pmix_mca_query_component(&mod, &pri) ||
+            NULL == mod) {
+            continue;
+        }
+
+        /* add to our prioritized list of available actives */
+        newactive = PMIX_NEW(pmix_psensor_active_module_t);
+        newactive->priority = pri;
+        newactive->component = component;
+        newactive->module = (pmix_psensor_base_module_t*)mod;
+
+        /* maintain priority order */
+        inserted = false;
+        PMIX_LIST_FOREACH(active, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+            if (newactive->priority > active->priority) {
+                pmix_list_insert_pos(&pmix_psensor_base.actives,
+                                     (pmix_list_item_t*)active, &newactive->super);
+                inserted = true;
+                break;
+            }
+        }
+        if (!inserted) {
+            /* must be lowest priority - add to end */
+            pmix_list_append(&pmix_psensor_base.actives, &newactive->super);
+        }
+    }
+
+    if (4 < pmix_output_get_verbosity(pmix_psensor_base_framework.framework_output)) {
+        pmix_output(0, "Final PSENSOR priorities");
+        /* show the prioritized list */
+        PMIX_LIST_FOREACH(active, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+            pmix_output(0, "\tPSENSOR: %s Priority: %d",
+                        active->component->base.pmix_mca_component_name, active->priority);
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c
new file mode 100644
index 0000000000..3250980b4f
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include <src/include/pmix_config.h>
+#include <pmix_common.h>
+
+#include "src/include/pmix_globals.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+
+#include "src/mca/psensor/base/base.h"
+
+/* Offer the monitoring request to each active module in priority order.
+ * A module that does not handle this kind of monitor returns
+ * PMIX_ERR_TAKE_NEXT_OPTION and the walk continues; any other failure
+ * is returned immediately. Returns PMIX_SUCCESS once every module has
+ * been given the request. */
+pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t error,
+                                      const pmix_info_t *monitor,
+                                      const pmix_info_t directives[], size_t ndirs)
+{
+    pmix_psensor_active_module_t *mod;
+    pmix_status_t rc;
+
+    pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                        "%s:%d sensor:base: starting sensors",
+                        pmix_globals.myid.nspace, pmix_globals.myid.rank);
+
+    /* call the start function of all modules in priority order */
+    PMIX_LIST_FOREACH(mod, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+        if (NULL != mod->module->start) {
+            rc = mod->module->start(requestor, error, monitor, directives, ndirs);
+            if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
+                return rc;
+            }
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
+
+/* Ask each active module in priority order to stop the monitoring
+ * identified by id for this requestor. Status handling matches
+ * pmix_psensor_base_start. */
+pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
+                                     char *id)
+{
+    pmix_psensor_active_module_t *mod;
+    pmix_status_t rc;
+
+    pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                        "%s:%d sensor:base: stopping sensors",
+                        pmix_globals.myid.nspace, pmix_globals.myid.rank);
+
+    /* call the stop function of all modules in priority order */
+    PMIX_LIST_FOREACH(mod, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+        if (NULL != mod->module->stop) {
+            rc = mod->module->stop(requestor, id);
+            if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
+                return rc;
+            }
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/orte/mca/sensor/file/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
similarity index 50%
rename from orte/mca/sensor/file/Makefile.am
rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
index 2d0640ab43..30dce46e38 100644
--- a/orte/mca/sensor/file/Makefile.am
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
@@ -1,37 +1,37 @@
 #
-# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
 # Copyright (c) 2017 Intel, Inc. All rights reserved.
 # $COPYRIGHT$
-#
+#
 # Additional copyrights may follow
-#
+#
 # $HEADER$
 #
-dist_ompidata_DATA = help-orte-sensor-file.txt
+dist_pmixdata_DATA = help-pmix-psensor-file.txt
 sources = \
-        sensor_file.c \
-        sensor_file.h \
-        sensor_file_component.c
+        psensor_file.c \
+        psensor_file.h \
+        psensor_file_component.c
 # Make the output library in this directory, and name it either
 # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
 # (for static builds).
-if MCA_BUILD_orte_sensor_file_DSO +if MCA_BUILD_pmix_psensor_file_DSO component_noinst = -component_install = mca_sensor_file.la +component_install = mca_psensor_file.la else -component_noinst = libmca_sensor_file.la +component_noinst = libmca_psensor_file.la component_install = endif -mcacomponentdir = $(ompilibdir) +mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_file_la_SOURCES = $(sources) -mca_sensor_file_la_LDFLAGS = -module -avoid-version +mca_psensor_file_la_SOURCES = $(sources) +mca_psensor_file_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_file_la_SOURCES =$(sources) -libmca_sensor_file_la_LDFLAGS = -module -avoid-version +libmca_psensor_file_la_SOURCES =$(sources) +libmca_psensor_file_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/file/help-orte-sensor-file.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt similarity index 98% rename from orte/mca/sensor/file/help-orte-sensor-file.txt rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt index 321c6cd771..98fd3a010c 100644 --- a/orte/mca/sensor/file/help-orte-sensor-file.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt @@ -4,9 +4,9 @@ # # Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for the file sensor diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c new file mode 100644 index 0000000000..4daeac29b1 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights
+ * reserved.
+ * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
+ * All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+#include <src/include/types.h>
+#include <pmix_common.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "src/class/pmix_list.h"
+#include "src/include/pmix_globals.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/util/show_help.h"
+
+#include "src/mca/psensor/base/base.h"
+#include "psensor_file.h"
+
+/* declare the API functions */
+static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error,
+                           const pmix_info_t *monitor,
+                           const pmix_info_t directives[], size_t ndirs);
+static pmix_status_t stop(pmix_peer_t *requestor, char *id);
+
+/* instantiate the module */
+pmix_psensor_base_module_t pmix_psensor_file_module = {
+    .start = start,
+    .stop = stop
+};
+
+/* define a tracking object - one per monitored file */
+typedef struct {
+    pmix_list_item_t super;
+    pmix_peer_t *requestor;     /* retained peer that asked for the monitoring */
+    char *id;                   /* identifier matched by stop() */
+    bool event_active;          /* set once the sampling timer has been added */
+    pmix_event_t ev;            /* sampling timer */
+    pmix_event_t cdev;          /* one-shot event that threadshifts add_tracker */
+    struct timeval tv;          /* sampling interval */
+    int tick;
+    char *file;                 /* path being monitored (owned copy) */
+    bool file_size;             /* watch for size changes */
+    bool file_access;           /* watch for access-time changes */
+    bool file_mod;              /* watch for modification-time changes */
+    size_t last_size;
+    time_t last_access;
+    time_t last_mod;
+    uint32_t ndrops;            /* number of unchanged samples that triggers the alert */
+    uint32_t nmisses;           /* consecutive unchanged samples seen so far */
+    pmix_status_t error;        /* event code supplied by the requestor */
+    pmix_data_range_t range;    /* range of the generated event */
+    pmix_info_t *info;
+    size_t ninfo;
+} file_tracker_t;
+static void ft_constructor(file_tracker_t *ft)
+{
+    ft->requestor = NULL;
+    ft->id = NULL;
+    ft->event_active = false;
+    ft->tv.tv_sec = 0;
+    ft->tv.tv_usec = 0;
+    ft->tick = 0;
+    /* the destructor frees this, so it must start out NULL */
+    ft->file = NULL;
+    ft->file_size = false;
+    ft->file_access = false;
+    ft->file_mod = false;
+    ft->last_size = 0;
+    ft->last_access = 0;
+    ft->last_mod = 0;
+    ft->ndrops = 0;
+    ft->nmisses = 0;
+    ft->error = PMIX_SUCCESS;
+    ft->range = PMIX_RANGE_NAMESPACE;
+    ft->info = NULL;
+    ft->ninfo = 0;
+}
+static void ft_destructor(file_tracker_t *ft)
+{
+    if (NULL != ft->requestor) {
+        PMIX_RELEASE(ft->requestor);
+    }
+    if (NULL != ft->id) {
+        free(ft->id);
+    }
+    if (ft->event_active) {
+        pmix_event_del(&ft->ev);
+    }
+    if (NULL != ft->file) {
+        free(ft->file);
+    }
+    if (NULL != ft->info) {
+        PMIX_INFO_FREE(ft->info, ft->ninfo);
+    }
+}
+PMIX_CLASS_INSTANCE(file_tracker_t,
+                    pmix_list_item_t,
+                    ft_constructor, ft_destructor);
+
+/* define a local caddy - carries a stop request into the event base */
+typedef struct {
+    pmix_object_t super;
+    pmix_event_t ev;
+    pmix_peer_t *requestor;
+    char *id;
+} file_caddy_t;
+static void cd_con(file_caddy_t *p)
+{
+    p->requestor = NULL;
+    p->id = NULL;
+}
+static void cd_des(file_caddy_t *p)
+{
+    if (NULL != (p->requestor)) {
+        PMIX_RELEASE(p->requestor);
+    }
+    if (NULL != p->id) {
+        free(p->id);
+    }
+}
+PMIX_CLASS_INSTANCE(file_caddy_t,
+                    pmix_object_t,
+                    cd_con, cd_des);
+
+static void file_sample(int sd, short args, void *cbdata);
+
+/* Event callback run in the psensor event base: puts the new tracker on
+ * the component's list and arms its sampling timer. */
+static void add_tracker(int sd, short flags, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+
+    /* add the tracker to our list */
+    pmix_list_append(&mca_psensor_file_component.trackers, &ft->super);
+
+    /* setup the timer event */
+    pmix_event_evtimer_set(pmix_psensor_base.evbase, &ft->ev,
+                           file_sample, ft);
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+    ft->event_active = true;
+}
+
+/*
+ * Start monitoring a file on behalf of the requestor.
+ *
+ * Returns PMIX_ERR_TAKE_NEXT_OPTION when the monitor is not a
+ * PMIX_MONITOR_FILE request, PMIX_ERR_BAD_PARAM when no file name, no
+ * sample interval or nothing to sample was given, and PMIX_SUCCESS once
+ * the tracker has been queued to the psensor event base.
+ *
+ * NOTE(review): nothing in this function assigns ft->id, so a later
+ * stop() with a non-NULL id cannot match a tracker created here -
+ * confirm how the id is meant to be supplied.
+ */
+static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error,
+                           const pmix_info_t *monitor,
+                           const pmix_info_t directives[], size_t ndirs)
+{
+    file_tracker_t *ft;
+    size_t n;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] checking file monitoring for requestor %s:%d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         requestor->info->nptr->nspace, requestor->info->rank));
+
+    /* if they didn't ask to monitor a file, then nothing for us to do */
+    if (NULL == monitor || 0 != strcmp(monitor->key, PMIX_MONITOR_FILE)) {
+        return PMIX_ERR_TAKE_NEXT_OPTION;
+    }
+    /* a file monitor without a file name cannot be honored */
+    if (NULL == monitor->value.data.string) {
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* setup to track this monitoring operation */
+    ft = PMIX_NEW(file_tracker_t);
+    PMIX_RETAIN(requestor);
+    ft->requestor = requestor;
+    ft->file = strdup(monitor->value.data.string);
+    /* remember the event code the requestor asked for.
+     * NOTE(review): file_sample currently always reports
+     * PMIX_MONITOR_FILE_ALERT - confirm which code should be used */
+    ft->error = error;
+
+    /* check the directives to see what they want monitored */
+    for (n=0; n < ndirs; n++) {
+        if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_SIZE)) {
+            ft->file_size = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_ACCESS)) {
+            ft->file_access = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_MODIFY)) {
+            ft->file_mod = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_DROPS)) {
+            ft->ndrops = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_CHECK_TIME)) {
+            ft->tv.tv_sec = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_RANGE)) {
+            ft->range = directives[n].value.data.range;
+        }
+    }
+
+    if (0 == ft->tv.tv_sec ||
+        (!ft->file_size && !ft->file_access && !ft->file_mod)) {
+        /* didn't specify a sample rate, or what should be sampled */
+        PMIX_RELEASE(ft);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* need to push into our event base to add this to our trackers */
+    pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, add_tracker, ft);
+    pmix_event_active(&ft->cdev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+
+/* Event callback run in the psensor event base: removes and releases
+ * every tracker that belongs to the caddy's requestor and, when an id
+ * was given, carries that id. A NULL id removes all of the requestor's
+ * trackers. */
+static void del_tracker(int sd, short flags, void *cbdata)
+{
+    file_caddy_t *cd = (file_caddy_t*)cbdata;
+    file_tracker_t *ft, *ftnext;
+
+    /* remove the tracker from our list */
+    PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_file_component.trackers, file_tracker_t) {
+        if (ft->requestor != cd->requestor) {
+            continue;
+        }
+        if (NULL == cd->id ||
+            (NULL != ft->id && 0 == strcmp(ft->id, cd->id))) {
+            pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super);
+            PMIX_RELEASE(ft);
+        }
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Stop file monitoring for the requestor. A NULL id stops all of the
+ * requestor's file monitors. The actual removal happens in the psensor
+ * event base; this function always returns PMIX_SUCCESS. */
+static pmix_status_t stop(pmix_peer_t *requestor, char *id)
+{
+    file_caddy_t *cd;
+
+    cd = PMIX_NEW(file_caddy_t);
+    PMIX_RETAIN(requestor);
+    cd->requestor = requestor;
+    /* NULL is a valid "all trackers" request - don't hand it to strdup */
+    if (NULL != id) {
+        cd->id = strdup(id);
+    }
+
+    /* need to push into our event base to remove this from our trackers */
+    pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, del_tracker, cd);
+    pmix_event_active(&cd->ev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+/* Completion callback for the alert notification: the tracker was
+ * already removed from the list, so this drops the last reference. */
+static void opcbfunc(pmix_status_t status, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+
+    PMIX_RELEASE(ft);
+}
+
+/* Timer callback: stat the monitored file and count consecutive samples
+ * in which the watched attribute did not change. Only the first enabled
+ * attribute (size, then access time, then modification time) is
+ * examined. When the miss count equals ndrops the tracker is taken off
+ * the list and a PMIX_MONITOR_FILE_ALERT event is generated; otherwise
+ * the timer is re-armed.
+ *
+ * NOTE(review): with the default ndrops of 0, the very first sample that
+ * sees a change resets nmisses to 0 and therefore triggers the alert -
+ * confirm the intended threshold semantics. */
+static void file_sample(int sd, short args, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+    struct stat buf;
+    pmix_status_t rc;
+    pmix_proc_t source;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] sampling file %s",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         ft->file));
+
+    /* stat the file and get its info */
+    if (0 > stat(ft->file, &buf)) {
+        /* cannot stat file */
+        PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                             "[%s:%d] could not stat %s",
+                             pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                             ft->file));
+        /* re-add the timer, in case this file shows up */
+        pmix_event_evtimer_add(&ft->ev, &ft->tv);
+        return;
+    }
+
+    /* NOTE(review): ctime() returns a pointer to static storage, so the
+     * two times printed here may show the same text */
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] size %lu access %s\tmod %s",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime)));
+
+    if (ft->file_size) {
+        if (buf.st_size == ft->last_size) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_size = buf.st_size;
+        }
+    } else if (ft->file_access) {
+        if (buf.st_atime == ft->last_access) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_access = buf.st_atime;
+        }
+    } else if (ft->file_mod) {
+        if (buf.st_mtime == ft->last_mod) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_mod = buf.st_mtime;
+        }
+    }
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] sampled file %s misses %d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         ft->file, ft->nmisses));
+
+    if (ft->nmisses == ft->ndrops) {
+        if (4 < pmix_output_get_verbosity(pmix_psensor_base_framework.framework_output)) {
+            pmix_show_help("help-pmix-psensor-file.txt", "file-stalled", true,
+                           ft->file, ft->last_size, ctime(&ft->last_access), ctime(&ft->last_mod));
+        }
+        /* stop monitoring this client */
+        pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super);
+        /* generate an event */
+        (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN);
+        /* strncpy does not terminate when the source fills the buffer */
+        source.nspace[PMIX_MAX_NSLEN] = '\0';
+        source.rank = ft->requestor->info->rank;
+        rc = PMIx_Notify_event(PMIX_MONITOR_FILE_ALERT, &source,
+                               ft->range, ft->info, ft->ninfo, opcbfunc, ft);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+            /* the completion callback will not run, so drop the
+             * tracker here instead of leaking it */
+            PMIX_RELEASE(ft);
+        }
+        return;
+    }
+
+    /* re-add the timer */
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h
new file mode 100644
index 0000000000..f78502cd8e
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * File movement sensor + */ +#ifndef PMIX_PSENSOR_FILE_H +#define PMIX_PSENSOR_FILE_H + +#include <src/include/pmix_config.h> + +#include "src/class/pmix_list.h" + +#include "src/mca/psensor/psensor.h" + +BEGIN_C_DECLS + +typedef struct { + pmix_psensor_base_component_t super; + pmix_list_t trackers; +} pmix_psensor_file_component_t; + +extern pmix_psensor_file_component_t mca_psensor_file_component; +extern pmix_psensor_base_module_t pmix_psensor_file_module; + + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c new file mode 100644 index 0000000000..2b751d7199 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
/*
 * Component query function: hand back the file-sensor module.
 *
 * module   - receives a pointer to pmix_psensor_file_module
 * priority - receives the fixed value 20 (marked "irrelevant" by the
 *            author; presumably every psensor component is used
 *            regardless of priority - confirm in the base select code)
 *
 * Always returns PMIX_SUCCESS.
 */
static int psensor_file_query(pmix_mca_base_module_t **module, int *priority)
{
    *priority = 20;  /* irrelevant */
    *module = (pmix_mca_base_module_t *)&pmix_psensor_file_module;
    return PMIX_SUCCESS;
}
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pmixdata_DATA = help-pmix-psensor-heartbeat.txt + +sources = \ + psensor_heartbeat.c \ + psensor_heartbeat.h \ + psensor_heartbeat_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_pmix_psensor_heartbeat_DSO +component_noinst = +component_install = mca_psensor_heartbeat.la +else +component_noinst = libmca_psensor_heartbeat.la +component_install = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_psensor_heartbeat_la_SOURCES = $(sources) +mca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_psensor_heartbeat_la_SOURCES =$(sources) +libmca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt similarity index 98% rename from orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt index 4b27231a3a..945e60badb 100644 --- a/orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt @@ -4,9 +4,9 @@ # # Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for the memory usage sensor @@ -18,4 +18,3 @@ Node: %s Process rank: %s Memory used: %luGbytes Memory limit: %luGbytes - diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c new file mode 100644 index 0000000000..0c07084279 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights + * reserved. + * + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include <src/include/pmix_config.h> +#include <pmix_common.h> + +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include <string.h> +#endif /* HAVE_STRING_H */ +#include <stdio.h> +#include <pthread.h> +#include PMIX_EVENT_HEADER + +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/show_help.h" +#include "src/include/pmix_globals.h" +#include "src/mca/ptl/ptl.h" + +#include "src/mca/psensor/base/base.h" +#include "psensor_heartbeat.h" + +/* declare the API functions */ +static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error, + const pmix_info_t *monitor, + const pmix_info_t directives[], size_t ndirs); +static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id); + +/* instantiate the module */ +pmix_psensor_base_module_t pmix_psensor_heartbeat_module = { + .start = heartbeat_start, + .stop = heartbeat_stop +}; + +/* tracker object */ +typedef struct { + pmix_list_item_t super; + pmix_peer_t *requestor; + char *id; + bool event_active; + pmix_event_t ev; + pmix_event_t 
cdev; + struct timeval tv; + uint32_t nbeats; + uint32_t ndrops; + uint32_t nmissed; + pmix_status_t error; + pmix_data_range_t range; + pmix_info_t *info; + size_t ninfo; +} pmix_heartbeat_trkr_t; + +static void ft_constructor(pmix_heartbeat_trkr_t *ft) +{ + ft->requestor = NULL; + ft->id = NULL; + ft->event_active = false; + ft->tv.tv_sec = 0; + ft->tv.tv_usec = 0; + ft->nbeats = 0; + ft->ndrops = 0; + ft->nmissed = 0; + ft->error = PMIX_SUCCESS; + ft->range = PMIX_RANGE_NAMESPACE; + ft->info = NULL; + ft->ninfo = 0; +} +static void ft_destructor(pmix_heartbeat_trkr_t *ft) +{ + if (NULL != ft->requestor) { + PMIX_RELEASE(ft->requestor); + } + if (NULL != ft->id) { + free(ft->id); + } + if (event_active) { + pmix_event_del(&ft->ev); + } + if (NULL != ft->info) { + PMIX_INFO_FREE(ft->info, ft->ninfo); + } +} +PMIX_CLASS_INSTANCE(pmix_heartbeat_trkr_t, + pmix_list_item_t, + ft_constructor, ft_destructor); + +/* define a local caddy */ +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_peer_t *requestor; + char *id; +} heartbeat_caddy_t; +static void cd_con(heartbeat_caddy_t *p) +{ + p->requestor = NULL; + p->id = NULL; +} +static void cd_des(heartbeat_caddy_t *p) +{ + if (NULL != (p->requestor)) { + PMIX_RELEASE(p->requestor); + } + if (NULL != p->id) { + free(p->id); + } +} +PMIX_CLASS_INSTANCE(heartbeat_caddy_t, + pmix_object_t, + cd_con, cd_des); + +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_peer_t *peer; +} pmix_psensor_beat_t; + +static void bcon(pmix_psensor_beat_t *p) +{ + p->peer = NULL; +} +static void bdes(pmix_psensor_beat_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} +PMIX_CLASS_INSTANCE(pmix_psensor_beat_t, + pmix_object_t, + bcon, bdes); + +static void check_heartbeat(int fd, short dummy, void *arg); + +static void add_tracker(int sd, short flags, void *cbdata) +{ + pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; + + /* add the tracker to our list */ + 
pmix_list_append(&mca_psensor_heartbeat_component.trackers, &ft->super); + + /* setup the timer event */ + pmix_event_evtimer_set(pmix_psensor_base.evbase, &ft->ev, + check_heartbeat, ft); + pmix_event_evtimer_add(&ft->ev, &ft->tv); + ft->event_active = true; +} + +static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error, + const pmix_info_t *monitor, + const pmix_info_t directives[], size_t ndirs) +{ + pmix_heartbeat_trkr_t *ft; + size_t n, n2; + + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, + "[%s:%d] checking heartbeat monitoring for requestor %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + requestor->info->nptr->nspace, requestor->info->rank)); + + /* if they didn't ask for heartbeats, then nothing for us to do */ + if (0 != strcmp(monitor->key, PMIX_MONITOR_HEARTBEAT)) { + return PMIX_ERR_TAKE_NEXT_OPTION; + } + + /* setup to track this monitoring operation */ + ft = PMIX_NEW(pmix_heartbeat_trkr_t); + PMIX_RETAIN(requestor); + ft->requestor = requestor; + ft->error = error; + + /* check the directives to see what they want monitored */ + for (n=0; n < ndirs; n++) { + if (0 == strcmp(directives[n].key, PMIX_MONITOR_HEARTBEAT_TIME)) { + ft->tv.tv_sec = directives[n].value.data.uint32; + } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_HEARTBEAT_DROPS)) { + ft->ndrops = directives[n].value.data.uint32; + } else if (0 == strcmp(directives[n].key, PMIX_RANGE)) { + ft->range = directives[n].value.data.range; + } + } + + if (0 == ft->tv.tv_sec) { + /* didn't specify a sample rate, or what should be sampled */ + PMIX_RELEASE(ft); + return PMIX_ERR_BAD_PARAM; + } + + /* need to push into our event base to add this to our trackers */ + pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, + EV_WRITE, add_tracker, ft); + pmix_event_active(&ft->cdev, EV_WRITE, 1); + + return PMIX_SUCCESS; +} + +static void del_tracker(int sd, short flags, void *cbdata) +{ + heartbeat_caddy_t *cd = 
(heartbeat_caddy_t*)cbdata; + pmix_heartbeat_trkr_t *ft, *ftnext; + + /* remove the tracker from our list */ + PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { + if (ft->requestor != cd->requestor) { + continue; + } + if (NULL == cd->id || + (NULL != ft->id && 0 == strcmp(ft->id, cd->id))) { + pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super); + PMIX_RELEASE(ft); + } + } + PMIX_RELEASE(cd); +} + +static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id) +{ + heartbeat_caddy_t *cd; + + cd = PMIX_NEW(heartbeat_caddy_t); + PMIX_RETAIN(requestor); + cd->requestor = requestor; + cd->id = strdup(id); + + /* need to push into our event base to add this to our trackers */ + pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, + EV_WRITE, del_tracker, cd); + pmix_event_active(&cd->ev, EV_WRITE, 1); + + return PMIX_SUCCESS; +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; + + PMIX_RELEASE(ft); +} + +/* this function automatically gets periodically called + * by the event library so we can check on the state + * of the various procs we are monitoring + */ +static void check_heartbeat(int fd, short dummy, void *cbdata) +{ + pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; + pmix_status_t rc; + pmix_proc_t source; + + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, + "[%s:%d] sensor:check_heartbeat for proc %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + + if (0 == ft->nbeats) { + /* no heartbeat recvd in last window */ + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, + "[%s:%d] sensor:check_heartbeat failed for proc %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + /* stop monitoring this 
client */ + pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super); + /* generate an event */ + (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN); + source.rank = ft->requestor->info->rank; + rc = PMIx_Notify_event(PMIX_MONITOR_HEARTBEAT_ALERT, &source, + ft->range, ft->info, ft->ninfo, opcbfunc, ft); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + return; + } else { + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, + "[%s:%d] sensor:check_heartbeat detected %d beats for proc %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, ft->nbeats, + ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + } + /* reset for next period */ + ft->nbeats = 0; + + /* reset the timer */ + pmix_event_evtimer_add(&ft->ev, &ft->tv); +} + +static void add_beat(int sd, short args, void *cbdata) +{ + pmix_psensor_beat_t *b = (pmix_psensor_beat_t*)cbdata; + pmix_heartbeat_trkr_t *ft; + + /* find this peer in our trackers */ + PMIX_LIST_FOREACH(ft, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { + if (ft->requestor == b->peer) { + /* increment the beat count */ + ++ft->nbeats; + break; + } + } + + PMIX_RELEASE(b); +} + +void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_psensor_beat_t *b; + + b = PMIX_NEW(pmix_psensor_beat_t); + PMIX_RETAIN(peer); + b->peer = peer; + + /* shift this to our thread for processing */ + pmix_event_assign(&b->ev, pmix_psensor_base.evbase, -1, + EV_WRITE, add_beat, b); + pmix_event_active(&b->ev, EV_WRITE, 1); +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h new file mode 100644 index 0000000000..2f904b6035 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010 Cisco 
Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Heartbeat sensor + */ +#ifndef PMIX_PSENSOR_HEARTBEAT_H +#define PMIX_PSENSOR_HEARTBEAT_H + +#include <src/include/pmix_config.h> +#include <src/include/types.h> + +#include "src/class/pmix_list.h" +#include "src/include/pmix_globals.h" +#include "src/mca/psensor/psensor.h" + +BEGIN_C_DECLS + +typedef struct { + pmix_psensor_base_component_t super; + pmix_list_t trackers; +} pmix_psensor_heartbeat_component_t; + +PMIX_EXPORT extern pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component; +extern pmix_psensor_base_module_t pmix_psensor_heartbeat_module; + +void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata); + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c new file mode 100644 index 0000000000..e16a26a347 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include <src/include/pmix_config.h> +#include <pmix_common.h> + +#include "src/mca/ptl/ptl.h" +#include "src/mca/psensor/base/base.h" +#include "src/mca/psensor/heartbeat/psensor_heartbeat.h" + +/* + * Local functions + */ + +static int heartbeat_open(void); +static int heartbeat_close(void); +static int heartbeat_query(pmix_mca_base_module_t **module, int *priority); + +pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component = { + .super = { + .base = { + PMIX_PSENSOR_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "heartbeat", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + heartbeat_open, /* component open */ + heartbeat_close, /* component close */ + heartbeat_query /* component query */ + } + } +}; + + +/** + * component open/close/init function + */ +static int heartbeat_open(void) +{ + PMIX_CONSTRUCT(&mca_psensor_heartbeat_component.trackers, pmix_list_t); + + /* setup to receive heartbeats */ + pmix_ptl.recv(pmix_globals.mypeer, pmix_psensor_heartbeat_recv_beats, PMIX_PTL_TAG_HEARTBEAT); + + return PMIX_SUCCESS; +} + + +static int heartbeat_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 5; // irrelevant + *module = (pmix_mca_base_module_t *)&pmix_psensor_heartbeat_module; + return PMIX_SUCCESS; +} + +/** + * Close all subsystems. 
/**
 * Component close function: undo what heartbeat_open() set up.
 *
 * Requests cancellation of the persistent receive posted on
 * PMIX_PTL_TAG_HEARTBEAT (the returned status is not examined) and
 * destructs the component's tracker list.
 *
 * Always returns PMIX_SUCCESS.
 */
static int heartbeat_close(void)
{
    /* cancel our persistent recv */
    pmix_ptl.cancel(pmix_globals.mypeer, PMIX_PTL_TAG_HEARTBEAT);

    /* presumably this also releases any trackers still on the list -
     * confirm against the PMIX_LIST_DESTRUCT definition */
    PMIX_LIST_DESTRUCT(&mca_psensor_heartbeat_component.trackers);

    return PMIX_SUCCESS;
}
/* start a sensor operation:
 *
 * requestor - the process requesting this operation
 *
 * error - a status code supplied by the caller; presumably the code to
 *         be reported in the event generated when the monitored
 *         condition is detected - confirm against the components
 *
 * monitor - a PMIx attribute specifying what is to be monitored
 *
 * directives - an array of ndirs pmix_info_t specifying relevant limits
 *              on values, and action to be taken when limits exceeded.
 *              Can include user-provided "id" string
 *
 * returns a pmix_status_t; the heartbeat component, for example,
 * returns PMIX_ERR_TAKE_NEXT_OPTION when the monitor attribute is not
 * one it handles */
typedef pmix_status_t (*pmix_psensor_base_module_start_fn_t)(pmix_peer_t *requestor, pmix_status_t error,
                                                             const pmix_info_t *monitor,
                                                             const pmix_info_t directives[], size_t ndirs);
* $COPYRIGHT$ @@ -73,6 +73,7 @@ struct pmix_ptl_globals_t { pmix_list_t actives; bool initialized; pmix_list_t posted_recvs; // list of pmix_ptl_posted_recv_t + pmix_list_t unexpected_msgs; int stop_thread[2]; bool listen_thread_active; pmix_list_t listeners; @@ -93,6 +94,11 @@ PMIX_EXPORT pmix_status_t pmix_ptl_stub_send_oneway(struct pmix_peer_t *peer, pmix_ptl_tag_t tag); PMIX_EXPORT pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, pmix_info_t info[], size_t ninfo); +PMIX_EXPORT pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer, + pmix_ptl_cbfunc_t cbfunc, + pmix_ptl_tag_t tag); +PMIX_EXPORT pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer, + pmix_ptl_tag_t tag); PMIX_EXPORT pmix_status_t pmix_ptl_base_start_listening(pmix_info_t *info, size_t ninfo); PMIX_EXPORT void pmix_ptl_base_stop_listening(void); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index 00799c4608..c17029d46f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -61,6 +61,8 @@ pmix_ptl_API_t pmix_ptl = { .send_recv = pmix_ptl_stub_send_recv, .send_oneway = pmix_ptl_stub_send_oneway, .connect_to_peer = pmix_ptl_stub_connect_to_peer, + .recv = pmix_ptl_stub_register_recv, + .cancel = pmix_ptl_stub_cancel_recv, .start_listening = pmix_ptl_base_start_listening, .stop_listening = pmix_ptl_base_stop_listening }; @@ -88,6 +90,7 @@ static pmix_status_t pmix_ptl_close(void) /* the components will cleanup when closed */ PMIX_LIST_DESTRUCT(&pmix_ptl_globals.actives); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.posted_recvs); + PMIX_LIST_DESTRUCT(&pmix_ptl_globals.unexpected_msgs); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.listeners); return pmix_mca_base_framework_components_close(&pmix_ptl_base_framework, NULL); @@ -99,6 +102,7 @@ static pmix_status_t 
pmix_ptl_open(pmix_mca_base_open_flag_t flags) pmix_ptl_globals.initialized = true; PMIX_CONSTRUCT(&pmix_ptl_globals.actives, pmix_list_t); PMIX_CONSTRUCT(&pmix_ptl_globals.posted_recvs, pmix_list_t); + PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t); pmix_ptl_globals.listen_thread_active = false; PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t); pmix_client_globals.myserver.sd = -1; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index b51c790205..d7f77a3d69 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -46,7 +46,7 @@ #include "src/mca/ptl/base/base.h" -static uint32_t current_tag = 1; // 0 is reserved for system purposes +static uint32_t current_tag = PMIX_PTL_TAG_DYNAMIC; static void _notify_complete(pmix_status_t status, void *cbdata) { @@ -162,7 +162,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) } else { iov_count = 1; } -retry: + retry: rc = writev(sd, iov, iov_count); if (PMIX_LIKELY(rc == remain)) { /* we successfully sent the header and the msg data if any */ @@ -521,16 +521,16 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) return; } - /* set the tag */ - tag = current_tag++; + /* take the next tag in the sequence */ + current_tag++; + if (UINT32_MAX == current_tag ) { + current_tag = PMIX_PTL_TAG_DYNAMIC; + } + tag = current_tag; if (NULL != ms->cbfunc) { /* if a callback msg is expected, setup a recv for it */ req = PMIX_NEW(pmix_ptl_posted_recv_t); - /* take the next tag in the sequence */ - if (UINT32_MAX == current_tag ) { - current_tag = 1; - } req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; @@ -597,23 +597,29 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; } msg->data = NULL; // protect the data region - 
if (NULL != rcv->cbfunc) { - rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); - } + rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); PMIX_DESTRUCT(&buf); // free's the msg data - /* also done with the recv, if not a wildcard or the error tag */ - if (UINT32_MAX != rcv->tag && 0 != rcv->tag) { - pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super); - PMIX_RELEASE(rcv); - } - PMIX_RELEASE(msg); - return; } + /* done with the recv if it is a dynamic tag */ + if (PMIX_PTL_TAG_DYNAMIC <= rcv->tag && UINT_MAX != rcv->tag) { + pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super); + PMIX_RELEASE(rcv); + } + PMIX_RELEASE(msg); + return; } } - /* we get here if no matching recv was found - this is an error */ - pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); - PMIX_RELEASE(msg); - PMIX_REPORT_EVENT(PMIX_ERROR, _notify_complete); + /* if the tag in this message is above the dynamic marker, then + * that is an error */ + if (PMIX_PTL_TAG_DYNAMIC <= msg->hdr.tag) { + pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); + PMIX_RELEASE(msg); + PMIX_REPORT_EVENT(PMIX_ERROR, _notify_complete); + return; + } + + /* it is possible that someone may post a recv for this message + * at some point, so we have to hold onto it */ + pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c index a82d4112e6..f13fde1bd7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -105,3 +105,92 @@ pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, return PMIX_ERR_UNREACH; } + +static void post_recv(int fd, short args, void *cbdata) +{ + pmix_ptl_posted_recv_t *req = (pmix_ptl_posted_recv_t*)cbdata; + pmix_ptl_recv_t *msg, *nmsg; + pmix_buffer_t buf; + + pmix_output_verbose(5, pmix_globals.debug_output, + "posting recv on tag %d", req->tag); + + /* add it to the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super); + + /* now check the unexpected msg queue to see if we already + * recvd something for it */ + PMIX_LIST_FOREACH_SAFE(msg, nmsg, &pmix_ptl_globals.unexpected_msgs, pmix_ptl_recv_t) { + if (msg->hdr.tag == req->tag || UINT_MAX == req->tag) { + if (NULL != req->cbfunc) { + /* construct and load the buffer */ + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + if (NULL != msg->data) { + buf.base_ptr = (char*)msg->data; + buf.bytes_allocated = buf.bytes_used = msg->hdr.nbytes; + buf.unpack_ptr = buf.base_ptr; + buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; + } + msg->data = NULL; // protect the data region + req->cbfunc(msg->peer, &msg->hdr, &buf, req->cbdata); + PMIX_DESTRUCT(&buf); // free's the msg data + } + pmix_list_remove_item(&pmix_ptl_globals.unexpected_msgs, &msg->super); + PMIX_RELEASE(msg); + } + } +} + +pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer, + pmix_ptl_cbfunc_t cbfunc, + pmix_ptl_tag_t tag) +{ + pmix_ptl_posted_recv_t *req; + + req = PMIX_NEW(pmix_ptl_posted_recv_t); + if (NULL == req) { + return PMIX_ERR_NOMEM; + } + req->tag = tag; + req->cbfunc = cbfunc; + /* have to push this into an event so we can add this + * to the list of posted recvs */ + pmix_event_assign(&(req->ev), pmix_globals.evbase, -1, + EV_WRITE, post_recv, req); + pmix_event_active(&(req->ev), EV_WRITE, 1); + return PMIX_SUCCESS; +} + +static void cancel_recv(int fd, short args, void *cbdata) +{ + pmix_ptl_posted_recv_t *req = 
(pmix_ptl_posted_recv_t*)cbdata; + pmix_ptl_posted_recv_t *rcv; + + PMIX_LIST_FOREACH(rcv, &pmix_ptl_globals.posted_recvs, pmix_ptl_posted_recv_t) { + if (rcv->tag == req->tag) { + pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super); + PMIX_RELEASE(rcv); + PMIX_RELEASE(req); + return; + } + } + PMIX_RELEASE(req); +} + +pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer, + pmix_ptl_tag_t tag) +{ + pmix_ptl_posted_recv_t *req; + + req = PMIX_NEW(pmix_ptl_posted_recv_t); + if (NULL == req) { + return PMIX_ERR_NOMEM; + } + req->tag = tag; + /* have to push this into an event so we can modify + * the list of posted recvs */ + pmix_event_assign(&(req->ev), pmix_globals.evbase, -1, + EV_WRITE, cancel_recv, req); + pmix_event_active(&(req->ev), EV_WRITE, 1); + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h index 3681f8bb46..f2f5ad6033 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. 
@@ -110,6 +110,15 @@ typedef pmix_status_t (*pmix_ptl_send_fn_t)(struct pmix_peer_t *peer, pmix_buffer_t *bfr, pmix_ptl_tag_t tag); +/* (ONE-WAY) register a persistent recv */ +typedef pmix_status_t (*pmix_ptl_recv_fn_t)(struct pmix_peer_t *peer, + pmix_ptl_cbfunc_t cbfunc, + pmix_ptl_tag_t tag); + +/* Cancel a persistent recv */ +typedef pmix_status_t (*pmix_ptl_cancel_fn_t)(struct pmix_peer_t *peer, + pmix_ptl_tag_t tag); + /* connect to a peer - this is a blocking function * to establish a connection to a peer. It assigns * the corresponding module to the peer's compat @@ -126,6 +135,8 @@ struct pmix_ptl_module_t { pmix_ptl_finalize_fn_t finalize; pmix_ptl_send_recv_fn_t send_recv; pmix_ptl_send_fn_t send; + pmix_ptl_recv_fn_t recv; + pmix_ptl_cancel_fn_t cancel; pmix_ptl_connect_to_peer_fn_t connect_to_peer; }; typedef struct pmix_ptl_module_t pmix_ptl_module_t; @@ -152,6 +163,8 @@ typedef struct { pmix_ptl_get_available_modules_fn_t get_available_modules; pmix_ptl_send_recv_fn_t send_recv; pmix_ptl_send_fn_t send_oneway; + pmix_ptl_recv_fn_t recv; + pmix_ptl_cancel_fn_t cancel; pmix_ptl_connect_to_peer_fn_t connect_to_peer; pmix_ptl_start_listening_fn_t start_listening; pmix_ptl_stop_listening_fn_t stop_listening; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index 782a10779b..e5571c35db 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -63,6 +63,16 @@ struct pmix_ptl_module_t; /**** MESSAGING STRUCTURES ****/ typedef uint32_t pmix_ptl_tag_t; +/* define a range of "reserved" tags - these + * are tags that are used for persistent recvs + * within the system */ +#define PMIX_PTL_TAG_NOTIFY 0 +#define PMIX_PTL_TAG_HEARTBEAT 1 + +/* define the start of dynamic tags that are + * assigned for send/recv operations */ +#define PMIX_PTL_TAG_DYNAMIC 100 + /* header for messages */ typedef struct { diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h
index 9a09a049c3..1dfb1df48b 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
@@ -13,6 +13,11 @@
 
 #include "pmix_config.h"
 
+#include <pthread.h>
+#include PMIX_EVENT_HEADER
+
+#include "src/include/types.h"
+
 /**
  * Initialize a progress thread name; if a progress thread is not
  * already associated with that name, start a progress thread.
diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c
index dd37c289ba..9e4b220ad1 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c
+++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c
@@ -2345,6 +2345,18 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
         return rc;
     }
 
+    if (PMIX_JOB_CONTROL_CMD == cmd) {
+        PMIX_PEER_CADDY(cd, peer, tag);
+        rc = pmix_server_job_ctrl(peer, buf, query_cbfunc, cd);
+        return rc;
+    }
+
+    if (PMIX_MONITOR_CMD == cmd) {
+        PMIX_PEER_CADDY(cd, peer, tag);
+        rc = pmix_server_monitor(peer, buf, query_cbfunc, cd);
+        return rc;
+    }
+
     return PMIX_ERR_NOT_SUPPORTED;
 }
 
diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c
index ba9c749d03..876c28be5d 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c
+++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c
@@ -1562,6 +1562,183 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer,
     return rc;
 }
 
+/* Handle a job control request received from a client. The targets
+ * and directives are unpacked from buf into a newly allocated caddy,
+ * which also carries cbdata, and are then passed to the host server's
+ * job_control function together with cbfunc; the caddy is given to
+ * the host as the callback data.
+ *
+ * Returns PMIX_ERR_NOT_SUPPORTED if the host provides no job_control
+ * function, PMIX_ERR_NOMEM if an allocation fails, or the error
+ * reported by the unpack or by the host. The caddy is released on
+ * any failure after it has been allocated. Returns PMIX_SUCCESS once
+ * the host has accepted the request. */
+pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
+                                   pmix_buffer_t *buf,
+                                   pmix_info_cbfunc_t cbfunc,
+                                   void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cd;
+    pmix_proc_t proc;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "recvd job control request from client");
+
+    if (NULL == pmix_host_server.job_control) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_query_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->cbdata = cbdata;
+
+    /* unpack the number of targets */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ntargets, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    if (0 < cd->ntargets) {
+        PMIX_PROC_CREATE(cd->targets, cd->ntargets);
+        if (NULL == cd->targets) {
+            cd->ntargets = 0;
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ntargets;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->targets, &cnt, PMIX_PROC))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+    /* unpack the number of info objects */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the info */
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            cd->ninfo = 0;
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* setup the requesting peer name - zero the struct first so the
+     * nspace is NUL-terminated even when strncpy copies a full
+     * PMIX_MAX_NSLEN bytes (assumes nspace holds PMIX_MAX_NSLEN+1) */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
+    proc.rank = peer->info->rank;
+
+    /* ask the host to execute the request */
+    if (PMIX_SUCCESS != (rc = pmix_host_server.job_control(&proc,
+                                                           cd->targets, cd->ntargets,
+                                                           cd->info, cd->ninfo,
+                                                           cbfunc, cd))) {
+        goto exit;
+    }
+    return PMIX_SUCCESS;
+
+  exit:
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
+/* Handle a monitoring request received from a client. The status
+ * code and directives are unpacked from buf - the directives into a
+ * newly allocated caddy that also carries cbdata - and are then
+ * passed to the host server's monitor function together with cbfunc;
+ * the caddy is given to the host as the callback data.
+ *
+ * Returns PMIX_ERR_NOT_SUPPORTED if the host provides no monitor
+ * function, PMIX_ERR_NOMEM if an allocation fails, or the error
+ * reported by the unpack or by the host. The caddy is released on
+ * any failure after it has been allocated. Returns PMIX_SUCCESS once
+ * the host has accepted the request. */
+pmix_status_t pmix_server_monitor(pmix_peer_t *peer,
+                                  pmix_buffer_t *buf,
+                                  pmix_info_cbfunc_t cbfunc,
+                                  void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc, error;
+    pmix_query_caddy_t *cd;
+    pmix_proc_t proc;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "recvd monitor request from client");
+
+    if (NULL == pmix_host_server.monitor) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_query_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->cbdata = cbdata;
+
+    /* unpack the error code */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &error, &cnt, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+
+    /* unpack the number of directives */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the directives */
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            cd->ninfo = 0;
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* setup the requesting peer name - zero the struct first so the
+     * nspace is NUL-terminated even when strncpy copies a full
+     * PMIX_MAX_NSLEN bytes (assumes nspace holds PMIX_MAX_NSLEN+1) */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
+    proc.rank = peer->info->rank;
+
+    /* ask the host to execute the request */
+    if (PMIX_SUCCESS != (rc = pmix_host_server.monitor(&proc, error,
+                                                       cd->info, cd->ninfo,
+                                                       cbfunc, cd))) {
+        goto exit;
+    }
+    return PMIX_SUCCESS;
+
+  exit:
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
 /***** INSTANCE SERVER LIBRARY CLASSES *****/
 static void tcon(pmix_server_trkr_t *t)
 {
diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h
index faad880234..f502cd33a3 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h
@@ -218,6 +218,16 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer,
                                 pmix_info_cbfunc_t cbfunc,
                                 void *cbdata);
 
+pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
+                                   pmix_buffer_t *buf,
+                                   pmix_info_cbfunc_t cbfunc,
+                                   void *cbdata);
+
+pmix_status_t pmix_server_monitor(pmix_peer_t *peer,
+                                  pmix_buffer_t *buf,
+                                  pmix_info_cbfunc_t cbfunc,
+                                  void *cbdata);
+
 pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer,
                                                   pmix_buffer_t *buf,
                                                   pmix_op_cbfunc_t cbfunc,
diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/error.c b/opal/mca/pmix/pmix2x/pmix/src/util/error.c
index 0850e72edb..d75bc2cd78 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/util/error.c
+++
b/opal/mca/pmix/pmix2x/pmix/src/util/error.c @@ -56,6 +56,8 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "INVALID-KEYVAL"; case PMIX_ERR_INVALID_NUM_PARSED: return "INVALID-NUM-PARSED"; + case PMIX_ERR_TAKE_NEXT_OPTION: + return "TAKE-NEXT-OPTION"; case PMIX_ERR_INVALID_ARGS: return "INVALID-ARGS"; @@ -157,6 +159,14 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "PMIX_ERR_WILDCARD"; case PMIX_NOTIFY_ALLOC_COMPLETE: return "PMIX ALLOC OPERATION COMPLETE"; + case PMIX_JCTRL_CHECKPOINT: + return "PMIX JOB CONTROL CHECKPOINT"; + case PMIX_JCTRL_PREEMPT_ALERT: + return "PMIX PRE-EMPTION ALERT"; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return "PMIX HEARTBEAT ALERT"; + case PMIX_MONITOR_FILE_ALERT: + return "PMIX FILE MONITOR ALERT"; case PMIX_SUCCESS: return "SUCCESS"; default: diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/error.h b/opal/mca/pmix/pmix2x/pmix/src/util/error.h index b72cecf518..1883c442e4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/error.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/error.h @@ -37,6 +37,7 @@ #define PMIX_ERR_NETWORK_NOT_PARSEABLE (PMIX_INTERNAL_ERR_BASE - 33) #define PMIX_ERR_FILE_OPEN_FAILURE (PMIX_INTERNAL_ERR_BASE - 34) #define PMIX_ERR_FILE_READ_FAILURE (PMIX_INTERNAL_ERR_BASE - 35) +#define PMIX_ERR_TAKE_NEXT_OPTION (PMIX_INTERNAL_ERR_BASE - 36) #define PMIX_ERROR_LOG(r) \ do { \ diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 10f6a5e772..28d638f3b5 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -493,6 +493,12 @@ int pmix2x_convert_rc(pmix_status_t rc) case PMIX_QUERY_PARTIAL_SUCCESS: return OPAL_ERR_PARTIAL_SUCCESS; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return OPAL_ERR_HEARTBEAT_ALERT; + + case PMIX_MONITOR_FILE_ALERT: + return OPAL_ERR_FILE_ALERT; + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: @@ -1333,6 +1339,22 @@ static void pmix2x_log(opal_list_t *info, OBJ_RELEASE(cd); } 
+opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir) +{ + switch (dir) { + case PMIX_ALLOC_NEW: + return OPAL_PMIX_ALLOC_NEW; + case PMIX_ALLOC_EXTEND: + return OPAL_PMIX_ALLOC_EXTEND; + case PMIX_ALLOC_RELEASE: + return OPAL_PMIX_ALLOC_RELEASE; + case PMIX_ALLOC_REAQUIRE: + return OPAL_PMIX_ALLOC_REAQCUIRE; + default: + return OPAL_PMIX_ALLOC_UNDEF; + } +} + /**** INSTANTIATE INTERNAL CLASSES ****/ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, opal_list_item_t, diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 541978e482..e011000e64 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -279,6 +279,8 @@ OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v, OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, const pmix_value_t *v); +OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index c08cdf27ea..7f06b73b6b 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -45,63 +45,73 @@ /* These are the interfaces used by the embedded PMIx server * to call up into ORTE for service requests */ - static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, - int status, const char msg[], - pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - char *data, size_t ndata, - 
pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_publish_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, +static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, - pmix_lookup_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t 
napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, - const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, - pmix_spawn_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_notify_event(pmix_status_t code, - const pmix_proc_t *source, - pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_query(pmix_proc_t *proct, - pmix_query_t *queryies, size_t nqueries, - pmix_info_cbfunc_t cbfunc, +static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t 
cbfunc, void *cbdata);
+static pmix_status_t server_query(pmix_proc_t *proct,
+                                  pmix_query_t *queryies, size_t nqueries,
+                                  pmix_info_cbfunc_t cbfunc,
+                                  void *cbdata);
+static void server_tool_connection(pmix_info_t *info, size_t ninfo,
+                                   pmix_tool_connection_cbfunc_t cbfunc,
                                    void *cbdata);
- static void server_tool_connection(pmix_info_t *info, size_t ninfo,
-                                    pmix_tool_connection_cbfunc_t cbfunc,
-                                    void *cbdata);
 static void server_log(const pmix_proc_t *client,
                        const pmix_info_t data[], size_t ndata,
                        const pmix_info_t directives[], size_t ndirs,
                        pmix_op_cbfunc_t cbfunc, void *cbdata);
 
- pmix_server_module_t mymodule = {
+static pmix_status_t server_allocate(const pmix_proc_t *client,
+                                     pmix_alloc_directive_t directive,
+                                     const pmix_info_t data[], size_t ndata,
+                                     pmix_info_cbfunc_t cbfunc, void *cbdata);
+
+static pmix_status_t server_job_control(const pmix_proc_t *requestor,
+                                        const pmix_proc_t targets[], size_t ntargets,
+                                        const pmix_info_t directives[], size_t ndirs,
+                                        pmix_info_cbfunc_t cbfunc, void *cbdata);
+
+pmix_server_module_t mymodule = {
     .client_connected = server_client_connected_fn,
     .client_finalized = server_client_finalized_fn,
     .abort = server_abort_fn,
@@ -118,7 +128,11 @@ static void server_log(const pmix_proc_t *client,
     .notify_event = server_notify_event,
     .query = server_query,
     .tool_connected = server_tool_connection,
-    .log = server_log
+    .log = server_log,
+    .allocate = server_allocate,
+    .job_control = server_job_control
+    /* we do not support monitoring, but use the
+     * PMIx internal monitoring capability */
 };
 
 opal_pmix_server_module_t *host_module = NULL;
@@ -1052,3 +1066,142 @@ static void server_log(const pmix_proc_t *proct,
                       &opalcaddy->apps,
                       opal_opcbfunc, opalcaddy);
 }
+
+/* Upcall for an allocation request from a client. The requestor name,
+ * the directive and the data array are converted to their OPAL forms
+ * and passed to the host module's allocate function; the caddy holding
+ * cbfunc and cbdata is handed to the host together with info_cbfunc.
+ *
+ * Returns PMIX_ERR_NOT_SUPPORTED if no host allocate function is
+ * available, the converted OPAL error code if a conversion or the
+ * upcall fails (the caddy is released in that case), and PMIX_SUCCESS
+ * otherwise. */
+static pmix_status_t server_allocate(const pmix_proc_t *proct,
+                                     pmix_alloc_directive_t directive,
+                                     const pmix_info_t data[], size_t ndata,
+                                     pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix2x_opalcaddy_t *opalcaddy;
+    opal_process_name_t requestor;
+    int rc;
+    size_t n;
+    opal_value_t *oinfo;
+    opal_pmix_alloc_directive_t odir;
+
+    if (NULL == host_module || NULL == host_module->allocate) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
+    opalcaddy->infocbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the requestor */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+    requestor.vpid = pmix2x_convert_rank(proct->rank);
+
+    /* convert the directive */
+    odir = pmix2x_convert_allocdir(directive);
+
+    /* convert the data */
+    for (n=0; n < ndata; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        /* carry the key across - pmix2x_value_unload only converts
+         * the value, so the host could not identify the entry otherwise */
+        oinfo->key = strdup(data[n].key);
+        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+    }
+
+    /* pass the call upwards */
+    if (OPAL_SUCCESS != (rc = host_module->allocate(&requestor, odir,
+                                                    &opalcaddy->info,
+                                                    info_cbfunc, opalcaddy))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+
+    return PMIX_SUCCESS;
+
+}
+
+/* Upcall for a job control request from a client. The requestor name,
+ * the target procs and the directives are converted to their OPAL
+ * forms and passed to the host module's job_control function; the
+ * caddy holding cbfunc and cbdata is handed to the host together with
+ * info_cbfunc.
+ *
+ * Returns PMIX_ERR_NOT_SUPPORTED if no host job_control function is
+ * available, the converted OPAL error code if a conversion or the
+ * upcall fails (the caddy is released in that case), and PMIX_SUCCESS
+ * otherwise. */
+static pmix_status_t server_job_control(const pmix_proc_t *proct,
+                                        const pmix_proc_t targets[], size_t ntargets,
+                                        const pmix_info_t directives[], size_t ndirs,
+                                        pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix2x_opalcaddy_t *opalcaddy;
+    opal_process_name_t requestor;
+    int rc;
+    size_t n;
+    opal_value_t *oinfo;
+    opal_namelist_t *nm;
+
+    if (NULL == host_module || NULL == host_module->job_control) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
+    opalcaddy->infocbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the requestor */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+    requestor.vpid = pmix2x_convert_rank(proct->rank);
+
+    /* convert the targets */
+    for (n=0; n < ntargets; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, targets[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+        nm->name.vpid = pmix2x_convert_rank(targets[n].rank);
+    }
+
+    /* convert the directives */
+    for (n=0; n < ndirs; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        /* carry the key across - pmix2x_value_unload only converts
+         * the value, so the host could not identify the entry otherwise */
+        oinfo->key = strdup(directives[n].key);
+        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+    }
+
+    /* pass the call upwards */
+    if (OPAL_SUCCESS != (rc = host_module->job_control(&requestor,
+                                                       &opalcaddy->procs,
+                                                       &opalcaddy->info,
+                                                       info_cbfunc, opalcaddy))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix_server.h b/opal/mca/pmix/pmix_server.h
index d83ed39e88..8bfaf467bb 100644
--- a/opal/mca/pmix/pmix_server.h
+++ b/opal/mca/pmix/pmix_server.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -230,6 +230,19 @@ typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd); typedef int (*opal_pmix_server_listener_fn_t)(int listening_sd, opal_pmix_connection_cbfunc_t cbfunc); +/* Request allocation modifications on behalf of a client */ +typedef int (*opal_pmix_server_alloc_fn_t)(const opal_process_name_t *client, + opal_pmix_alloc_directive_t directive, + opal_list_t *data, + opal_pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Execute a job control action on behalf of a client */ +typedef int (*opal_pmix_server_job_control_fn_t)(const opal_process_name_t *requestor, + opal_list_t *targets, opal_list_t *directives, + opal_pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* we do not provide a monitoring capability */ + typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_client_connected_fn_t client_connected; opal_pmix_server_client_finalized_fn_t client_finalized; @@ -249,6 +262,8 @@ typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_tool_connection_fn_t tool_connected; opal_pmix_server_log_fn_t log; opal_pmix_server_listener_fn_t listener; + opal_pmix_server_alloc_fn_t allocate; + opal_pmix_server_job_control_fn_t job_control; } opal_pmix_server_module_t; diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 392c3401e4..113ea02c33 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -32,6 +32,11 @@ BEGIN_C_DECLS * that key */ #define OPAL_PMIX_RANK_WILDCARD UINT32_MAX-1 +/* other special rank values will be used to define + * groups of ranks for use in collectives */ +#define OPAL_PMIX_RANK_LOCAL_NODE UINT32_MAX-2 // all ranks on local node + + /* define a set of "standard" attributes that can * be queried. 
Implementations (and users) are free to extend as * desired, so the get functions need to be capable @@ -55,12 +60,15 @@ BEGIN_C_DECLS #define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to // a local system-level PMIx server #define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first +#define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data +#define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server /* identification attributes */ #define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id #define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id + /* attributes for the rendezvous socket */ #define OPAL_PMIX_USOCK_DISABLE "pmix.usock.disable" // (bool) disable legacy usock support #define OPAL_PMIX_SOCKET_MODE "pmix.sockmode" // (uint32_t) POSIX mode_t (9 bits valid) @@ -76,6 +84,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_TCP_DISABLE_IPV4 "pmix.tcp.disipv4" // (bool) true to disable IPv4 family #define OPAL_PMIX_TCP_DISABLE_IPV6 "pmix.tcp.disipv6" // (bool) true to disable IPv6 family + /* general proc-level attributes */ #define OPAL_PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch #define OPAL_PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc @@ -89,6 +98,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc #define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories + /* information about relative ranks as assigned by the RM */ #define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier #define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job @@ -104,25 +114,26 @@ BEGIN_C_DECLS #define OPAL_PMIX_LOCALLDR "pmix.lldr" // (uint64_t) 
opal_identifier of lowest rank on this node within this job #define OPAL_PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job #define OPAL_PMIX_PROC_PID "pmix.ppid" // (pid_t) pid of specified proc - -/**** no PMIx equivalent ****/ -#define OPAL_PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs -#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) topology signature string -#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location -#define OPAL_PMIX_AVAIL_PHYS_MEMORY "pmix.pmem" // (uint64_t) total available physical memory on this node - +#define OPAL_PMIX_SESSION_ID "pmix.session.id" // (uint32_t) session identifier #define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for the specified nspace #define OPAL_PMIX_ALLOCATED_NODELIST "pmix.alist" // (char*) comma-delimited list of all nodes in this allocation regardless of // whether or not they currently host procs. 
#define OPAL_PMIX_HOSTNAME "pmix.hname" // (char*) name of the host the specified proc is on #define OPAL_PMIX_NODEID "pmix.nodeid" // (uint32_t) node identifier #define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace +#define OPAL_PMIX_LOCAL_PROCS "pmix.lprocs" // (opal_list_t*) list of opal_namelist_t of procs on the specified node #define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace #define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and // thus cannot be prefixed with "pmix" +#define OPAL_PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs + + +/* Memory info */ +#define OPAL_PMIX_AVAIL_PHYS_MEMORY "pmix.pmem" // (uint64_t) total available physical memory on this node #define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon #define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes + /* size info */ #define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace #define OPAL_PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job @@ -133,11 +144,15 @@ BEGIN_C_DECLS #define OPAL_PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job #define OPAL_PMIX_NUM_NODES "pmix.num.nodes" // (uint32_t) #nodes in this nspace + /* topology info */ #define OPAL_PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology #define OPAL_PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology #define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for this job #define OPAL_PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object +#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) 
topology signature string +#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location + /* request-related info */ #define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation @@ -156,16 +171,19 @@ BEGIN_C_DECLS #define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the // specified operation + /* attribute used by host server to pass data to the server convenience library - the * data will then be parsed and provided to the local clients */ #define OPAL_PMIX_PROC_DATA "pmix.pdata" // (pmix_value_array_t) starts with rank, then contains more data #define OPAL_PMIX_NODE_MAP "pmix.nmap" // (char*) regex of nodes containing procs for this job #define OPAL_PMIX_PROC_MAP "pmix.pmap" // (char*) regex describing procs on each node within this job + /* attributes used internally to communicate data from the server to the client */ #define OPAL_PMIX_PROC_BLOB "pmix.pblob" // (pmix_byte_object_t) packed blob of process data #define OPAL_PMIX_MAP_BLOB "pmix.mblob" // (pmix_byte_object_t) packed blob of process location + /* error handler registration and notification info keys */ #define OPAL_PMIX_EVENT_HDLR_NAME "pmix.evname" // (char*) string name identifying this handler #define OPAL_PMIX_EVENT_JOB_LEVEL "pmix.evjob" // (bool) register for job-specific events only @@ -187,7 +205,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response -/* attributes used to describe "spawm" attributes */ +/* attributes used to describe "spawn" attributes */ #define OPAL_PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use #define OPAL_PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs #define OPAL_PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs @@ -229,19 +247,89 @@ BEGIN_C_DECLS #define 
OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only #define OPAL_PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // report average values #define OPAL_PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // report minimum and maximum value +#define OPAL_PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status + // is being requested +#define OPAL_PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation + // for the specified nspace /* log attributes */ -#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr -#define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout -#define OPAL_PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless -#define OPAL_PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr +#define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout +#define OPAL_PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless +#define OPAL_PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define OPAL_PMIX_LOG_EMAIL "pmix.log.email" // (pmix_data_array_t) log via email based on pmix_info_t containing directives +#define OPAL_PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr" // (char*) comma-delimited list of email addresses that are to recv msg +#define OPAL_PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub" // (char*) subject line for email +#define OPAL_PMIX_LOG_EMAIL_MSG "pmix.log.emmsg" // (char*) msg to be included in email + /* debugger attributes */ -#define OPAL_PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start -#define OPAL_PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop 
during PMIx init -#define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification -#define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are -#define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define OPAL_PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start +#define OPAL_PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init +#define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification +#define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are +#define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release + + +/* Resource Manager identification */ +#define OPAL_PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager +#define OPAL_PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string + + +/* attributes for setting envars */ +#define OPAL_PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment +#define OPAL_PMIX_UNSET_ENVAR "pmix.unset.envar" // (char*) unset envar specified in string + + +/* attributes relating to allocations */ +#define OPAL_PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request + // which can later be used to query status of the request +#define OPAL_PMIX_ALLOC_NUM_NODES "pmix.alloc.nnodes" // (uint64_t) number of nodes +#define OPAL_PMIX_ALLOC_NODE_LIST "pmix.alloc.nlist" // (char*) regex of specific nodes +#define OPAL_PMIX_ALLOC_NUM_CPUS "pmix.alloc.ncpus" // (uint64_t) number of cpus +#define OPAL_PMIX_ALLOC_NUM_CPU_LIST "pmix.alloc.ncpulist" // (char*) regex of 
#cpus for each node +#define OPAL_PMIX_ALLOC_CPU_LIST "pmix.alloc.cpulist" // (char*) regex of specific cpus indicating the cpus involved. +#define OPAL_PMIX_ALLOC_MEM_SIZE "pmix.alloc.msize" // (float) number of Mbytes +#define OPAL_PMIX_ALLOC_NETWORK "pmix.alloc.net" // (array) array of pmix_info_t describing network resources. If not + // given as part of an info struct that identifies the + // impacted nodes, then the description will be applied + // across all nodes in the requestor's allocation +#define OPAL_PMIX_ALLOC_NETWORK_ID "pmix.alloc.netid" // (char*) name of network +#define OPAL_PMIX_ALLOC_BANDWIDTH "pmix.alloc.bw" // (float) Mbits/sec +#define OPAL_PMIX_ALLOC_NETWORK_QOS "pmix.alloc.netqos" // (char*) quality of service level +#define OPAL_PMIX_ALLOC_TIME "pmix.alloc.time" // (uint32_t) time in seconds + + +/* job control attributes */ +#define OPAL_PMIX_JOB_CTRL_ID "pmix.jctrl.id" // (char*) provide a string identifier for this request +#define OPAL_PMIX_JOB_CTRL_PAUSE "pmix.jctrl.pause" // (bool) pause the specified processes +#define OPAL_PMIX_JOB_CTRL_RESUME "pmix.jctrl.resume" // (bool) "un-pause" the specified processes +#define OPAL_PMIX_JOB_CTRL_CANCEL "pmix.jctrl.cancel" // (char*) cancel the specified request + // (NULL => cancel all requests from this requestor) +#define OPAL_PMIX_JOB_CTRL_KILL "pmix.jctrl.kill" // (bool) forcibly terminate the specified processes and cleanup +#define OPAL_PMIX_JOB_CTRL_RESTART "pmix.jctrl.restart" // (char*) restart the specified processes using the given checkpoint ID +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT "pmix.jctrl.ckpt" // (char*) checkpoint the specified processes and assign the given ID to it +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_EVENT "pmix.jctrl.ckptev" // (bool) use event notification to trigger process checkpoint +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_SIGNAL "pmix.jctrl.ckptsig" // (int) use the given signal to trigger process checkpoint +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT 
"pmix.jctrl.ckptsig" // (int) time in seconds to wait for checkpoint to complete +#define OPAL_PMIX_JOB_CTRL_SIGNAL "pmix.jctrl.sig" // (int) send given signal to specified processes +#define OPAL_PMIX_JOB_CTRL_PROVISION "pmix.jctrl.pvn" // (char*) regex identifying nodes that are to be provisioned +#define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned +#define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted + +/* monitoring attributes */ +#define OPAL_PMIX_MONITOR_HEARTBEAT "pmix.monitor.mbeat" // (void) register to have the server monitor the requestor for heartbeats +#define OPAL_PMIX_SEND_HEARTBEAT "pmix.monitor.beat" // (void) send heartbeat to local server +#define OPAL_PMIX_MONITOR_HEARTBEAT_TIME "pmix.monitor.btime" // (uint32_t) time in seconds before declaring heartbeat missed +#define OPAL_PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before taking + // specified action +#define OPAL_PMIX_MONITOR_FILE "pmix.monitor.fmon" // (char*) register to monitor file for signs of life +#define OPAL_PMIX_MONITOR_FILE_SIZE "pmix.monitor.fsize" // (bool) monitor size of given file is growing to determine app is running +#define OPAL_PMIX_MONITOR_FILE_ACCESS "pmix.monitor.faccess" // (char*) monitor time since last access of given file to determine app is running +#define OPAL_PMIX_MONITOR_FILE_MODIFY "pmix.monitor.fmod" // (char*) monitor time since last modified of given file to determine app is running +#define OPAL_PMIX_MONITOR_FILE_CHECK_TIME "pmix.monitor.ftime" // (uint32_t) time in seconds between checking file +#define OPAL_PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before taking + // specified action /* define a scope for data "put" by PMI per the following: @@ -285,6 +373,16 @@ typedef enum { } opal_pmix_persistence_t; +/* define allocation 
request flags */ +typedef enum { + OPAL_PMIX_ALLOC_UNDEF = 0, + OPAL_PMIX_ALLOC_NEW, + OPAL_PMIX_ALLOC_EXTEND, + OPAL_PMIX_ALLOC_RELEASE, + OPAL_PMIX_ALLOC_REAQCUIRE +} opal_pmix_alloc_directive_t; + + /**** PMIX INFO STRUCT ****/ /* NOTE: the pmix_info_t is essentially equivalent to the opal_value_t diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index fc11e826af..3ac42f5b83 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -292,6 +292,12 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_EVENT_REGISTRATION: retval = "Event registration"; break; + case OPAL_ERR_HEARTBEAT_ALERT: + retval = "Heartbeat not received"; + break; + case OPAL_ERR_FILE_ALERT: + retval = "File alert - proc may have stalled"; + break; default: retval = "UNRECOGNIZED"; } diff --git a/orte/mca/schizo/base/base.h b/orte/mca/schizo/base/base.h index ad5d9ffc63..8f5ab569ee 100644 --- a/orte/mca/schizo/base/base.h +++ b/orte/mca/schizo/base/base.h @@ -76,7 +76,7 @@ ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat, orte_app_context_t *app, char ***env); ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void); -ORTE_DECLSPEC long orte_schizo_base_get_remaining_time(void); +ORTE_DECLSPEC int orte_schizo_base_get_remaining_time(uint32_t *timeleft); ORTE_DECLSPEC void orte_schizo_base_finalize(void); END_C_DECLS diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index 173ca1c2bf..8b7068434e 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -162,20 +162,20 @@ orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void) return ORTE_SCHIZO_UNDETERMINED; } -long orte_schizo_base_get_remaining_time(void) +int orte_schizo_base_get_remaining_time(uint32_t *timeleft) { - long rc; + int rc; orte_schizo_base_active_module_t *mod; OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, 
orte_schizo_base_active_module_t) { if (NULL != mod->module->get_remaining_time) { - rc = mod->module->get_remaining_time(); + rc = mod->module->get_remaining_time(timeleft); if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { return rc; } } } - return -1; + return ORTE_ERR_NOT_SUPPORTED; } void orte_schizo_base_finalize(void) diff --git a/orte/mca/schizo/schizo.h b/orte/mca/schizo/schizo.h index 77b1782fc2..56f47e4e62 100644 --- a/orte/mca/schizo/schizo.h +++ b/orte/mca/schizo/schizo.h @@ -118,7 +118,7 @@ typedef void (*orte_schizo_base_module_finalize_fn_t)(void); * and decides it cannot provide the info in the current situation, * then it can return ORTE_ERR_TAKE_NEXT_OPTION to indicate that * another module should be tried */ -typedef long (*orte_schizo_base_module_get_rem_time_fn_t)(void); +typedef int (*orte_schizo_base_module_get_rem_time_fn_t)(uint32_t *timeleft); /* * schizo module version 1.3.0 diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c index fbfd4a1f79..e88a8d9970 100644 --- a/orte/mca/schizo/slurm/schizo_slurm.c +++ b/orte/mca/schizo/slurm/schizo_slurm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
* $COPYRIGHT$ * @@ -29,10 +29,12 @@ #include "schizo_slurm.h" static orte_schizo_launch_environ_t check_launch_environment(void); +static int get_remaining_time(uint32_t *timeleft); static void finalize(void); orte_schizo_base_module_t orte_schizo_slurm_module = { .check_launch_environment = check_launch_environment, + .get_remaining_time = get_remaining_time, .finalize = finalize }; @@ -123,6 +125,91 @@ static orte_schizo_launch_environ_t check_launch_environment(void) return myenv; }
+/* Report how many seconds remain in the current Slurm allocation.
+ *
+ * Runs "squeue -h -j $SLURM_JOBID -o %L" and converts the reply to
+ * seconds. squeue documents %L as days-hours:minutes:seconds, or a
+ * word such as UNLIMITED when the job has no finite limit.
+ *
+ * timeleft - [out] seconds remaining. Left at UINT32_MAX when the
+ *            reply is not numeric or has more than four fields, and
+ *            clamped to UINT32_MAX if it does not fit in 32 bits.
+ *
+ * Returns ORTE_SUCCESS once a reply was read,
+ * ORTE_ERR_TAKE_NEXT_OPTION if SLURM_JOBID is not set, or another
+ * ORTE error if the command could not be built, started, or read.
+ */
+static int get_remaining_time(uint32_t *timeleft)
+{
+    char output[256], *cmd, *jobid, *ptr;
+    FILE *fp;
+    uint64_t field[4] = {0, 0, 0, 0}, total;
+    int n = 0;
+
+    /* set the default */
+    *timeleft = UINT32_MAX;
+
+    if (NULL == (jobid = getenv("SLURM_JOBID"))) {
+        return ORTE_ERR_TAKE_NEXT_OPTION;
+    }
+    if (0 > asprintf(&cmd, "squeue -h -j %s -o %%L", jobid)) {
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    fp = popen(cmd, "r");
+    free(cmd);
+    if (NULL == fp) {
+        return ORTE_ERR_FILE_OPEN_FAILURE;
+    }
+    ptr = fgets(output, sizeof(output), fp);
+    /* always pclose so the child is reaped and the pipe is released */
+    pclose(fp);
+    if (NULL == ptr) {
+        return ORTE_ERR_FILE_READ_FAILURE;
+    }
+
+    while (' ' == *ptr || '\t' == *ptr) {
+        ++ptr;
+    }
+    /* a non-numeric reply (e.g., UNLIMITED) carries no finite value,
+     * so keep the default */
+    if (*ptr < '0' || '9' < *ptr) {
+        return ORTE_SUCCESS;
+    }
+
+    /* collect the fields - days are set off by '-', the rest by ':' */
+    for (; '\0' != *ptr; ++ptr) {
+        if ('0' <= *ptr && *ptr <= '9') {
+            /* stop accumulating once past 32 bits so nothing wraps */
+            if (field[n] <= UINT32_MAX) {
+                field[n] = (10 * field[n]) + (uint64_t)(*ptr - '0');
+            }
+        } else if (':' == *ptr || '-' == *ptr) {
+            /* more than four fields - treat the time as infinite */
+            if (4 == ++n) {
+                return ORTE_SUCCESS;
+            }
+        } else {
+            /* newline or other trailing text ends the value */
+            break;
+        }
+    }
+
+    /* the last field is seconds, preceded by minutes, hours, days */
+    total = field[n];
+    if (0 < n) {
+        total += 60 * field[n-1];
+    }
+    if (1 < n) {
+        total += 3600 * field[n-2];
+    }
+    if (2 < n) {
+        total += 24*3600 * field[n-3];
+    }
+
+    *timeleft = (UINT32_MAX < total) ? UINT32_MAX : (uint32_t)total;
+    return ORTE_SUCCESS;
+}
+
 static void finalize(void) { int i; diff --git a/orte/mca/schizo/slurm/schizo_slurm_component.c b/orte/mca/schizo/slurm/schizo_slurm_component.c index 32d4bfbead..180bf9a3c5 100644 ---
a/orte/mca/schizo/slurm/schizo_slurm_component.c +++ b/orte/mca/schizo/slurm/schizo_slurm_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,8 +38,8 @@ orte_schizo_base_component_t mca_schizo_slurm_component = { static int component_query(mca_base_module_t **module, int *priority) { - /* disqualify ourselves if we are not an app or under slurm */ - if (!ORTE_PROC_IS_APP) { + /* disqualify ourselves if we are not under slurm */ + if (NULL == getenv("SLURM_JOBID")) { *priority = 0; *module = NULL; return OPAL_ERROR; @@ -49,4 +49,3 @@ static int component_query(mca_base_module_t **module, int *priority) *priority = 50; return ORTE_SUCCESS; } - diff --git a/orte/mca/sensor/base/base.h b/orte/mca/sensor/base/base.h deleted file mode 100644 index c01cf9ed4d..0000000000 --- a/orte/mca/sensor/base/base.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_SENSOR_BASE_H -#define MCA_SENSOR_BASE_H - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_sensor_base_framework; -/* select a component */ -ORTE_DECLSPEC int orte_sensor_base_select(void); - - -END_C_DECLS -#endif diff --git a/orte/mca/sensor/base/sensor_base_fns.c b/orte/mca/sensor/base/sensor_base_fns.c deleted file mode 100644 index 81f9bbf69a..0000000000 --- a/orte/mca/sensor/base/sensor_base_fns.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/dss/dss.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - -static bool mods_active = false; - -void orte_sensor_base_start(orte_jobid_t job) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (0 < orte_sensor_base.rate.tv_sec) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: starting sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* call the start function of all modules in priority order */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - mods_active = true; - if (NULL != i_module->module->start) { - i_module->module->start(job); - } - } - - if (mods_active && !orte_sensor_base.active) { - /* 
setup a buffer to collect samples */ - orte_sensor_base.samples = OBJ_NEW(opal_buffer_t); - /* startup a timer to wake us up periodically - * for a data sample - */ - orte_sensor_base.active = true; - opal_event_evtimer_set(orte_event_base, &orte_sensor_base.sample_ev, - orte_sensor_base_sample, NULL); - opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate); - } - } - return; -} - -void orte_sensor_base_stop(orte_jobid_t job) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (!mods_active) { - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: stopping sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - if (orte_sensor_base.active) { - opal_event_del(&orte_sensor_base.sample_ev); - orte_sensor_base.active = false; - } - - /* call the stop function of all modules in priority order */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->stop) { - i_module->module->stop(job); - } - } - - return; -} - -void orte_sensor_base_sample(int fd, short args, void *cbdata) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (!mods_active) { - return; - } - - /* see if we were ordered to stop */ - if (!orte_sensor_base.active) { - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: sampling sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* call the sample function of all modules in priority order from - * highest to lowest - the heartbeat should always be the lowest - * priority, so it will send any collected data - */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->sample) { - 
opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: sampling component %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - i_module->component->base_version.mca_component_name); - i_module->module->sample(); - } - } - - /* restart the timer */ - opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate); - - return; -} - -void orte_sensor_base_log(char *comp, opal_buffer_t *data) -{ - int i; - orte_sensor_active_module_t *i_module; - - if (NULL == comp) { - /* nothing we can do */ - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: logging sensor %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp); - - /* find the specified module */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (0 == strcmp(comp, i_module->component->base_version.mca_component_name)) { - if (NULL != i_module->module->log) { - i_module->module->log(data); - } - return; - } - } -} diff --git a/orte/mca/sensor/base/sensor_base_frame.c b/orte/mca/sensor/base/sensor_base_frame.c deleted file mode 100644 index 73c6cdf79c..0000000000 --- a/orte/mca/sensor/base/sensor_base_frame.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_pointer_array.h" - -#ifdef HAVE_STRING_H -#include <string.h> -#endif - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "orte/mca/sensor/base/static-components.h" - -/* - * Global variables - */ -orte_sensor_base_API_module_t orte_sensor = { - orte_sensor_base_start, - orte_sensor_base_stop -}; -orte_sensor_base_t orte_sensor_base = {{{0}}}; - -/* - * Local variables - */ -static int orte_sensor_base_sample_rate = 0; - -static int orte_sensor_base_register(mca_base_register_flag_t flags) -{ - int var_id; - - orte_sensor_base_sample_rate = 0; - var_id = mca_base_var_register("orte", "sensor", "base", "sample_rate", - "Sample rate in seconds", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_sensor_base_sample_rate); - mca_base_var_register_synonym(var_id, "orte", "sensor", NULL, "sample_rate", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - /* see if we want samples logged */ - orte_sensor_base.log_samples = false; - var_id = mca_base_var_register("orte", "sensor", "base", "log_samples", - "Log samples to database", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_sensor_base.log_samples); - mca_base_var_register_synonym(var_id, "orte", "sensor", NULL, "log_samples", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - return ORTE_SUCCESS; -} - -static int orte_sensor_base_close(void) -{ - orte_sensor_active_module_t *i_module; - int i; - - for (i=0; i < 
orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->finalize) { - i_module->module->finalize(); - } - } - OBJ_DESTRUCT(&orte_sensor_base.modules); - - /* Close all remaining available components */ - return mca_base_framework_components_close(&orte_sensor_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_sensor_base_open(mca_base_open_flag_t flags) -{ - /* initialize globals */ - orte_sensor_base.active = false; - - /* construct the array of modules */ - OBJ_CONSTRUCT(&orte_sensor_base.modules, opal_pointer_array_t); - opal_pointer_array_init(&orte_sensor_base.modules, 3, INT_MAX, 1); - - /* get the sample rate */ - orte_sensor_base.rate.tv_sec = orte_sensor_base_sample_rate; - orte_sensor_base.rate.tv_usec = 0; - - /* Open up all available components */ - return mca_base_framework_components_open(&orte_sensor_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, sensor, "ORTE Monitoring Sensors", - orte_sensor_base_register, - orte_sensor_base_open, orte_sensor_base_close, - mca_sensor_base_static_components, 0); - -static void cons(orte_sensor_active_module_t *t) -{ - t->sampling = true; -} -OBJ_CLASS_INSTANCE(orte_sensor_active_module_t, - opal_object_t, - cons, NULL); diff --git a/orte/mca/sensor/base/sensor_base_select.c b/orte/mca/sensor/base/sensor_base_select.c deleted file mode 100644 index 353414b7ee..0000000000 --- a/orte/mca/sensor/base/sensor_base_select.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#ifdef HAVE_STRING_H -#include <string.h> -#endif - -#include "orte/constants.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - - -static bool selected = false; - -/** - * Function for weeding out sensor components that don't want to run. - * - * Call the init function on all available components to find out if - * they want to run. Select all components that don't fail. Failing - * components will be closed and unloaded. The selected modules will - * be returned to the caller in a opal_list_t. - */ -int orte_sensor_base_select(void) -{ - mca_base_component_list_item_t *cli = NULL; - orte_sensor_base_component_t *component = NULL; - mca_base_module_t *module = NULL; - orte_sensor_active_module_t *i_module; - int priority = 0, i, j, low_i; - opal_pointer_array_t tmp_array; - bool none_found; - orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL; - bool duplicate; - - if (selected) { - return ORTE_SUCCESS; - } - selected = true; - - OBJ_CONSTRUCT(&tmp_array, opal_pointer_array_t); - - opal_output_verbose(10, orte_sensor_base_framework.framework_output, - "sensor:base:select: Auto-selecting components"); - - /* - * Traverse the list of available components. - * For each call their 'query' functions to determine relative priority. - */ - none_found = true; - OPAL_LIST_FOREACH(cli, &orte_sensor_base_framework.framework_components, mca_base_component_list_item_t) { - component = (orte_sensor_base_component_t *) cli->cli_component; - - /* - * If there is a query function then use it. 
- */ - if (NULL == component->base_version.mca_query_component) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Skipping component [%s]. It does not implement a query function", - component->base_version.mca_component_name ); - continue; - } - - /* - * Query this component for the module and priority - */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Querying component [%s]", - component->base_version.mca_component_name); - - component->base_version.mca_query_component(&module, &priority); - - /* - * If no module was returned or negative priority, then skip component - */ - if (NULL == module || priority < 0) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Skipping component [%s]. Query failed to return a module", - component->base_version.mca_component_name ); - continue; - } - - /* check to see if we already have someone who senses the - * same things - if so, take the higher priority one - */ - duplicate = false; - for (i=0; i < tmp_array.size; i++) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i); - if (NULL == tmp_module) { - continue; - } - if (0 == strcmp(component->data_measured, tmp_module->component->data_measured)) { - if (tmp_module->priority < priority) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Replacing component %s with %s - both measure %s", - tmp_module->component->base_version.mca_component_name, - component->base_version.mca_component_name, - component->data_measured); - OBJ_RELEASE(tmp_module); - opal_pointer_array_set_item(&tmp_array, i, NULL); - break; - } else { - duplicate = true; - } - } - } - if (duplicate) { - /* ignore this component */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Ignoring component %s - duplicate with higher priority measures %s", - 
component->base_version.mca_component_name, - component->data_measured); - continue; - } - - /* - * Append them to the temporary list, we will sort later - */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Query of component [%s] set priority to %d", - component->base_version.mca_component_name, priority); - tmp_module = OBJ_NEW(orte_sensor_active_module_t); - tmp_module->component = component; - tmp_module->module = (orte_sensor_base_module_t*)module; - tmp_module->priority = priority; - - opal_pointer_array_add(&tmp_array, (void*)tmp_module); - none_found = false; - } - - if (none_found) { - /* okay for no modules to be found */ - return ORTE_SUCCESS; - } - - /* - * Sort the list by decending priority - */ - priority = 0; - for(j = 0; j < tmp_array.size; ++j) { - tmp_module_sw = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, j); - if( NULL == tmp_module_sw ) { - continue; - } - - low_i = -1; - priority = tmp_module_sw->priority; - - for(i = 0; i < tmp_array.size; ++i) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i); - if( NULL == tmp_module ) { - continue; - } - if( tmp_module->priority > priority ) { - low_i = i; - priority = tmp_module->priority; - } - } - - if( low_i >= 0 ) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, low_i); - opal_pointer_array_set_item(&tmp_array, low_i, NULL); - j--; /* Try this entry again, if it is not the lowest */ - } else { - tmp_module = tmp_module_sw; - opal_pointer_array_set_item(&tmp_array, j, NULL); - } - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Add module with priority [%s] %d", - tmp_module->component->base_version.mca_component_name, tmp_module->priority); - opal_pointer_array_add(&orte_sensor_base.modules, tmp_module); - } - OBJ_DESTRUCT(&tmp_array); - - /* - * Initialize each of the modules in priority order from - * highest to 
lowest - */ - for(i = 0; i < orte_sensor_base.modules.size; ++i) { - i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i); - if( NULL == i_module ) { - continue; - } - if( NULL != i_module->module->init ) { - if (ORTE_SUCCESS != i_module->module->init()) { - /* can't sample - however, if we are the HNP, - * then we need this module - * anyway so we can log incoming data - */ - if (ORTE_PROC_IS_HNP) { - i_module->sampling = false; - } else { - opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL); - OBJ_RELEASE(i_module); - } - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/base/sensor_private.h b/orte/mca/sensor/base/sensor_private.h deleted file mode 100644 index 3178b05bf5..0000000000 --- a/orte/mca/sensor/base/sensor_private.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_SENSOR_PRIVATE_H -#define MCA_SENSOR_PRIVATE_H - -/* - * includes - */ -#include "orte_config.h" - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ - -#include "opal/class/opal_pointer_array.h" -#include "opal/mca/event/event.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/sensor.h" - - -/* - * Global functions for MCA overall collective open and close - */ -BEGIN_C_DECLS - -/* define a struct to hold framework-global values */ -typedef struct { - opal_pointer_array_t modules; - bool log_samples; - bool active; - struct timeval rate; - opal_event_t sample_ev; - opal_buffer_t *samples; -} orte_sensor_base_t; - -typedef struct { - opal_object_t super; - orte_sensor_base_component_t *component; - orte_sensor_base_module_t *module; - int priority; - bool sampling; -} orte_sensor_active_module_t; -OBJ_CLASS_DECLARATION(orte_sensor_active_module_t); - - -ORTE_DECLSPEC extern orte_sensor_base_t orte_sensor_base; -ORTE_DECLSPEC void orte_sensor_base_start(orte_jobid_t job); -ORTE_DECLSPEC void orte_sensor_base_stop(orte_jobid_t job); -ORTE_DECLSPEC void orte_sensor_base_sample(int fd, short args, void *cbdata); -ORTE_DECLSPEC void orte_sensor_base_log(char *comp, opal_buffer_t *data); - -END_C_DECLS -#endif diff --git a/orte/mca/sensor/file/configure.m4 b/orte/mca/sensor/file/configure.m4 deleted file mode 100644 index 67f19d12e5..0000000000 --- a/orte/mca/sensor/file/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_file_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_file_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/file/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/file/sensor_file.c b/orte/mca/sensor/file/sensor_file.c deleted file mode 100644 index 958a6a97a0..0000000000 --- a/orte/mca/sensor/file/sensor_file.c +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include <stdio.h> -#include <stddef.h> -#include <ctype.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#ifdef HAVE_NETDB_H -#include <netdb.h> -#endif -#ifdef HAVE_SYS_PARAM_H -#include <sys/param.h> -#endif -#include <fcntl.h> -#include <errno.h> -#include <signal.h> -#ifdef HAVE_TIME_H -#include <time.h> -#endif -#include <sys/stat.h> -#include <sys/types.h> - -#include "opal_stdint.h" -#include "opal/util/output.h" - -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/state/state.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_file.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void 
start(orte_jobid_t job); -static void stop(orte_jobid_t job); -static void file_sample(void); -static void file_log(opal_buffer_t *sample); - -/* instantiate the module */ -orte_sensor_base_module_t orte_sensor_file_module = { - init, - finalize, - start, - stop, - file_sample, - file_log -}; - -/* define a tracking object */ -typedef struct { - opal_list_item_t super; - orte_jobid_t jobid; - orte_vpid_t vpid; - char *file; - int tick; - bool check_size; - bool check_access; - bool check_mod; - int32_t file_size; - time_t last_access; - time_t last_mod; - int limit; -} file_tracker_t; -static void ft_constructor(file_tracker_t *ft) -{ - ft->file = NULL; - ft->tick = 0; - ft->file_size = 0; - ft->last_access = 0; - ft->last_mod = 0; - ft->limit = 0; -} -static void ft_destructor(file_tracker_t *ft) -{ - if (NULL != ft->file) { - free(ft->file); - } -} -OBJ_CLASS_INSTANCE(file_tracker_t, - opal_list_item_t, - ft_constructor, ft_destructor); - -/* local globals */ -static opal_list_t jobs; - -static int init(void) -{ - OBJ_CONSTRUCT(&jobs, opal_list_t); - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&jobs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&jobs); - - return; -} - -static bool find_value(orte_app_context_t *app, - char *pattern, char **value) -{ - int i; - char *ptr; - - for (i=0; NULL != app->env[i]; i++) { - if (0 == strncmp(app->env[i], pattern, strlen(pattern))) { - ptr = strchr(app->env[i], '='); - ptr++; - if (NULL != value) { - *value = strdup(ptr); - } - return true; - } - } - return false; -} - -/* - * Start monitoring of local processes - */ -static void start(orte_jobid_t jobid) -{ - orte_job_t *jobdat; - orte_app_context_t *app, *aptr; - int i; - char *filename; - file_tracker_t *ft; - char *ptr; - - /* cannot monitor my own job */ - if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) { - return; - } - - OPAL_OUTPUT_VERBOSE((1, 
orte_sensor_base_framework.framework_output, - "%s starting file monitoring for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid))); - - /* get the local jobdat for this job */ - if (NULL == (jobdat = orte_get_job_data_object(jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* must be at least one app_context, so use the first one found */ - app = NULL; - for (i=0; i < jobdat->apps->size; i++) { - if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, i))) { - app = aptr; - break; - } - } - if (NULL == app) { - /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* search the environ to get the filename */ - if (!find_value(app, "OMPI_MCA_sensor_file_filename", &filename)) { - /* was a default file given */ - if (NULL == mca_sensor_file_component.file) { - /* can't do anything without a file */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:file no file for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid))); - return; - } - filename = mca_sensor_file_component.file; - } - - /* create the tracking object */ - ft = OBJ_NEW(file_tracker_t); - ft->jobid = jobid; - ft->file = strdup(filename); - - /* search the environ to see what we are checking */ - if (!find_value(app, "OMPI_MCA_sensor_file_check_size", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_size) { - ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size); - } - } else { - ft->check_size = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, "OMPI_MCA_sensor_file_check_access", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_access) { - ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access); - } - } else { - ft->check_access = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, 
"OMPI_MCA_sensor_file_check_mod", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_mod) { - ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod); - } - } else { - ft->check_mod = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, "OMPI_MCA_sensor_file_limit", &ptr)) { - ft->limit = mca_sensor_file_component.limit; - } else { - ft->limit = strtol(ptr, NULL, 10); - free(ptr); - } - opal_list_append(&jobs, &ft->super); - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s file %s monitored for %s%s%s with limit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file, ft->check_size ? "SIZE:" : " ", - ft->check_access ? "ACCESS TIME:" : " ", - ft->check_mod ? "MOD TIME" : " ", ft->limit)); - return; -} - - -static void stop(orte_jobid_t jobid) -{ - opal_list_item_t *item; - file_tracker_t *ft; - - /* cannot monitor my own job */ - if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) { - return; - } - - for (item = opal_list_get_first(&jobs); - item != opal_list_get_end(&jobs); - item = opal_list_get_next(item)) { - ft = (file_tracker_t*)item; - if (jobid == ft->jobid || ORTE_JOBID_WILDCARD == jobid) { - opal_list_remove_item(&jobs, item); - OBJ_RELEASE(item); - } - } - return; -} - -static void file_sample(void) -{ - struct stat buf; - opal_list_item_t *item; - file_tracker_t *ft; - orte_job_t *jdata; - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sampling files", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - for (item = opal_list_get_first(&jobs); - item != opal_list_get_end(&jobs); - item = opal_list_get_next(item)) { - ft = (file_tracker_t*)item; - - /* stat the file and get its size */ - if (0 > stat(ft->file, &buf)) { - /* cannot stat file */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file)); - continue; - } - - 
OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s size %lu access %s\tmod %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime))); - - if (ft->check_size) { - if (buf.st_size == ft->file_size) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->file_size = buf.st_size; - } - } - if (ft->check_access) { - if (buf.st_atime == ft->last_access) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->last_access = buf.st_atime; - } - } - if (ft->check_mod) { - if (buf.st_mtime == ft->last_mod) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->last_mod = buf.st_mtime; - } - } - - CHECK: - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sampled file %s tick %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file, ft->tick)); - - if (ft->tick == ft->limit) { - orte_show_help("help-orte-sensor-file.txt", "file-stalled", true, - ft->file, ft->file_size, ctime(&ft->last_access), ctime(&ft->last_mod)); - jdata = orte_get_job_data_object(ft->jobid); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED); - } - } -} - -static void file_log(opal_buffer_t *sample) -{ -} diff --git a/orte/mca/sensor/file/sensor_file.h b/orte/mca/sensor/file/sensor_file.h deleted file mode 100644 index d923ee6aa6..0000000000 --- a/orte/mca/sensor/file/sensor_file.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * File movement sensor - */ -#ifndef ORTE_SENSOR_FILE_H -#define ORTE_SENSOR_FILE_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -struct orte_sensor_file_component_t { - orte_sensor_base_component_t super; - int sample_rate; - char *file; - bool check_size; - bool check_access; - bool check_mod; - int limit; -}; -typedef struct orte_sensor_file_component_t orte_sensor_file_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_file_component_t mca_sensor_file_component; -extern orte_sensor_base_module_t orte_sensor_file_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/file/sensor_file_component.c b/orte/mca/sensor/file/sensor_file_component.c deleted file mode 100644 index e3b930a59e..0000000000 --- a/orte/mca/sensor/file/sensor_file_component.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_file.h" - -/* - * Local functions - */ -static int orte_sensor_file_register (void); -static int orte_sensor_file_open(void); -static int orte_sensor_file_close(void); -static int orte_sensor_file_query(mca_base_module_t **module, int *priority); - -orte_sensor_file_component_t mca_sensor_file_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "file", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_file_open, /* component open */ - orte_sensor_file_close, /* component close */ - orte_sensor_file_query, /* component query */ - orte_sensor_file_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "filemods" // data being sensed - } -}; - - -/** - * component register/open/close/init function - */ -static int orte_sensor_file_register (void) -{ - mca_base_component_t *c = &mca_sensor_file_component.super.base_version; - - /* lookup parameters */ - mca_sensor_file_component.file = NULL; - (void) mca_base_component_var_register (c, "filename", "File to be monitored", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.file); - - mca_sensor_file_component.check_size = false; - (void) mca_base_component_var_register (c, "check_size", "Check the file size", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_size); - - mca_sensor_file_component.check_access = false; - (void) 
mca_base_component_var_register (c, "check_access", "Check access time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_access); - - mca_sensor_file_component.check_mod = false; - (void) mca_base_component_var_register (c, "check_mod", "Check modification time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_mod); - - mca_sensor_file_component.limit = 3; - (void) mca_base_component_var_register (c, "limit", - "Number of times the sensor can detect no motion before declaring error (default=3)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.limit); - return ORTE_SUCCESS; -} - -static int orte_sensor_file_open(void) -{ - return ORTE_SUCCESS; -} - - -static int orte_sensor_file_query(mca_base_module_t **module, int *priority) -{ - *priority = 20; /* higher than heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_file_module; - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. - */ - -static int orte_sensor_file_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/ft_tester/Makefile.am b/orte/mca/sensor/ft_tester/Makefile.am deleted file mode 100644 index 83cf127770..0000000000 --- a/orte/mca/sensor/ft_tester/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sensor_ft_tester.c \ - sensor_ft_tester.h \ - sensor_ft_tester_component.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). 
- -if MCA_BUILD_orte_sensor_ft_tester_DSO -component_noinst = -component_install = mca_sensor_ft_tester.la -else -component_noinst = libmca_sensor_ft_tester.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_ft_tester_la_SOURCES = $(sources) -mca_sensor_ft_tester_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_ft_tester_la_SOURCES =$(sources) -libmca_sensor_ft_tester_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/ft_tester/configure.m4 b/orte/mca/sensor/ft_tester/configure.m4 deleted file mode 100644 index a88d34280c..0000000000 --- a/orte/mca/sensor/ft_tester/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_ft_tester_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_ft_tester_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/ft_tester/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester.h b/orte/mca/sensor/ft_tester/sensor_ft_tester.h deleted file mode 100644 index 241f04d51f..0000000000 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Process Resource Utilization sensor - */ -#ifndef ORTE_SENSOR_FT_TESTER_H -#define ORTE_SENSOR_FT_TESTER_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" -#include "opal/util/alfg.h" - -BEGIN_C_DECLS - -struct orte_sensor_ft_tester_component_t { - orte_sensor_base_component_t super; - float fail_prob; - float daemon_fail_prob; - bool multi_fail; -}; -typedef struct orte_sensor_ft_tester_component_t orte_sensor_ft_tester_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component; -extern orte_sensor_base_module_t orte_sensor_ft_tester_module; - -extern opal_rng_buff_t orte_sensor_ft_rng_buff; - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c b/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c deleted file mode 100644 index 5f57bdf905..0000000000 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_ft_tester.h" - -/* - * Local functions - */ -static int orte_sensor_ft_tester_register (void); -static int orte_sensor_ft_tester_open(void); -static int orte_sensor_ft_tester_close(void); -static int orte_sensor_ft_tester_query(mca_base_module_t **module, int *priority); - -orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "ft_tester", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_ft_tester_open, /* component open */ - orte_sensor_ft_tester_close, /* component close */ - orte_sensor_ft_tester_query, /* component query */ - orte_sensor_ft_tester_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - NULL - } -}; - -static char *daemon_fail_prob = NULL; -static char *fail_prob = NULL; -opal_rng_buff_t orte_sensor_ft_rng_buff; - -/** - * component register/open/close/init function - */ -static int orte_sensor_ft_tester_register (void) -{ - mca_base_component_t *c = &mca_sensor_ft_tester_component.super.base_version; - - fail_prob = NULL; - (void) mca_base_component_var_register (c, "fail_prob", "Probability of killing a single executable", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &fail_prob); - - mca_sensor_ft_tester_component.multi_fail = false; - (void) mca_base_component_var_register (c, "multi_allowed", "Allow multiple executables to be killed at one time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - 
OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_ft_tester_component.multi_fail); - - daemon_fail_prob = NULL; - (void) mca_base_component_var_register (c, "daemon_fail_prob", "Probability of killing a daemon", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &daemon_fail_prob); - - return ORTE_SUCCESS; -} - -static int orte_sensor_ft_tester_open(void) -{ - /* lookup parameters */ - if (NULL != fail_prob) { - mca_sensor_ft_tester_component.fail_prob = strtof(fail_prob, NULL); - if (1.0 < mca_sensor_ft_tester_component.fail_prob) { - /* given in percent */ - mca_sensor_ft_tester_component.fail_prob /= 100.0; - } - } else { - mca_sensor_ft_tester_component.fail_prob = 0.0; - } - - if (NULL != daemon_fail_prob) { - mca_sensor_ft_tester_component.daemon_fail_prob = strtof(daemon_fail_prob, NULL); - if (1.0 < mca_sensor_ft_tester_component.daemon_fail_prob) { - /* given in percent */ - mca_sensor_ft_tester_component.daemon_fail_prob /= 100.0; - } - } else { - mca_sensor_ft_tester_component.daemon_fail_prob = 0.0; - } - - return ORTE_SUCCESS; -} - - -static int orte_sensor_ft_tester_query(mca_base_module_t **module, int *priority) -{ - if (0.0 < mca_sensor_ft_tester_component.fail_prob || - 0.0 < mca_sensor_ft_tester_component.daemon_fail_prob) { - *priority = 1; /* at the bottom */ - *module = (mca_base_module_t *)&orte_sensor_ft_tester_module; - /* seed the RNG --- Not sure if we should assume all procs use - * the same seed? - */ - opal_srand(&orte_sensor_ft_rng_buff, (uint32_t) getpid()); - return ORTE_SUCCESS; - } - *priority = 0; - *module = NULL; - return ORTE_ERROR; - -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_ft_tester_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/heartbeat/Makefile.am b/orte/mca/sensor/heartbeat/Makefile.am deleted file mode 100644 index c6246e666d..0000000000 --- a/orte/mca/sensor/heartbeat/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-orte-sensor-heartbeat.txt - -sources = \ - sensor_heartbeat.c \ - sensor_heartbeat.h \ - sensor_heartbeat_component.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). - -if MCA_BUILD_orte_sensor_heartbeat_DSO -component_noinst = -component_install = mca_sensor_heartbeat.la -else -component_noinst = libmca_sensor_heartbeat.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_heartbeat_la_SOURCES = $(sources) -mca_sensor_heartbeat_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_heartbeat_la_SOURCES =$(sources) -libmca_sensor_heartbeat_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/heartbeat/configure.m4 b/orte/mca/sensor/heartbeat/configure.m4 deleted file mode 100644 index ce8daf427a..0000000000 --- a/orte/mca/sensor/heartbeat/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_heartbeat_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_heartbeat_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/heartbeat/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat.c b/orte/mca/sensor/heartbeat/sensor_heartbeat.c deleted file mode 100644 index f5ceb60d5c..0000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include <errno.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include <string.h> -#endif /* HAVE_STRING_H */ -#include <stdio.h> - -#include "opal_stdint.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/event/event.h" - -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/util/name_fns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_heartbeat.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void start(orte_jobid_t job); -static void sample(void); - -/* instantiate the module */ -orte_sensor_base_module_t 
orte_sensor_heartbeat_module = { - init, - finalize, - start, - NULL, - sample, - NULL -}; - -/* declare the local functions */ -static void check_heartbeat(int fd, short event, void *arg); -static void recv_beats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata); - -/* local globals */ -static orte_job_t *daemons=NULL; -static opal_event_t check_ev; -static bool check_active = false; -static struct timeval check_time; - -static int init(void) -{ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s initializing heartbeat recvs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* setup to receive heartbeats */ - if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_AGGREGATOR) { - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_HEARTBEAT, - ORTE_RML_PERSISTENT, - recv_beats, NULL); - } - - if (ORTE_PROC_IS_HNP) { - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - } - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_HEARTBEAT); - if (check_active) { - opal_event_del(&check_ev); - check_active = false; - } - return; -} - -static void start(orte_jobid_t job) -{ - if (!check_active && NULL != daemons) { - /* setup the check event */ - check_time.tv_sec = 3 * orte_sensor_base.rate.tv_sec; - check_time.tv_usec = 0; - opal_event_evtimer_set(orte_event_base, &check_ev, check_heartbeat, &check_ev); - opal_event_evtimer_add(&check_ev, &check_time); - check_active = true; - } -} - -static void sample(void) -{ - opal_buffer_t *buf; - int rc; - orte_process_name_t *tgt; - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - return; - } - - if (ORTE_PROC_IS_CM) { - /* we send to our daemon */ - tgt = ORTE_PROC_MY_DAEMON; - } else { - tgt = ORTE_PROC_MY_HNP; - } - /* if my target hasn't been defined yet, ignore - nobody listening yet */ - if 
(ORTE_JOBID_INVALID ==tgt->jobid || - ORTE_VPID_INVALID == tgt->vpid) { - opal_output_verbose(1, orte_sensor_base_framework.framework_output, - "%s sensor:heartbeat: HNP is not defined", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return; - } - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sending heartbeat", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* if we want sampled data included, point to the bucket */ - buf = OBJ_NEW(opal_buffer_t); - if (orte_sensor_base.log_samples) { - opal_dss.copy_payload(buf, orte_sensor_base.samples); - OBJ_RELEASE(orte_sensor_base.samples); - /* start a new sample bucket */ - orte_sensor_base.samples = OBJ_NEW(opal_buffer_t); - } - - /* send heartbeat */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(tgt, buf, - ORTE_RML_TAG_HEARTBEAT, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - } -} - -/* this function automatically gets periodically called - * by the event library so we can check on the state - * of the various orteds - */ -static void check_heartbeat(int fd, short dummy, void *arg) -{ - int v; - orte_proc_t *proc; - opal_event_t *tmp = (opal_event_t*)arg; - - OPAL_OUTPUT_VERBOSE((3, orte_sensor_base_framework.framework_output, - "%s sensor:check_heartbeat", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - OPAL_OUTPUT_VERBOSE((3, orte_sensor_base_framework.framework_output, - "%s IGNORING CHECK abnorm_term %s fin %s init %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_abnormal_term_ordered ? "TRUE" : "FALSE", - orte_finalizing ? "TRUE" : "FALSE", - orte_initialized ? 
"TRUE" : "FALSE")); - check_active = false; - return; - } - - for (v=0; v < daemons->procs->size; v++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, v))) { - continue; - } - /* ignore myself */ - if (proc->name.vpid == ORTE_PROC_MY_NAME->vpid) { - continue; - } - if (ORTE_PROC_STATE_RUNNING != proc->state) { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:heartbeat DAEMON %s IS NOT RUNNING", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name))); - continue; - } - - if (0 == proc->beat) { - /* no heartbeat recvd in last window */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:check_heartbeat FAILED for daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name))); - ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_HEARTBEAT_FAILED); - } else { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s HEARTBEAT DETECTED FOR %s: NUM BEATS %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), proc->beat)); - } - /* reset for next period */ - proc->beat = 0; - } - - /* reset the timer */ - opal_event_evtimer_add(tmp, &check_time); -} - -static void recv_beats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - orte_proc_t *proc; - int rc, n; - char *component=NULL; - opal_buffer_t *buf; - - opal_output_verbose(1, orte_sensor_base_framework.framework_output, - "%s received beat from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender)); - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - return; - } - - /* get this daemon's object */ - if (NULL != daemons) { - if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, sender->vpid))) { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s 
marked beat from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender))); - proc->beat++; - /* if this daemon has reappeared, reset things */ - if (ORTE_PROC_STATE_HEARTBEAT_FAILED == proc->state) { - proc->state = ORTE_PROC_STATE_RUNNING; - } - } - } - - /* unload any sampled data */ - n=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &buf, &n, OPAL_BUFFER))) { - if (NULL != buf) { - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &component, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - break; - } - orte_sensor_base_log(component, buf); - OBJ_RELEASE(buf); - free(component); - n=1; - } - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - ORTE_ERROR_LOG(rc); - } -} diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat.h b/orte/mca/sensor/heartbeat/sensor_heartbeat.h deleted file mode 100644 index 08aad98f2d..0000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Heartbeat sensor - */ -#ifndef ORTE_SENSOR_HEARTBEAT_H -#define ORTE_SENSOR_HEARTBEAT_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_sensor_base_component_t mca_sensor_heartbeat_component; -extern orte_sensor_base_module_t orte_sensor_heartbeat_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c b/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c deleted file mode 100644 index c2b38c6739..0000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_heartbeat.h" - -/* - * Local functions - */ - -static int orte_sensor_heartbeat_open(void); -static int orte_sensor_heartbeat_close(void); -static int orte_sensor_heartbeat_query(mca_base_module_t **module, int *priority); - -orte_sensor_base_component_t mca_sensor_heartbeat_component = { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "heartbeat", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_heartbeat_open, /* component open */ - orte_sensor_heartbeat_close, /* component close */ - orte_sensor_heartbeat_query /* component query */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "heartbeat" -}; - - -/** - * component open/close/init function - */ -static int orte_sensor_heartbeat_open(void) -{ - return ORTE_SUCCESS; -} - - -static int orte_sensor_heartbeat_query(mca_base_module_t **module, int *priority) -{ - *priority = 5; /* lower than all other samplers so that their data gets included in heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_heartbeat_module; - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_heartbeat_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/resusage/Makefile.am b/orte/mca/sensor/resusage/Makefile.am deleted file mode 100644 index 8641c15757..0000000000 --- a/orte/mca/sensor/resusage/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-orte-sensor-resusage.txt - -sources = \ - sensor_resusage.c \ - sensor_resusage.h \ - sensor_resusage_component.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). - -if MCA_BUILD_orte_sensor_resusage_DSO -component_noinst = -component_install = mca_sensor_resusage.la -else -component_noinst = libmca_sensor_resusage.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_resusage_la_SOURCES = $(sources) -mca_sensor_resusage_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_resusage_la_SOURCES =$(sources) -libmca_sensor_resusage_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/resusage/configure.m4 b/orte/mca/sensor/resusage/configure.m4 deleted file mode 100644 index d53c50b012..0000000000 --- a/orte/mca/sensor/resusage/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_resusage_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_resusage_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/resusage/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt b/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt deleted file mode 100644 index 2fa38bf331..0000000000 --- a/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt +++ /dev/null @@ -1,21 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for the memory usage sensor -# -[mem-limit-exceeded] -A process has exceeded the specified limit on memory usage: - -Node: %s -Process rank: %s -Memory used: %luGbytes -Memory limit: %luGbytes - diff --git a/orte/mca/sensor/resusage/sensor_resusage.c b/orte/mca/sensor/resusage/sensor_resusage.c deleted file mode 100644 index 49d78187c7..0000000000 --- a/orte/mca/sensor/resusage/sensor_resusage.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include <errno.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include <string.h> -#endif /* HAVE_STRING_H */ -#include <stdio.h> - -#include "opal_stdint.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/class/opal_ring_buffer.h" -#include "opal/dss/dss.h" -#include "opal/util/output.h" -#include "opal/mca/pstat/pstat.h" -#include "opal/mca/db/db.h" - -#include "orte/util/proc_info.h" -#include "orte/util/name_fns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/odls/base/odls_private.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_globals.h" -#include "orte/orted/orted.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_resusage.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void sample(void); -static void res_log(opal_buffer_t *sample); - -/* instantiate the module */ -orte_sensor_base_module_t orte_sensor_resusage_module = { - init, - finalize, - NULL, - NULL, - sample, - res_log -}; - -static bool log_enabled = true; -static orte_node_t *my_node; -static orte_proc_t *my_proc; - -static int init(void) -{ - orte_job_t *jdata; - - /* ensure my_proc and my_node are available on the global arrays */ - if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { - my_proc = OBJ_NEW(orte_proc_t); - my_node = OBJ_NEW(orte_node_t); - } else { - if (NULL == (my_proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, ORTE_PROC_MY_NAME->vpid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (NULL == (my_node = my_proc->node)) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return 
ORTE_ERR_NOT_FOUND; - } - /* protect the objects */ - OBJ_RETAIN(my_proc); - OBJ_RETAIN(my_node); - } - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - if (NULL != my_proc) { - OBJ_RELEASE(my_proc); - } - if (NULL != my_node) { - OBJ_RELEASE(my_node); - } - return; -} - -static void sample(void) -{ - opal_pstats_t *stats, *st; - opal_node_stats_t *nstats, *nst; - int rc, i; - orte_proc_t *child, *hog=NULL; - float in_use, max_mem; - opal_buffer_t buf, *bptr; - char *comp; - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "sample:resusage sampling resource usage")); - - /* setup a buffer for our stats */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - /* pack our name */ - comp = strdup("resusage"); - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &comp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - free(comp); - - /* update stats on ourself and the node */ - stats = OBJ_NEW(opal_pstats_t); - nstats = OBJ_NEW(opal_node_stats_t); - if (ORTE_SUCCESS != (rc = opal_pstat.query(orte_process_info.pid, stats, nstats))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(stats); - OBJ_RELEASE(nstats); - OBJ_DESTRUCT(&buf); - return; - } - - /* the stats framework can't know nodename or rank */ - strncpy(stats->node, orte_process_info.nodename, OPAL_PSTAT_MAX_STRING_LEN); - stats->rank = ORTE_PROC_MY_NAME->vpid; - /* locally save the stats */ - if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&my_proc->stats, stats))) { - OBJ_RELEASE(st); - } - if (NULL != (nst = (opal_node_stats_t*)opal_ring_buffer_push(&my_node->stats, nstats))) { - /* release the popped value */ - OBJ_RELEASE(nst); - } - - /* pack them */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &nstats, 1, OPAL_NODE_STAT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - if (OPAL_SUCCESS != 
(rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - - /* loop through our children and update their stats */ - if (NULL != orte_local_children) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - stats = OBJ_NEW(opal_pstats_t); - if (ORTE_SUCCESS != opal_pstat.query(child->pid, stats, NULL)) { - /* may hit a race condition where the process has - * terminated, so just ignore any error - */ - OBJ_RELEASE(stats); - continue; - } - /* the stats framework can't know nodename or rank */ - strncpy(stats->node, orte_process_info.nodename, OPAL_PSTAT_MAX_STRING_LEN); - stats->rank = child->name.vpid; - /* store it */ - if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&child->stats, stats))) { - OBJ_RELEASE(st); - } - /* pack them */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - } - } - - /* xfer any data for transmission */ - if (0 < buf.bytes_used) { - bptr = &buf; - if (OPAL_SUCCESS != (rc = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - } - OBJ_DESTRUCT(&buf); - - /* are there any issues with node-level usage? 
*/ - nst = (opal_node_stats_t*)opal_ring_buffer_poke(&my_node->stats, -1); - if (NULL != nst && 0.0 < mca_sensor_resusage_component.node_memory_limit) { - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s CHECKING NODE MEM", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* compute the percentage of node memory in-use */ - in_use = 1.0 - (nst->free_mem / nst->total_mem); - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s PERCENT USED: %f LIMIT: %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - in_use, mca_sensor_resusage_component.node_memory_limit)); - if (mca_sensor_resusage_component.node_memory_limit <= in_use) { - /* loop through our children and find the biggest hog */ - hog = NULL; - max_mem = 0.0; - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) { - continue; - } - OPAL_OUTPUT_VERBOSE((5, orte_sensor_base_framework.framework_output, - "%s PROC %s AT VSIZE %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name), st->vsize)); - if (max_mem < st->vsize) { - hog = child; - max_mem = st->vsize; - } - } - if (NULL == hog) { - /* if all children dead and we are still too big, - * then we must be the culprit - abort - */ - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s NO CHILD: COMMITTING SUICIDE", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - orte_errmgr.abort(ORTE_ERR_MEM_LIMIT_EXCEEDED, NULL); - } else { - /* report the problem */ - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s REPORTING %s TO ERRMGR FOR EXCEEDING LIMITS", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&hog->name))); - ORTE_ACTIVATE_PROC_STATE(&hog->name, 
ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED); - } - /* since we have ordered someone to die, we've done enough for this - * time around - don't check proc limits as well - */ - return; - } - } - - /* check proc limits */ - if (0.0 < mca_sensor_resusage_component.proc_memory_limit) { - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s CHECKING PROC MEM", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* check my children first */ - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) { - continue; - } - OPAL_OUTPUT_VERBOSE((5, orte_sensor_base_framework.framework_output, - "%s PROC %s AT VSIZE %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name), st->vsize)); - if (mca_sensor_resusage_component.proc_memory_limit <= st->vsize) { - /* report the problem */ - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED); - } - } - } -} - -static void res_log(opal_buffer_t *sample) -{ - opal_pstats_t *st=NULL; - opal_node_stats_t *nst=NULL; - int rc, n, i; - opal_value_t kv[14]; - char *node; - - if (!log_enabled) { - return; - } - - /* unpack the node name */ - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &node, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* unpack the node stats */ - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &nst, &n, OPAL_NODE_STAT))) { - ORTE_ERROR_LOG(rc); - return; - } - - if (mca_sensor_resusage_component.log_node_stats) { - /* convert this into an array of opal_value_t's - no clean way - * to do this, so have to just manually map each field - */ - for (i=0; i < 13; i++) { - OBJ_CONSTRUCT(&kv[i], opal_value_t); - } - i=0; - kv[i].key = strdup("ctime"); - kv[i].type = 
OPAL_TIMEVAL; - kv[i].data.tv.tv_sec = nst->sample_time.tv_sec; - kv[i++].data.tv.tv_usec = nst->sample_time.tv_usec; - - kv[i].key = "hostname"; - kv[i].type = OPAL_STRING; - kv[i++].data.string = strdup(node); - - kv[i].key = strdup("total_mem"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->total_mem; - - kv[i].key = strdup("free_mem"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->free_mem; - - kv[i].key = strdup("buffers"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->buffers; - - kv[i].key = strdup("cached"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->cached; - - kv[i].key = strdup("swap_total"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_total; - - kv[i].key = strdup("swap_free"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_free; - - kv[i].key = strdup("mapped"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->mapped; - - kv[i].key = strdup("swap_cached"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_cached; - - kv[i].key = strdup("la"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la; - - kv[i].key = strdup("la5"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la5; - - kv[i].key = strdup("la15"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la15; - - /* store it */ - if (ORTE_SUCCESS != (rc = opal_db.add_log("nodestats", kv, 12))) { - /* don't bark about it - just quietly disable the log */ - log_enabled = false; - } - for (i=0; i < 12; i++) { - OBJ_DESTRUCT(&kv[i]); - } - } - - OBJ_RELEASE(nst); - - if (mca_sensor_resusage_component.log_process_stats) { - /* unpack all process stats */ - n=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(sample, &st, &n, OPAL_PSTAT))) { - for (i=0; i < 14; i++) { - OBJ_CONSTRUCT(&kv[i], opal_value_t); - } - kv[0].key = strdup("node"); - kv[0].type = OPAL_STRING; - kv[0].data.string = strdup(st->node); - kv[1].key = strdup("rank"); - kv[1].type = OPAL_INT32; - kv[1].data.int32 = st->rank; - kv[2].key = 
strdup("pid"); - kv[2].type = OPAL_PID; - kv[2].data.pid = st->pid; - kv[3].key = strdup("cmd"); - kv[3].type = OPAL_STRING; - kv[3].data.string = strdup(st->cmd); - kv[4].key = strdup("state"); - kv[4].type = OPAL_STRING; - kv[4].data.string = (char*)malloc(3 * sizeof(char)); - kv[4].data.string[0] = st->state[0]; - kv[4].data.string[1] = st->state[1]; - kv[4].data.string[2] = '\0'; - kv[5].key = strdup("time"); - kv[5].type = OPAL_TIMEVAL; - kv[5].data.tv.tv_sec = st->time.tv_sec; - kv[5].data.tv.tv_usec = st->time.tv_usec; - kv[6].key = strdup("percent_cpu"); - kv[6].type = OPAL_FLOAT; - kv[6].data.fval = st->percent_cpu; - kv[7].key = strdup("priority"); - kv[7].type = OPAL_INT32; - kv[7].data.int32 = st->priority; - kv[8].key = strdup("num_threads"); - kv[8].type = OPAL_INT16; - kv[8].data.int16 = st->num_threads; - kv[9].key = strdup("vsize"); - kv[9].type = OPAL_FLOAT; - kv[9].data.fval = st->vsize; - kv[10].key = strdup("rss"); - kv[10].type = OPAL_FLOAT; - kv[10].data.fval = st->rss; - kv[11].key = strdup("peak_vsize"); - kv[11].type = OPAL_FLOAT; - kv[11].data.fval = st->peak_vsize; - kv[12].key = strdup("processor"); - kv[12].type = OPAL_INT16; - kv[12].data.int16 = st->processor; - kv[13].key = strdup("sample_time"); - kv[13].type = OPAL_TIMEVAL; - kv[13].data.tv.tv_sec = st->sample_time.tv_sec; - kv[13].data.tv.tv_usec = st->sample_time.tv_usec; - /* store it */ - if (ORTE_SUCCESS != (rc = opal_db.add_log("procstats", kv, 14))) { - log_enabled = false; - } - for (i=0; i < 14; i++) { - OBJ_DESTRUCT(&kv[i]); - } - OBJ_RELEASE(st); - n=1; - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - ORTE_ERROR_LOG(rc); - } - } -} diff --git a/orte/mca/sensor/resusage/sensor_resusage.h b/orte/mca/sensor/resusage/sensor_resusage.h deleted file mode 100644 index 83f326089f..0000000000 --- a/orte/mca/sensor/resusage/sensor_resusage.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. 
- * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Process Resource Utilization sensor - */ -#ifndef ORTE_SENSOR_RESUSAGE_H -#define ORTE_SENSOR_RESUSAGE_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -struct orte_sensor_resusage_component_t { - orte_sensor_base_component_t super; - int sample_rate; - float node_memory_limit; - float proc_memory_limit; - bool log_node_stats; - bool log_process_stats; -}; -typedef struct orte_sensor_resusage_component_t orte_sensor_resusage_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_resusage_component_t mca_sensor_resusage_component; -extern orte_sensor_base_module_t orte_sensor_resusage_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/resusage/sensor_resusage_component.c b/orte/mca/sensor/resusage/sensor_resusage_component.c deleted file mode 100644 index 2d9aafcaed..0000000000 --- a/orte/mca/sensor/resusage/sensor_resusage_component.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_resusage.h" - -/* - * Local functions - */ -static int orte_sensor_resusage_register (void); -static int orte_sensor_resusage_open(void); -static int orte_sensor_resusage_close(void); -static int orte_sensor_resusage_query(mca_base_module_t **module, int *priority); - -orte_sensor_resusage_component_t mca_sensor_resusage_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "resusage", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_resusage_open, /* component open */ - orte_sensor_resusage_close, /* component close */ - orte_sensor_resusage_query, /* component query */ - orte_sensor_resusage_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "procresource,noderesource" - } -}; - -static int node_memory_limit; -static int proc_memory_limit; - -/** - * component open/close/init function - */ -static int orte_sensor_resusage_register (void) -{ - mca_base_component_t *c = &mca_sensor_resusage_component.super.base_version; - - mca_sensor_resusage_component.sample_rate = 0; - (void) mca_base_component_var_register (c, "sample_rate", "Sample rate in seconds (default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.sample_rate); - if (mca_sensor_resusage_component.sample_rate < 0) { - opal_output(0, "Illegal value %d - must be > 0", mca_sensor_resusage_component.sample_rate); - return ORTE_ERR_BAD_PARAM; - } - - node_memory_limit = 0; - (void) 
mca_base_component_var_register (c, "node_memory_limit", - "Percentage of total memory that can be in-use", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &node_memory_limit); - mca_sensor_resusage_component.node_memory_limit = (float)node_memory_limit/100.0; - - proc_memory_limit = 0; - (void) mca_base_component_var_register (c, "proc_memory_limit", - "Max virtual memory size in MBytes", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &proc_memory_limit); - mca_sensor_resusage_component.proc_memory_limit = (float) proc_memory_limit; - - mca_sensor_resusage_component.log_node_stats = false; - (void) mca_base_component_var_register (c, "log_node_stats", "Log the node stats", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.log_node_stats); - - mca_sensor_resusage_component.log_process_stats = false; - (void) mca_base_component_var_register (c, "log_process_stats", "Log the process stats", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.log_process_stats); - - return ORTE_SUCCESS; -} - -static int orte_sensor_resusage_open(void) -{ - if (mca_sensor_resusage_component.sample_rate < 0) { - opal_output(0, "Illegal value %d - must be > 0", mca_sensor_resusage_component.sample_rate); - return ORTE_ERR_FATAL; - } - - mca_sensor_resusage_component.node_memory_limit = (float) node_memory_limit/100.0; - mca_sensor_resusage_component.proc_memory_limit = (float) proc_memory_limit; - - return ORTE_SUCCESS; -} - - -static int orte_sensor_resusage_query(mca_base_module_t **module, int *priority) -{ - *priority = 100; /* ahead of heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_resusage_module; - - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_resusage_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/sensor.h b/orte/mca/sensor/sensor.h deleted file mode 100644 index e22852c438..0000000000 --- a/orte/mca/sensor/sensor.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * @file: - * - */ - -#ifndef MCA_SENSOR_H -#define MCA_SENSOR_H - -/* - * includes - */ - -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/mca/mca.h" - -BEGIN_C_DECLS - -/* - * Component functions - all MUST be provided! - */ - -/* start collecting data */ -typedef void (*orte_sensor_API_module_start_fn_t)(orte_jobid_t job); - -/* stop collecting data */ -typedef void (*orte_sensor_API_module_stop_fn_t)(orte_jobid_t job); - -/* API module */ -/* - * Ver 1.0 - */ -struct orte_sensor_base_API_module_1_0_0_t { - orte_sensor_API_module_start_fn_t start; - orte_sensor_API_module_stop_fn_t stop; -}; - -typedef struct orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_1_0_0_t; -typedef orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_t; - -/* initialize the module */ -typedef int (*orte_sensor_base_module_init_fn_t)(void); - -/* finalize the module */ -typedef void (*orte_sensor_base_module_finalize_fn_t)(void); - -/* tell the module to sample its sensor */ -typedef void (*orte_sensor_base_module_sample_fn_t)(void); - -/* pass a buffer to the module for logging */ -typedef void (*orte_sensor_base_module_log_fn_t)(opal_buffer_t *sample); - -/* - * Component modules Ver 1.0 - */ -struct orte_sensor_base_module_1_0_0_t { - orte_sensor_base_module_init_fn_t init; - orte_sensor_base_module_finalize_fn_t finalize; - orte_sensor_API_module_start_fn_t start; - 
orte_sensor_API_module_stop_fn_t stop; - orte_sensor_base_module_sample_fn_t sample; - orte_sensor_base_module_log_fn_t log; -}; - -typedef struct orte_sensor_base_module_1_0_0_t orte_sensor_base_module_1_0_0_t; -typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t; - -/* - * the standard component data structure - */ -struct orte_sensor_base_component_1_0_0_t { - mca_base_component_t base_version; - mca_base_component_data_t base_data; - char *data_measured; -}; -typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t; -typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t; - - - -/* - * Macro for use in components that are of type sensor v1.0.0 - */ -#define ORTE_SENSOR_BASE_VERSION_1_0_0 \ - /* sensor v1.0 is chained to MCA v2.0 */ \ - MCA_BASE_VERSION_2_0_0, \ - /* sensor v1.0 */ \ - "sensor", 1, 0, 0 - -/* Global structure for accessing sensor functions - */ -ORTE_DECLSPEC extern orte_sensor_base_API_module_t orte_sensor; /* holds API function pointers */ - -END_C_DECLS - -#endif /* MCA_SENSOR_H */ diff --git a/orte/mca/sensor/sensor_types.h b/orte/mca/sensor/sensor_types.h deleted file mode 100644 index 8d27fb2a20..0000000000 --- a/orte/mca/sensor/sensor_types.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef ORTE_MCA_SENSOR_TYPES_H -#define ORTE_MCA_SENSOR_TYPES_H - -#include "orte_config.h" -#include "orte/constants.h" - -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif /* HAVE_SYS_TIME_H */ - -#include "opal/dss/dss_types.h" - -/* - * General SENSOR types - instanced in runtime/orte_globals.c - */ - -BEGIN_C_DECLS - -enum { - ORTE_SENSOR_SCALE_LINEAR, - ORTE_SENSOR_SCALE_LOG, - ORTE_SENSOR_SCALE_SIGMOID -}; - -/* - * Structure for passing data from sensors - */ -typedef struct { - opal_object_t super; - char *sensor; - struct timeval timestamp; - opal_byte_object_t data; -} orte_sensor_data_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_sensor_data_t); - -END_C_DECLS - -#endif diff --git a/orte/mca/state/state.h b/orte/mca/state/state.h index 4681af2e06..f1f4ece061 100644 --- a/orte/mca/state/state.h +++ b/orte/mca/state/state.h @@ -2,6 +2,7 @@ /* * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -85,27 +86,19 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_state_base_framework; ORTE_JOBID_PRINT(shadow->jobid), \ orte_job_state_to_str((s)), \ __FILE__, __LINE__); \ - /* sanity check */ \ - if ((s) < 0) { \ - assert(0); \ - } \ orte_state.activate_job_state(shadow, (s)); \ } while(0); #define ORTE_ACTIVATE_PROC_STATE(p, s) \ do { \ orte_process_name_t *shadow=(p); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ (NULL == shadow) ? 
"NULL" : \ ORTE_NAME_PRINT(shadow), \ orte_proc_state_to_str((s)), \ __FILE__, __LINE__); \ - /* sanity check */ \ - if ((s) < 0) { \ - assert(0); \ - } \ orte_state.activate_proc_state(shadow, (s)); \ } while(0); diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester.c b/orte/orted/ft_tester.c similarity index 99% rename from orte/mca/sensor/ft_tester/sensor_ft_tester.c rename to orte/orted/ft_tester.c index 1d7d62090c..f614c65c9f 100644 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester.c +++ b/orte/orted/ft_tester.c @@ -1,13 +1,13 @@ /* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 25d37b3233..7cf0f5659a 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -102,7 +102,9 @@ static opal_pmix_server_module_t pmix_server = { .notify_event = pmix_server_notify_event, .query = pmix_server_query_fn, .tool_connected = pmix_tool_connected_fn, - .log = pmix_server_log_fn + .log = pmix_server_log_fn, + .allocate = pmix_server_alloc_fn, + .job_control = pmix_server_job_ctrl_fn }; void pmix_server_register_params(void) @@ -265,6 +267,12 @@ int pmix_server_init(void) kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&info, &kv->super); + /* tell the server to use its own internal monitoring */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_SERVER_ENABLE_MONITORING); + kv->type = OPAL_BOOL; + kv->data.flag = true; + opal_list_append(&info, &kv->super); /* setup the local server */ if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) { diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c 
index 0c3254b033..15f51e1155 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -511,3 +511,13 @@ int pmix_server_disconnect_fn(opal_list_t *procs, opal_list_t *info, return rc; } + +int pmix_server_alloc_fn(const opal_process_name_t *requestor, + opal_pmix_alloc_directive_t dir, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata) +{ + /* ORTE currently has no way of supporting allocation requests */ + return ORTE_ERR_NOT_SUPPORTED; +} diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index fa88b92d8b..1ef0515c04 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -40,10 +40,12 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/iof/iof.h" #include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/plm_private.h" #include "pmix_server_internal.h" @@ -611,7 +613,15 @@ static void _query(int sd, short args, void *cbdata) * and ask directly for the info - if rank=wildcard, then * we need to xcast the request and collect the results */ } - + } else if (0 == strcmp(q->keys[n], OPAL_PMIX_TIME_REMAINING)) { + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_TIME_REMAINING); + kv->type = OPAL_UINT32; + if (ORTE_SUCCESS != orte_schizo.get_remaining_time(&kv->data.uint32)) { + OBJ_RELEASE(kv); + } else { + opal_list_append(results, &kv->super); + } } } } @@ -813,3 +823,62 @@ void pmix_server_log_fn(opal_process_name_t *requestor, cbfunc(OPAL_SUCCESS, cbdata); } } + +int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor, + opal_list_t *targets, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata) +{ + opal_value_t *val; + int rc, n; + orte_proc_t *proc; + opal_pointer_array_t parray, *ptrarray; + 
opal_namelist_t *nm; + + opal_output_verbose(2, orte_pmix_server_globals.output, + "%s job control request from %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(requestor)); + + OPAL_LIST_FOREACH(val, info, opal_value_t) { + if (NULL == val->key) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + continue; + } + + if (0 == strcmp(val->key, OPAL_PMIX_JOB_CTRL_KILL)) { + /* convert the list of targets to a pointer array */ + if (NULL == targets) { + ptrarray = NULL; + } else { + OBJ_CONSTRUCT(&parray, opal_pointer_array_t); + OPAL_LIST_FOREACH(nm, targets, opal_namelist_t) { + /* get the proc object for this proc */ + if (NULL == (proc = orte_get_proc_object(&nm->name))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + continue; + } + OBJ_RETAIN(proc); + opal_pointer_array_add(&parray, proc); + } + ptrarray = &parray; + } + if (ORTE_SUCCESS != (rc = orte_plm.terminate_procs(ptrarray))) { + ORTE_ERROR_LOG(rc); + } + if (NULL != ptrarray) { + /* cleanup the array */ + for (n=0; n < parray.size; n++) { + if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(&parray, n))) { + OBJ_RELEASE(proc); + } + } + OBJ_DESTRUCT(&parray); + } + continue; + } + } + + return ORTE_SUCCESS; +} diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 31766eba01..3f232e7f42 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014 Research Organization for Information Science @@ -206,6 +206,18 @@ extern void pmix_server_log_fn(opal_process_name_t *requestor, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +extern int pmix_server_alloc_fn(const opal_process_name_t *requestor, + opal_pmix_alloc_directive_t dir, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata); + +extern int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor, + opal_list_t *targets, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata); + /* declare the RML recv functions for responses */ extern void pmix_server_launch_resp(int status, orte_process_name_t* sender, opal_buffer_t *buffer, diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index be0437bf20..d82f0601ce 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -705,7 +705,7 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) char *ndnames, *rmndr, **tmp; opal_list_t dids, slts, flgs;; opal_buffer_t *bptr=NULL; - orte_topology_t *t; + orte_topology_t *t2; orte_regex_range_t *rng, *drng, *srng, *frng; uint8_t ui8; @@ -978,14 +978,13 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* if no topology info was passed, then everyone shares our topology */ if (NULL == bptr) { - orte_topology_t *t; /* our topology is first in the array */ - t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); + t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); for (n=0; n < orte_node_pool->size; n++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { if (NULL == node->topology) { - OBJ_RETAIN(t); - node->topology = t; + OBJ_RETAIN(t2); + node->topology = t2; } } } @@ -1004,6 +1003,13 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) OBJ_RELEASE(bptr); goto cleanup; } + if (NULL == sig) { + rc = ORTE_ERR_BAD_PARAM; + ORTE_ERROR_LOG(rc); + opal_argv_free(tmp); + OBJ_RELEASE(bptr); + goto cleanup; + } n = 
1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); @@ -1013,11 +1019,12 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } /* see if we already have this topology - could be an update */ + t2 = NULL; for (n=0; n < orte_node_topologies->size; n++) { - if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { + if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { continue; } - if (0 == strcmp(t->sig, sig)) { + if (0 == strcmp(t2->sig, sig)) { /* found a match */ free(sig); opal_hwloc_base_free_topology(topo); @@ -1025,11 +1032,12 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) break; } } - if (NULL != sig) { + if (NULL != sig || NULL == t2) { /* new topology - record it */ - t = OBJ_NEW(orte_topology_t); - t->sig = sig; - t->topo = topo; + t2 = OBJ_NEW(orte_topology_t); + t2->sig = sig; + t2->topo = topo; + opal_pointer_array_add(orte_node_topologies, t2); } /* point each of the nodes in the regex to this topology */ start = strtoul(tmp[nn], &rmndr, 10); @@ -1043,8 +1051,8 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) for (k=start; k <= endpt; k++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) { if (NULL == node->topology) { - OBJ_RETAIN(t); - node->topology = t; + OBJ_RETAIN(t2); + node->topology = t2; } } }