diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c
index 18eec823f5..ed56895c53 100644
--- a/orte/mca/ras/alps/ras_alps_module.c
+++ b/orte/mca/ras/alps/ras_alps_module.c
@@ -49,7 +49,7 @@ typedef struct orte_ras_alps_sysconfig_t {
 } orte_ras_alps_sysconfig_t;
 
 /* /// Local Functions /// */
-static int orte_ras_alps_allocate(opal_list_t *nodes);
+static int orte_ras_alps_allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int orte_ras_alps_finalize(void);
 
@@ -287,7 +287,7 @@ orte_ras_get_appinfo_path(void)
  * requested number of nodes/process slots to the job.
  */
 static int
-orte_ras_alps_allocate(opal_list_t *nodes)
+orte_ras_alps_allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int ret;
     char *appinfo_path = NULL;
diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c
index e8afd0ce17..af5f15d7ec 100644
--- a/orte/mca/ras/base/ras_base_allocate.c
+++ b/orte/mca/ras/base/ras_base_allocate.c
@@ -141,7 +141,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
      */
     if (NULL != orte_ras_base.active_module) {
         /* read the allocation */
-        if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(&nodes))) {
+        if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(jdata, &nodes))) {
             if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
                 /* this module indicates that nodes will be discovered
                  * on a bootstrap basis, so all we do here is add our
diff --git a/orte/mca/ras/ccp/ras_ccp_module.c b/orte/mca/ras/ccp/ras_ccp_module.c
index ef091c4e57..fca815001a 100644
--- a/orte/mca/ras/ccp/ras_ccp_module.c
+++ b/orte/mca/ras/ccp/ras_ccp_module.c
@@ -41,7 +41,7 @@
 /*
  * Local functions
  */
-static int orte_ras_ccp_allocate(opal_list_t *nodes);
+static int orte_ras_ccp_allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int orte_ras_ccp_finalize(void);
 static int discover(opal_list_t* nodelist, ICluster* pCluster);
 void ras_get_cluster_message(ICluster* pCluster);
@@ -60,7 +60,7 @@ orte_ras_base_module_t orte_ras_ccp_module = {
  * Discover available (pre-allocated) nodes. Allocate the
  * requested number of nodes/process slots to the job.
  */
-static int orte_ras_ccp_allocate(opal_list_t *nodes)
+static int orte_ras_ccp_allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int ret, i;
     size_t len;
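Note: the module conversions in this patch are purely mechanical; each allocate gains the jdata parameter without yet using it. An out-of-tree RAS component needs the same one-line change to its prototype and definition. A minimal sketch of the new module-side shape, using a hypothetical "example" module that is not part of this patch:

#include "orte/constants.h"
#include "opal/class/opal_list.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ras/ras.h"

/* minimal shape of a module-side allocate under the new signature */
static int orte_ras_example_allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    (void)jdata;   /* newly available, but unused, as in the modules above */

    /* existing discovery logic is unchanged: it still appends
     * orte_node_t objects to the nodes list and returns a status code */
    return ORTE_SUCCESS;
}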
diff --git a/orte/mca/ras/gridengine/ras_gridengine_module.c b/orte/mca/ras/gridengine/ras_gridengine_module.c
index 727ca8b90d..a8840d3d52 100644
--- a/orte/mca/ras/gridengine/ras_gridengine_module.c
+++ b/orte/mca/ras/gridengine/ras_gridengine_module.c
@@ -37,7 +37,7 @@
 /*
  * Local functions
  */
-static int orte_ras_gridengine_allocate(opal_list_t *nodes);
+static int orte_ras_gridengine_allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int orte_ras_gridengine_finalize(void);
 #if 0
 static int get_slot_count(char* node_name, int* slot_cnt);
@@ -56,7 +56,7 @@ orte_ras_base_module_t orte_ras_gridengine_module = {
  * requested number of nodes/process slots to the job.
  *
  */
-static int orte_ras_gridengine_allocate(opal_list_t *nodelist)
+static int orte_ras_gridengine_allocate(orte_job_t *jdata, opal_list_t *nodelist)
 {
     char *pe_hostfile = getenv("PE_HOSTFILE");
     char *job_id = getenv("JOB_ID");
diff --git a/orte/mca/ras/loadleveler/ras_loadleveler_module.c b/orte/mca/ras/loadleveler/ras_loadleveler_module.c
index ff9cbccf31..abb5a8e9a7 100644
--- a/orte/mca/ras/loadleveler/ras_loadleveler_module.c
+++ b/orte/mca/ras/loadleveler/ras_loadleveler_module.c
@@ -38,7 +38,7 @@
 /*
  * Local functions
  */
-static int orte_ras_loadleveler_allocate(opal_list_t *nodes);
+static int orte_ras_loadleveler_allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int orte_ras_loadleveler_finalize(void);
 static int orte_ras_loadleveler_discover(opal_list_t *nodelist);
 
@@ -59,7 +59,7 @@ orte_ras_base_module_t orte_ras_loadleveler_module = {
  * Discover available (pre-allocated) nodes. Allocate the
  * requested number of nodes/process slots to the job.
  */
-static int orte_ras_loadleveler_allocate(opal_list_t *nodes)
+static int orte_ras_loadleveler_allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int ret = ORTE_SUCCESS;
diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c
index ff78669087..2baadc22ee 100644
--- a/orte/mca/ras/lsf/ras_lsf_module.c
+++ b/orte/mca/ras/lsf/ras_lsf_module.c
@@ -38,7 +38,7 @@
 /*
  * Local functions
  */
-static int allocate(opal_list_t *nodes);
+static int allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int finalize(void);
 
@@ -51,7 +51,7 @@ orte_ras_base_module_t orte_ras_lsf_module = {
 };
 
-static int allocate(opal_list_t *nodes)
+static int allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     char **nodelist;
     orte_node_t *node;
diff --git a/orte/mca/ras/ras.h b/orte/mca/ras/ras.h
index 797a7a206d..b2c78a0aa0 100644
--- a/orte/mca/ras/ras.h
+++ b/orte/mca/ras/ras.h
@@ -9,7 +9,7 @@
  * University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
- * Copyright (c) 2011      Los Alamos National Security, LLC. All rights
+ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
  *
@@ -28,7 +28,7 @@
  * that ORTE users will execute an "mpirun" or other command that
  * invokes ORTE through one of two channels:
  *
- * 1. local: the user will login to the computing resource they intend
+ * 1. the user will log in to the computing resource they intend
  * to use, request a resource allocation from that system, and then
  * execute the mpirun or other command. Thus, the allocation has
  * already been obtained prior to ORTE's initialization. In most
@@ -38,121 +38,12 @@
  * seek to support (e.g., an LSF component should know that LSF passes
  * allocation parameters as a specific LSF-named entity).
  *
- * 2. remote: the user issues an mpirun command on their notebook or
- * desktop computer, indicating that the application is to be executed
- * on a specific remote resource. In this case, the allocation may
- * not have been previously requested or made. Thus, the associated
+ * 2. the user issues an mpirun command, or starts an application that
+ * uses ORTE, without obtaining an allocation in advance. Thus, the associated
  * RAS component must know how to request an allocation from the
- * designated resource. To assist in this process, the RAS can turn to
- * the information provided by the resource discovery subsystem (RDS)
- * to learn what allocator resides on the designated resource.
- *
- * The RAS operates on a per-job basis - i.e., it serves to allocate
- * the resources for a specific job. It takes several inputs,
- * depending upon what is provided and desired:
- *
- * - the jobid for which the resources are to be allocated. There are
- * two options here: (a) the jobid can be predefined and provided to
- * the allocator. In this case, the allocator will simply allocate
- * resources to the job; or (b) the jobid can be set by the allocator
- * via a request to the ORTE name services (NS) subsystem. This option
- * is selected by calling the allocate function with the illegal jobid
- * of ORTE_JOBID_MAX. In this case, the new jobid (set by the
- * allocator) will be returned in the provided address (the allocate
- * function takes a pointer to the jobid as its argument).
- *
- * - MCA parameters specifying preallocated resources. These resources
- * are allocated to the specified jobid (whether set by the allocator
- * or not) on the first request. However, subsequent requests for
- * allocation do NOT use these parameters - the parameters are "unset"
- * after initial use. This is done to prevent subsequent allocation
- * requests from unintentionally overloading the specified resources
- * in cases where the univese is persistent and therefore servicing
- * multiple applications.
- *
- * - MCA parameters specifying the name of the application(s) and the
- * number of each application to be executed. These will usually be
- * taken from the command line options, but could be provided via
- * environmental parameters.
- *
- * - the resources defined in the ORTE_RESOURCE_SEGMENT by the
- * RDS. When an allocation is requested for resources not previously
- * allocated, the RAS will attempt to obtain an allocation that meets
- * the specified requirements. For example, if the user specifies that
- * the application must run on an Intel Itanium 2 resource under the
- * Linux operating system, but doesn't provide the allocation or
- * resource identification, then the allocator can (if possible)
- * search the ORTE_RESOURCE_SEGMENT for resources meeting those
- * specifications and attempt to obtain an allocation from them.
- *
- * The RAS outputs its results into three registry segments:
- *
- * (a) the ORTE_NODE_STATUS_SEGMENT. The segment consists of a
- * registry container for each node that has been allocated to a job -
- * for proper operation, each container MUST be described by the
- * following set of tokens:
- *
- * - nodename: a unique name assigned to each node, usually obtained
- * from the preallocated information in the environmental variables or
- * the resource manager for the specified compute resource (e.g.,
- * LSF). For those cases where specific nodenames are not provided,
- * the allocator can use the info provided by the RDS to attempt to
- * determine the nodenames (e.g., if the RDS learned that the nodes
- * are name q0-q1024 and we obtain an allocation of 100 nodes
- * beginning at node 512, then the RAS can derive the nodenames from
- * this information).
- *
- * For each node, the RAS stores the following information on the segment:
- *
- * - number of cpus allocated from this node to the user. This will
- * normally be the number of cpus/node as obtained from the data
- * provided by the RDS, but could differ in some systems.
- *
- * - the jobids that are utilizing this node. In systems that allow
- * overloading of processes onto nodes, there may be multiple jobs
- * sharing a given node.
- *
- * - the status of the node (up, down, rebooting, etc.). This
- * information is provided and updated by the state-of-health (SOH)
- * monitoring subsystem.
- *
- * (b) the ORTE_JOB_SEGMENT. The RAS preallocates this segment,
- * initializing one container for each process plus one container to
- * store information that spans the job. This latter container houses
- * information such as the application names, number of processes per
- * application, process context (including argv and enviro arrays),
- * and i/o forwarding info. The RAS does NOT establish nor fill any of
- * the individual process info containers - rather, it preallocates
- * the storage for those containers and places some of the job-wide
- * information into that container. This info includes:
- *
- * - application names and number of processes per application
- *
- * - process context
- *
- * The remainder of the information in that container will be supplied
- * by other subsystems.
- *
- * (c) the ORTE_RESOURCE_SEGMENT. The RAS adds information to this
- * segment to indicate consumption of an available resource. In
- * particular, the RAS updates fields in the respective compute
- * resource to indicate the portion of that resource that has been
- * allocated and therefore can be presumed consumed. This includes
- * info on the number of nodes and cpus allocated to existing jobs -
- * these numbers are updated by the RAS when resources are deallocated
- * at the completion of a job.
- *
- * The information provided by the RAS is consumed by the resource
- * mapper subsystem (RMAPS) that defines which process is executed
- * upon which node/cpu, the process launch subsystem (PLS) that
- * actually launches each process, and others.
- *
- * Because the RAS operates as a multi-component framework (i.e.,
- * multiple components may be simultaneously instantiated), the RAS
- * functions should NOT be called directly. Instead, they should be
- * accessed via the ORTE resource manager (RMGR) subsystem.
- *
- *
+ * designated resource. If it does not know how, or cannot obtain the
+ * allocation, it must indicate this by setting the system to an
+ * appropriate state.
  */

 #ifndef ORTE_MCA_RAS_H
@@ -185,7 +76,8 @@ ORTE_DECLSPEC extern opal_event_t orte_allocate_event;
 
 /**
  * Allocate resources to a job.
  */
-typedef int (*orte_ras_base_module_allocate_fn_t)(opal_list_t *nodes);
+typedef int (*orte_ras_base_module_allocate_fn_t)(orte_job_t *jdata,
+                                                  opal_list_t *nodes);
 
 /**
  * Cleanup module resources.
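Note: none of the modules converted in this patch consume jdata yet; the argument is threaded through so that a module which must request an allocation on the fly can see the job it is allocating for. A hypothetical sketch of such a use follows. The field accesses assume the orte_job_t and orte_app_context_t definitions in orte/runtime/orte_globals.h, and the "example" module is again not part of this patch:

#include "orte/constants.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ras/ras.h"

static int orte_ras_example_allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    int i;
    int32_t slots_wanted = 0;
    orte_app_context_t *app;

    /* size a dynamic allocation request from the job itself:
     * sum the processes requested across all app contexts */
    for (i = 0; i < jdata->apps->size; i++) {
        app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i);
        if (NULL == app) {
            continue;
        }
        slots_wanted += app->num_procs;
    }

    /* ...ask the resource manager for slots_wanted slots, then append
     * the resulting orte_node_t objects to the nodes list... */
    (void)nodes;
    return ORTE_SUCCESS;
}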
diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c
index 184e2f871a..6014968e60 100644
--- a/orte/mca/ras/simulator/ras_sim_module.c
+++ b/orte/mca/ras/simulator/ras_sim_module.c
@@ -28,7 +28,7 @@
 /*
  * Local functions
  */
-static int allocate(opal_list_t *nodes);
+static int allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int finalize(void);
 
@@ -40,7 +40,7 @@ orte_ras_base_module_t orte_ras_sim_module = {
     finalize
 };
 
-static int allocate(opal_list_t *nodes)
+static int allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int i, n, val, dig, num_nodes;
     orte_node_t *node;
diff --git a/orte/mca/ras/slurm/ras_slurm_module.c b/orte/mca/ras/slurm/ras_slurm_module.c
index 9160661ce9..a455e752b4 100644
--- a/orte/mca/ras/slurm/ras_slurm_module.c
+++ b/orte/mca/ras/slurm/ras_slurm_module.c
@@ -40,7 +40,7 @@
 /*
  * Local functions
  */
-static int orte_ras_slurm_allocate(opal_list_t *nodes);
+static int orte_ras_slurm_allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int orte_ras_slurm_finalize(void);
 
 static int orte_ras_slurm_discover(char *regexp, char* tasks_per_node,
@@ -63,7 +63,7 @@ orte_ras_base_module_t orte_ras_slurm_module = {
  * requested number of nodes/process slots to the job.
  *
  */
-static int orte_ras_slurm_allocate(opal_list_t *nodes)
+static int orte_ras_slurm_allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int ret, cpus_per_task;
     char *slurm_node_str, *regexp;
diff --git a/orte/mca/ras/tm/ras_tm_module.c b/orte/mca/ras/tm/ras_tm_module.c
index 4b0b5f3fa2..306bac4125 100644
--- a/orte/mca/ras/tm/ras_tm_module.c
+++ b/orte/mca/ras/tm/ras_tm_module.c
@@ -38,7 +38,7 @@
 /*
  * Local functions
  */
-static int allocate(opal_list_t *nodes);
+static int allocate(orte_job_t *jdata, opal_list_t *nodes);
 static int finalize(void);
 
 static int discover(opal_list_t* nodelist, char *pbs_jobid);
@@ -62,7 +62,7 @@ orte_ras_base_module_t orte_ras_tm_module = {
  * them back to the caller.
  *
  */
-static int allocate(opal_list_t *nodes)
+static int allocate(orte_job_t *jdata, opal_list_t *nodes)
 {
     int ret;
     char *pbs_jobid;
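Note: for reference, the caller side after this change, condensed from orte_ras_base_allocate() in ras_base_allocate.c above. This is a sketch, not a drop-in replacement: state transitions, node bookkeeping, and most error handling are omitted.

#include "orte/constants.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/base.h"

static void allocate_sketch(orte_job_t *jdata)
{
    opal_list_t nodes;
    int rc;

    OBJ_CONSTRUCT(&nodes, opal_list_t);
    if (NULL != orte_ras_base.active_module) {
        /* the active module now receives the job being allocated */
        rc = orte_ras_base.active_module->allocate(jdata, &nodes);
        if (ORTE_SUCCESS != rc && ORTE_ERR_SYSTEM_WILL_BOOTSTRAP != rc) {
            ORTE_ERROR_LOG(rc);
        }
    }
    OBJ_DESTRUCT(&nodes);
}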