/* -*- C -*-
 *
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

/* @file:
 * xcpu launcher to launch jobs on compute nodes.
 */

#include "orte_config.h"
|
|
|
|
#if HAVE_SYS_TYPES_H
|
|
|
|
#include <sys/types.h>
|
|
|
|
#endif /* HAVE_SYS_TYPES_H */
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#endif /* HAVE_SYS_STAT_H */
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif /* HAVE_UNISTD_H */
|
|
|
|
#include <errno.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#ifdef HAVE_FCNTL_H
|
|
|
|
#include <fcntl.h>
|
|
|
|
#endif /* HAVE_FCNTL_H */
|
|
|
|
#ifdef HAVE_STRING_H
|
|
|
|
#include <string.h>
|
|
|
|
#endif /* HAVE_STRING_H */
|
|
|
|
|
|
|
|
#include "opal/event/event.h"
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "opal/util/output.h"
|
|
|
|
#include "opal/util/opal_environ.h"
|
|
|
|
#include "opal/util/path.h"
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
|
|
|
|
#include "orte/dss/dss.h"
|
|
|
|
#include "orte/util/sys_info.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
#include "orte/mca/gpr/base/base.h"
|
|
|
|
#include "orte/mca/iof/iof.h"
|
|
|
|
#include "orte/mca/ns/base/base.h"
|
|
|
|
#include "orte/mca/sds/base/base.h"
|
|
|
|
#include "orte/mca/oob/base/base.h"
|
|
|
|
#include "orte/mca/ras/base/base.h"
|
|
|
|
#include "orte/mca/rmgr/base/base.h"
|
|
|
|
#include "orte/mca/rmaps/base/base.h"
|
|
|
|
#include "orte/mca/rmaps/base/rmaps_base_map.h"
|
|
|
|
#include "orte/mca/rml/rml.h"
|
|
|
|
#include "orte/mca/soh/base/base.h"
|
|
|
|
#include "orte/runtime/orte_wait.h"
|
|
|
|
#include "orte/runtime/runtime.h"
|
|
|
|
|
|
|
|
#include "pls_xcpu.h"
|
|
|
|
|
2006-04-12 15:41:54 +00:00
|
|
|
/**
 * Our current environment
 */
extern char **environ;

/**
 * Initialization of the xcpu module with all the needed function pointers
 */
orte_pls_base_module_t orte_pls_xcpu_module = {
    orte_pls_xcpu_launch,
    orte_pls_xcpu_terminate_job,
    orte_pls_xcpu_terminate_proc,
    orte_pls_xcpu_finalize
};

/** include a prototype for the xcpu launch function */
int lrx(int argc, char **argv, char **env);
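
/* Note: lrx() is provided by the xcpu runtime, not by this module. As used
 * below, it is assumed to start a launch thread for one remote process and
 * return that thread's id, which we pthread_join() once all processes have
 * been started.
 */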

/** LOCAL SUPPORT FUNCTIONS **/

/** provide a local function to recursively release the stack of
 * thread ids used to track the xcpu launch threads
 */
static void orte_pls_xcpu_free_stack(orte_pls_xcpu_tid_stack *s){
    if(s){
        orte_pls_xcpu_free_stack(s->next);
        free(s);
    }
}

/** provide a function to setup the environment for the remote
 * processes. We need to ensure that the remote processes know
 * their gpr and ns replicas, the universe
 * to which they belong, etc. - otherwise, they may run, but they
 * will never actually join the rest of the job. This function
 * creates the common environment for all the processes.
 *
 * @param env a pointer to the environment to setup
 */
static int orte_pls_xcpu_setup_env(char ***env)
{
    char **merged;
    char *var;
    char *param;
    int rc;
    int num_env;

    /** merge in environment */
    merged = opal_environ_merge(*env, environ);
    opal_argv_free(*env);
    *env = merged;

    num_env = opal_argv_count(*env);
    /** append mca parameters to our environment */
    if(ORTE_SUCCESS != (rc = mca_base_param_build_env(env, &num_env, false))) {
        ORTE_ERROR_LOG(rc);
    }

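    /* For both the ns and gpr replicas below: if this process was not told
     * of a separate replica, it is hosting the replica itself, so we hand
     * out our own RML contact URI for the remote processes to use.
     */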
    /** ns replica contact info */
    if (NULL != orte_process_info.ns_replica) {
        param = strdup(orte_process_info.ns_replica_uri);
    } else {
        param = orte_rml.get_uri();
    }
    var = mca_base_param_environ_variable("ns","replica","uri");
    opal_setenv(var, param, true, env);
    free(param);
    free(var);

    /** make sure the frontend hostname does not get pushed out to the backend */
    var = mca_base_param_environ_variable("orte", "base", "nodename");
    opal_unsetenv(var, env);
    free(var);
    opal_unsetenv("HOSTNAME", env);

    /** gpr replica contact info */
    if (NULL != orte_process_info.gpr_replica) {
        param = strdup(orte_process_info.gpr_replica_uri);
    } else {
        param = orte_rml.get_uri();
    }
    var = mca_base_param_environ_variable("gpr","replica","uri");
    opal_setenv(var, param, true, env);
    free(param);
    free(var);

    /** universe name */
    var = mca_base_param_environ_variable("universe", NULL, NULL);
    asprintf(&param, "%s@%s:%s", orte_universe_info.uid,
             orte_universe_info.host, orte_universe_info.name);
    opal_setenv(var, param, true, env);
    free(param);
    free(var);

    return ORTE_SUCCESS;
}
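
/* Illustrative result of the setup above (variable names follow
 * mca_base_param_environ_variable; values are examples only):
 *   OMPI_MCA_ns_replica_uri=<RML contact info for the name service>
 *   OMPI_MCA_gpr_replica_uri=<RML contact info for the registry>
 *   OMPI_MCA_universe=<uid>@<host>:<universe name>
 */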

/** LAUNCH **/

/** This is the main function that will launch jobs on remote compute nodes
 * @param jobid the jobid of the job to launch
 * @retval ORTE_SUCCESS or error
 */
int orte_pls_xcpu_launch(orte_jobid_t jobid){
    opal_list_t mapping;
    char *header[] = {
        "dummy",
        NULL,
        NULL};
    int argc;
    int rc;
    size_t proc_id = 0;
    orte_pls_xcpu_tid_stack *t_stack, *temp_stack;
    opal_list_item_t *item;
    orte_rmaps_base_map_t* map;
    orte_rmaps_base_node_t *node;
    orte_rmaps_base_proc_t *proc;
    orte_vpid_t vpid_start, vpid_range;

    /** first get the mapping we are going to use to launch the job. The head
     * of the list is OBJ_CONSTRUCT'd since it is not dynamically allocated. The
     * get_map function, however, will dynamically allocate the items in the
     * list itself - these will be released when we OBJ_DESTRUCT the list at
     * the end
     */
    /*fprintf(stdout, "\nxcpu launch called, job id: %d\n", jobid);*/
    OBJ_CONSTRUCT(&mapping, opal_list_t);
    /** get the mapping from the registry. This will provide a linked list, one
     * item for each mapping. Each item contains the full context of the application
     * that is to be executed upon that node. In particular, we need to obtain
     * the argv array that is included in that context as this tells us the application
     * to launch plus any "flags" to pass to it.
     */
    if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /** next, get the vpid_start and range info so we can pass it along */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_vpid_range(jobid, &vpid_start, &vpid_range))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /** we have to do the following so that we can use the opal_argv utilities
     * to properly insert the header into the app's argv
     */
    header[1] = strdup("dummy");

    /** Now loop through all the provided maps to launch their associated apps */
    t_stack = NULL;
    for(item = opal_list_get_first(&mapping);
        item != opal_list_get_end(&mapping);
        item = opal_list_get_next(item)) {
        map = (orte_rmaps_base_map_t*) item;

        /** xcpu requires an argv format that has a dummy filler in the
         * first location, followed by the node name, and then the standard
         * argv array we've all come to know and love (i.e., the application
         * name followed by options). We use the opal_argv utilities to
         * prepend this header info to the application's argv.
         *
         * Note: at this point, the header contains a dummy placeholder
         * for the node name - we'll fill that in later.
         */
        opal_argv_insert(&(map->app->argv), 0, header);

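        /* Illustrative layout after the insert (names are examples only):
         *   argv[0] = "dummy"      filler required by xcpu
         *   argv[1] = "dummy"      placeholder, replaced by the node name below
         *   argv[2] = "my_app"     the application to launch
         *   argv[3...]             its original options
         */
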
        /** since it is possible that each node could be executing a different application,
         * we cannot just do a mass launch - that would only be supported in the special
         * case of all the application processes being identical. Instead, we are going to
         * step our way through the list, launching each process individually.
         */
        proc_id = 0;
        while (proc_id < map->num_procs){
            proc = (orte_rmaps_base_proc_t*)(map->procs[proc_id]);
            node = proc->proc_node;
            proc_id++;

            /** each proc_t entry contains the application to be executed,
             * the node upon which it is to be executed, and its OpenRTE
             * process name (plus a few other things). We use that
             * info to build the launch command by inserting them into
             * the argv array
             */

            /** start by pointing the proper location at the node name where
             * this process is to be launched
             */
            if (NULL != map->app->argv[1]) free(map->app->argv[1]);
            map->app->argv[1] = strdup(node->node->node_name);

            /** we also need to pass the proper environment to the remote
             * process so it knows its universe, gpr and ns replicas, etc. Since this
             * can be specified by the user for each app, we have to do this
             * each time.
             */
            if (ORTE_SUCCESS != (rc = orte_pls_xcpu_setup_env(&map->app->env))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }

            /** now setup the process name in the environment so we can
             * retrieve it on the other end - passing along the vpid range
             * we obtained above
             */
            if (ORTE_SUCCESS != (rc = orte_ns_nds_env_put(&(proc->proc_name),
                                                          vpid_start, vpid_range,
                                                          &(map->app->env)))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }

            /** the launcher wants to know how long the argv array is - get that now */
            argc = opal_argv_count(map->app->argv);

            /** add this process to the stack so we can track it */
            temp_stack = (orte_pls_xcpu_tid_stack*)malloc(sizeof(orte_pls_xcpu_tid_stack));
            temp_stack->next = t_stack;
            t_stack = temp_stack;

            /** launch the process */
            t_stack->tid = lrx(argc, map->app->argv, map->app->env);
        }
    }

    /** wait for all threads that have launched processes on remote nodes */
    temp_stack = t_stack;
    while(t_stack){
        pthread_join(t_stack->tid, NULL);
        t_stack = t_stack->next;
    }
    orte_soh.begin_monitoring_job(jobid);

    /** cleanup local storage */
    orte_pls_xcpu_free_stack(temp_stack);
    OBJ_DESTRUCT(&mapping);

    /** launch complete */
    /*fprintf(stdout, "launch finished\n");*/
    return ORTE_SUCCESS;
}

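/** TERMINATE / FINALIZE - not yet implemented; these entry points
 * simply report success so the rest of the system can proceed
 */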
int orte_pls_xcpu_terminate_job(orte_jobid_t jobid){
    return ORTE_SUCCESS;
}

int orte_pls_xcpu_terminate_proc(const orte_process_name_t* proc_name){
    return ORTE_SUCCESS;
}

int orte_pls_xcpu_finalize(void){
    return ORTE_SUCCESS;
}