diff --git a/orte/mca/plm/poe/Makefile.am b/orte/mca/plm/poe/Makefile.am deleted file mode 100644 index 7aae4b9499..0000000000 --- a/orte/mca/plm/poe/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -sources = \ - plm_poe.h \ - plm_poe_component.c \ - plm_poe_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_plm_poe_DSO -component_noinst = -component_install = mca_plm_poe.la -else -component_noinst = libmca_plm_poe.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_plm_poe_la_SOURCES = $(sources) -mca_plm_poe_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_plm_poe_la_SOURCES =$(sources) -libmca_plm_poe_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/plm/poe/configure.m4 b/orte/mca/plm/poe/configure.m4 deleted file mode 100644 index 3622e4c9c6..0000000000 --- a/orte/mca/plm/poe/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_plm_poe_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- - -AC_DEFUN([MCA_plm_poe_CONFIG],[ - # POE is only supported on AIX. We only need executables (no - # header files or libraries), but those can be found (or not) at - # run-time. So if we're on AIX, build this component. - AC_MSG_CHECKING([if on AIX]) - case $host_os in - aix3* | aix4* | aix5*) - happy=yes - ;; - *) - happy=no - ;; - esac - AC_MSG_RESULT([$happy]) - AS_IF([test "$happy" = "yes"], [$1], [$2]) -]) diff --git a/orte/mca/plm/poe/configure.params b/orte/mca/plm/poe/configure.params deleted file mode 100644 index 8fc44480a6..0000000000 --- a/orte/mca/plm/poe/configure.params +++ /dev/null @@ -1,22 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/plm/poe/plm_poe.h b/orte/mca/plm/poe/plm_poe.h deleted file mode 100644 index e3da437b38..0000000000 --- a/orte/mca/plm/poe/plm_poe.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_PLM_POE_EXPORT_H -#define ORTE_PLM_POE_EXPORT_H - -#include "orte_config.h" - -#include "opal/mca/mca.h" -#include "orte/mca/plm/plm.h" - -BEGIN_C_DECLS -/* - * Module open / close - */ -int orte_plm_poe_component_open(void); -int orte_plm_poe_component_close(void); -orte_plm_base_module_t* orte_plm_poe_component_init(int *priority); - -/** - * PLM Component - */ -struct orte_plm_poe_component_t { - orte_plm_base_component_t super; - orte_jobid_t jobid; - int priority; - int verbose; - char* path; - char* env; - char** argv; - int argc; - int debug; - char* orted; - char* class; - char* resource_allocation; - char* hostfile; - char* cmdfile; - char* mp_stdoutmode; - char* mp_labelio; - int mp_retry; - int mp_retrycount; - int mp_infolevel; -}; -typedef struct orte_plm_poe_component_t orte_plm_poe_component_t; - - -ORTE_MODULE_DECLSPEC extern orte_plm_poe_component_t mca_plm_poe_component; -extern orte_plm_base_module_t orte_plm_poe_module; - -END_C_DECLS - -#endif /* ORTE_PLM_POE_EXPORT_H */ diff --git a/orte/mca/plm/poe/plm_poe_component.c b/orte/mca/plm/poe/plm_poe_component.c deleted file mode 100644 index c1b3d6e1ad..0000000000 --- a/orte/mca/plm/poe/plm_poe_component.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "orte_config.h" - -#include "plm_poe.h" - -#include "opal/util/argv.h" -#include "opal/util/path.h" -#include "opal/util/opal_environ.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/constants.h" -#include "orte/mca/plm/plm.h" - - -/* - * Public string showing the plm ompi_poe component version number - */ -const char *mca_plm_poe_component_version_string = - "Open MPI poe plm MCA component version " ORTE_VERSION; - - -/* - * Local variable - */ - - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -orte_plm_poe_component_t mca_plm_poe_component = { - { - /* First, the mca_component_t struct containing meta information - about the component itself */ - - { - /* Indicate that we are a plm v1.0.0 component (which also - implies a specific MCA version) */ - - ORTE_PLM_BASE_VERSION_1_0_0, - - /* Component name and version */ - - "poe", - ORTE_MAJOR_VERSION, - ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION, - - /* Component open and close functions */ - - orte_plm_poe_component_open, - NULL - }, - - /* Next the MCA v1.0.0 component meta data */ - - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Initialization / querying functions */ - - orte_plm_poe_component_init - } -}; - -/** -orte_plm_poe_component_open - open component and register all parameters -@return error number -*/ -int orte_plm_poe_component_open(void) -{ - char *param; - mca_base_component_t *c = &mca_plm_poe_component.super.plm_version; - - mca_base_param_reg_int(c, "mp_retry", - "specifies the interval (in seconds) to wait before repeating the node request", - true, false, 0, &mca_plm_poe_component.mp_retry); - mca_base_param_reg_int(c, "mp_retrycount", - "specifies the number of times the Partition Manager should make the request before returning", - true, false, 0, &mca_plm_poe_component.mp_retrycount); - mca_base_param_reg_int(c, "mp_infolevel", - "specify the level of messages you want from POE (0-6)", - true, false, 0, &mca_plm_poe_component.mp_infolevel); - mca_base_param_reg_string(c, "mp_labelio", - "Whether or not to label message output with task identifiers (yes or no)", - true, false, "no", &mca_plm_poe_component.mp_labelio); - mca_base_param_reg_string(c, "mp_stdoutmode", - "standard output mode (ordered, unordered or taskID)", - true, false, "unordered", &mca_plm_poe_component.mp_stdoutmode); - - mca_base_param_reg_int(c, "debug", - "Whether or not to enable debugging output for the poe plm component (0 or 1)", - false, false, 0, &mca_plm_poe_component.debug); - mca_base_param_reg_int(c, "verbose", - "Verbose level", - true, false, 0, &mca_plm_poe_component.verbose); - mca_base_param_reg_int(c, "priority", - "Priority of the poe plm component", - false , false, 100, &mca_plm_poe_component.priority); - mca_base_param_reg_string(c, "orted", - "The command name that the poe plm component will invoke for the ORTE daemon", - false, false, "orted", &mca_plm_poe_component.orted); - mca_base_param_reg_string(c, "class", - "class (interactive or batch)", - true, false, "interactive", &mca_plm_poe_component.class); - mca_base_param_reg_string(c, "resource_allocation", - "resource_allocation mode (hostfile or automatic)", - false, false, "hostfile", &mca_plm_poe_component.resource_allocation); - mca_base_param_reg_string(c, "progenv", - "The command name that setup environment", - false, false, "env", &mca_plm_poe_component.env); - mca_base_param_reg_string(c, "progpoe", - "The POE command", - false, false, "poe", ¶m); - mca_plm_poe_component.argv = opal_argv_split(param, ' '); - mca_plm_poe_component.argc = opal_argv_count(mca_plm_poe_component.argv); - if (mca_plm_poe_component.argc > 0) { - mca_plm_poe_component.path = strdup(mca_plm_poe_component.argv[0]); - return ORTE_SUCCESS; - } else { - mca_plm_poe_component.path = NULL; - return ORTE_ERR_BAD_PARAM; - } - - - return ORTE_SUCCESS; -} - -/** -orte_plm_poe_component_init - initialize component, check if we can run on this machine. -@return error number -*/ -orte_plm_base_module_t *orte_plm_poe_component_init(int *priority) -{ - - mca_plm_poe_component.path = opal_path_findv(mca_plm_poe_component.argv[0], 0, environ, NULL); - if (NULL == mca_plm_poe_component.path) { - return NULL; - } - mca_plm_poe_component.env = opal_path_findv(mca_plm_poe_component.env, 0, environ, NULL); - if (NULL == mca_plm_poe_component.env) { - return NULL; - } - *priority = mca_plm_poe_component.priority; - return &orte_plm_poe_module; -} diff --git a/orte/mca/plm/poe/plm_poe_module.c b/orte/mca/plm/poe/plm_poe_module.c deleted file mode 100644 index 390bbb0407..0000000000 --- a/orte/mca/plm/poe/plm_poe_module.c +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TIME_H -#include -#endif - -#include "opal/mca/base/mca_base_param.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/rmaps.h" -#include "orte/mca/rml/rml.h" -#include "orte/util/univ_info.h" -#include "orte/util/session_dir.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" - -/* remove for ORTE 2.0 */ -#include "orte/mca/sds/base/base.h" - -#include "orte/mca/plm/plm.h" -#include "orte/mca/plm/base/base.h" -#include "orte/mca/plm/base/plm_private.h" -#include "orte/mca/plm/poe/plm_poe.h" - -/* - * Local functions - */ -static int plm_poe_init(void); -static int plm_poe_launch_job(orte_job_t *jdata); -static int plm_poe_terminate_job(orte_jobid_t jobid); -static int plm_poe_terminate_orteds(void); -static int plm_poe_signal_job(orte_jobid_t jobid, int32_t signal); -static int plm_poe_finalize(void); - -orte_plm_base_module_t orte_plm_poe_module = { - plm_poe_init, - orte_plm_base_set_hnp_name, - plm_poe_launch_job, - plm_poe_terminate_job, - plm_poe_terminate_orteds, - plm_poe_signal_job, - plm_poe_finalize -}; - -/** - * Init the module - */ -int plm_poe_init(void) -{ - return ORTE_SUCCESS; -} -/** -poe_set_handler_default - set signal handler to default -@param sig signal [IN] -*/ -static void poe_set_handler_default(int sig) -{ - struct sigaction act; - - act.sa_handler = SIG_DFL; - act.sa_flags = 0; - sigemptyset(&act.sa_mask); - sigaction(sig, &act, (struct sigaction *)0); -} - -/** -poe_argv_append_int - append integer variable to argument variable -@param argc argument count [OUT] -@param argv argument variable [OUT] -@param varname variable name [IN] -@param min minimum value [IN] -@param argname argument name [IN] -*/ -static inline int poe_argv_append_int(int *argc, char ***argv, int varname, int min, char *argname) -{ - char *tmp_string; - if(varname >= min) { - opal_argv_append(argc, argv, argname); - asprintf(&tmp_string, "%d", varname); - opal_argv_append(argc, argv, tmp_string); - free(tmp_string); - } else { - return ORTE_ERR_BAD_PARAM; - } - return ORTE_SUCCESS; -} - -/** -@warning - THIS FUNCTION IS NOT USED. IT WILL BE USED WHEN FAULT-TOLERANCE FEATURE IS NEEDED -*/ - -#ifdef __FOR_LATER - -int plm_poe_launch_interactive_orted(orte_job_t *jdata) -{ - opal_list_t nodes, mapping_list; - opal_list_item_t* item; - orte_std_cntr_t num_nodes; - orte_vpid_t vpid; - int node_name_index1; - int node_name_index2; - int proc_name_index; - char *tmp_string; - char *uri, *param; - char* name_string; - char** argv; - int argc; - int pid; - int rc; - int i; - int status; - FILE *hfp, *cfp; - - /* Query the list of nodes allocated and mapped to this job. - * We need the entire mapping for a couple of reasons: - * - need the prefix to start with. - * - need to know if we are launching on a subset of the allocated nodes - * All other mapping responsibilities fall to orted in the fork PLM - */ - - if((mca_plm_poe_component.hostfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; - if((mca_plm_poe_component.cmdfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; - if((hfp=fopen(mca_plm_poe_component.hostfile,"w"))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; - if((cfp=fopen(mca_plm_poe_component.cmdfile,"w"))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; - - OBJ_CONSTRUCT(&nodes, opal_list_t); - OBJ_CONSTRUCT(&mapping_list, opal_list_t); - rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid); - if(ORTE_SUCCESS != rc) { - goto cleanup; - } - - /* - * Allocate a range of vpids for the daemons. - */ - - num_nodes = opal_list_get_size(&nodes); - if(num_nodes == 0) { - return ORTE_ERR_BAD_PARAM; - } - rc = orte_ns.reserve_range(0, num_nodes, &vpid); - if(ORTE_SUCCESS != rc) { - goto cleanup; - } - - /* application */ - argv = opal_argv_copy(opal_argv_split(mca_plm_poe_component.orted, ' ')); - argc = opal_argv_count(argv); - if (mca_plm_poe_component.debug) { - opal_argv_append(&argc, &argv, "--debug"); - } - opal_argv_append(&argc, &argv, "--debug-daemons"); - - /* need integer value for command line parameter - NOT hex */ - asprintf(&tmp_string, "%lu", (unsigned long)jobid); - - /* Add basic orted command line options */ - orte_plm_base_orted_append_basic_args(&argc, &argv, - &proc_name_index, - &node_name_index2, - tmp_string, - num_nodes - ); - free(tmp_string); - - /* - * Iterate through each of the nodes and spin - * up a daemon. - */ - - for(item = opal_list_get_first(&nodes); - item != opal_list_get_end(&nodes); - item = opal_list_get_next(item)) { - orte_ras_node_t* node = (orte_ras_node_t*)item; - orte_process_name_t* name; - pid_t pid; - - /* setup node name */ - argv[node_name_index2] = node->node_name; - - fprintf(hfp,"%s\n",node->node_name); - - /* initialize daemons process name */ - rc = orte_ns.create_process_name(&name, 0, vpid); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* setup process name */ - rc = orte_ns.get_proc_name_string(&name_string, name); - if(ORTE_SUCCESS != rc) { - opal_output(0, "orte_plm_poe: unable to create process name"); - return rc; - } - argv[proc_name_index] = name_string; - for(i=0;i 10) opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc); - return rc; -} - -#endif - -/** -poe_wait_job - call back when POE finish -@param pid pid -@param status status -@param cbdata call back data -@return error number -*/ -static void poe_wait_job(pid_t pid, int status, void* cbdata) -{ - orte_job_map_t *map; - orte_std_cntr_t nnode, nproc; - orte_node_t **nodes; - orte_proc_t **procs; - int rc; - - /* query allocation for the job */ - if (NULL == (map = orte_rmaps.get_job_map(mca_plm_poe_component.jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - } - nodes = (orte_node_t**)map->nodes->addr; - - for(nnode=0; nnode < map->num_nodes; nnode++) { - orte_node_t* node = nodes[nnode]; - - procs = (orte_proc_t**)node->procs->addr; - for (nproc=0; nproc < node->num_procs; nproc++) { - orte_proc_t* proc = procs[nproc]; - - orte_session_dir_finalize(&(proc->name)); - proc->state = ORTE_PROC_STATE_ABORTED,; - } - } -} - -/** -poe_create_cmd_file - create POE command file -@param cfp command file pointer [IN] -@param context context [IN] -@param proc proc [IN] -@param vpid_range vpid range [IN] -@return error number -*/ -static int poe_create_cmd_file( - FILE *cfp, - orte_app_context_t* context, - orte_proc_t* proc, - orte_vpid_t vpid_range) -{ - int i; - - char* param; - char* param2; - char **environ_copy; - - /* setup base environment */ - environ_copy = NULL; - - /* setup hnp contact info */ - param2 = orte_rml.get_contact_info(); - param = mca_base_param_environ_variable("orte","hnp","uri"); - opal_setenv(param, param2, true, &environ_copy); - free(param); - free(param2); - - /* push data into environment */ - orte_sds_env_put(vpid_range, ORTE_VPID_INVALID, &environ_copy); - - /* pass the jobid */ - orte_util_convert_jobid_to_string(¶m2, proc->name.jobid); - param = mca_base_param_environ_variable("orte","sds","jobid"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(param2); - - /* pass the vpid */ - orte_util_convert_vpid_to_string(¶m2, proc->name.vpid); - param = mca_base_param_environ_variable("orte","sds","vpid"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(param2); - - if (context->argv == NULL) { - context->argv = malloc(sizeof(char*)*2); - context->argv[0] = strdup(context->app); - context->argv[1] = NULL; - } - - i=0; - fprintf(cfp,"%s",mca_plm_poe_component.env); - while(environ_copy[i]!=NULL) { - fprintf(cfp," %s",environ_copy[i++]); - } - opal_argv_free(environ_copy); - fprintf(cfp," %s",context->app); - i=1; - while(context->argv[i]!=NULL) { - fprintf(cfp," %s",context->argv[i++]); - } - - /* POE will upset if the file doesn't contain end of line. */ - fprintf(cfp,"\n"); - - return ORTE_SUCCESS; -} - -/** -poe_launch_interactive - launch an interactive job -@param jobid JOB Identifier [IN] -@return error number -*/ -static inline int poe_launch_interactive_job(orte_job_t *jdata) -{ - orte_job_map_t *map; - orte_std_cntr_t nnode, nproc; - FILE *hfp, *cfp; - char** argv; - int argc; - int rc, pid; - sigset_t sigs; - orte_node_t **nodes; - orte_proc_t **procs; - orte_app_context_t **apps; - - if( (NULL==(mca_plm_poe_component.cmdfile=tempnam(NULL,NULL))) || - (NULL==(cfp=fopen(mca_plm_poe_component.cmdfile,"w"))) ) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - mca_plm_poe_component.jobid = jdata->jobid; - - /* get the map for this job */ - if (NULL == (map = orte_rmaps.get_job_map(active_job))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - nodes = (orte_node_t**)map->nodes->addr; - apps = (orte_app_context_t**)jdata->apps->addr; - - if(!strncmp(mca_plm_poe_component.resource_allocation,"hostfile",8)) { - - /* Create a temporary hostlist file if user specify */ - - if( (NULL==(mca_plm_poe_component.hostfile=tempnam(NULL,NULL))) || - (NULL==(hfp=fopen(mca_plm_poe_component.hostfile,"w"))) ) { - return ORTE_ERR_OUT_OF_RESOURCE; - } - for(nnode=0; nnode < map->num_nodes; nnode++) { - fprintf(hfp,"%s\n",nodes[nnode]->name); - } - fclose(hfp); - } - - /* Create a temporary POE command file */ - - for(nnode=0; nnode < map->num_nodes; nnode++) { - orte_node_t* node = nodes[nnode]; - procs = (orte_proc_t**)node->procs->addr; - for (nproc=0; nproc < node->num_procs; nproc++) { - rc = poe_create_cmd_file(cfp, apps[procs[nproc]->app_idx], procs[nproc], jdata->num_procs); - if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - } - } - fclose(cfp); - - /* Generate POE command line */ - - argv = opal_argv_copy(mca_plm_poe_component.argv); - argc = mca_plm_poe_component.argc; - - if(!strncmp(mca_plm_poe_component.resource_allocation,"hostfile",8)) { - opal_argv_append(&argc, &argv, "-hostfile"); - opal_argv_append(&argc, &argv, mca_plm_poe_component.hostfile); - opal_argv_append(&argc, &argv, "-resd"); - opal_argv_append(&argc, &argv, "no"); - rc=poe_argv_append_int(&argc, &argv, map->num_nodes, 1, "-nodes"); - if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - } - - opal_argv_append(&argc, &argv, "-pgmmodel"); - opal_argv_append(&argc, &argv, "mpmd"); - opal_argv_append(&argc, &argv, "-cmdfile"); - opal_argv_append(&argc, &argv, mca_plm_poe_component.cmdfile); - opal_argv_append(&argc, &argv, "-labelio"); - opal_argv_append(&argc, &argv, mca_plm_poe_component.mp_labelio); - opal_argv_append(&argc, &argv, "-stdoutmode"); - opal_argv_append(&argc, &argv, mca_plm_poe_component.mp_stdoutmode); - - rc=poe_argv_append_int(&argc, &argv, jdata->num_procs, 1, "-procs"); - if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=poe_argv_append_int(&argc, &argv, mca_plm_poe_component.mp_retry, 0, "-retry"); - if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=poe_argv_append_int(&argc, &argv, mca_plm_poe_component.mp_retrycount, 0, "-retrycount"); - if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=poe_argv_append_int(&argc, &argv, mca_plm_poe_component.mp_infolevel, 0, "-infolevel"); - if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - - if(mca_plm_poe_component.verbose>10) { - opal_output(0, "POE cmdline %s\n", opal_argv_join(argv, ' ')); - } - - /* Start job with POE */ - - pid = fork(); - if(pid < 0) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if(pid == 0) { - poe_set_handler_default(SIGTERM); - poe_set_handler_default(SIGINT); - poe_set_handler_default(SIGHUP); - poe_set_handler_default(SIGCHLD); - poe_set_handler_default(SIGPIPE); - sigprocmask(0, 0, &sigs); - sigprocmask(SIG_UNBLOCK, &sigs, 0); - execv(mca_plm_poe_component.path, argv); - opal_output(0, "orte_plm_poe: execv failed with errno=%d\n", errno); - exit(-1); - } else { - orte_wait_cb(pid, poe_wait_job, NULL); - } - - -cleanup: - OBJ_RELEASE(map); - - return rc; -} - -/** -plm_poe_launch - launch a POE job -@warning current support interactive class only!. -@param jobid JOB Identifier [IN] -@return error number -*/ -static int plm_poe_launch_job(orte_job_t *jdata) -{ - if(0 == strncmp(mca_plm_poe_component.class,"interactive",11)) { - return poe_launch_interactive_job(jdata); - } - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int plm_poe_terminate_job(orte_jobid_t jobid) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -static int plm_poe_terminate_orteds(void) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int plm_poe_signal_job(orte_jobid_t jobid, int32_t signal) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - - -/** -plm_poe_finalize - clean up temporary files -@return error number -*/ -static int plm_poe_finalize(void) -{ - unlink(mca_plm_poe_component.cmdfile); - unlink(mca_plm_poe_component.hostfile); - return ORTE_SUCCESS; -}