From 1af69c389dca267197c584baf93db52f528c056e Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Mon, 9 May 2005 16:28:05 +0000 Subject: [PATCH] * merge xgrid pls/ras components from bwbw-xgrid tmp branch into the trunk This commit was SVN r5651. --- src/mca/pls/xgrid/.ompi_ignore | 0 src/mca/pls/xgrid/.ompi_unignore | 2 + src/mca/pls/xgrid/Makefile.am | 56 +++++ src/mca/pls/xgrid/VERSION | 37 +++ src/mca/pls/xgrid/configure.params | 19 ++ src/mca/pls/xgrid/configure.stub | 39 +++ src/mca/pls/xgrid/ompi_objc.m4 | 25 ++ src/mca/pls/xgrid/src/pls_xgrid.h | 37 +++ src/mca/pls/xgrid/src/pls_xgrid_client.h | 71 ++++++ src/mca/pls/xgrid/src/pls_xgrid_client.m | 265 ++++++++++++++++++++ src/mca/pls/xgrid/src/pls_xgrid_component.m | 171 +++++++++++++ src/mca/pls/xgrid/src/pls_xgrid_module.m | 230 +++++++++++++++++ src/mca/ras/xgrid/.ompi_ignore | 0 src/mca/ras/xgrid/.ompi_unignore | 2 + src/mca/ras/xgrid/Makefile.am | 52 ++++ src/mca/ras/xgrid/VERSION | 37 +++ src/mca/ras/xgrid/configure.params | 19 ++ src/mca/ras/xgrid/configure.stub | 31 +++ src/mca/ras/xgrid/src/ras_xgrid.h | 38 +++ src/mca/ras/xgrid/src/ras_xgrid_component.c | 80 ++++++ src/mca/ras/xgrid/src/ras_xgrid_module.c | 167 ++++++++++++ 21 files changed, 1378 insertions(+) create mode 100644 src/mca/pls/xgrid/.ompi_ignore create mode 100644 src/mca/pls/xgrid/.ompi_unignore create mode 100644 src/mca/pls/xgrid/Makefile.am create mode 100644 src/mca/pls/xgrid/VERSION create mode 100644 src/mca/pls/xgrid/configure.params create mode 100644 src/mca/pls/xgrid/configure.stub create mode 100644 src/mca/pls/xgrid/ompi_objc.m4 create mode 100644 src/mca/pls/xgrid/src/pls_xgrid.h create mode 100644 src/mca/pls/xgrid/src/pls_xgrid_client.h create mode 100644 src/mca/pls/xgrid/src/pls_xgrid_client.m create mode 100644 src/mca/pls/xgrid/src/pls_xgrid_component.m create mode 100644 src/mca/pls/xgrid/src/pls_xgrid_module.m create mode 100644 src/mca/ras/xgrid/.ompi_ignore create mode 100644 src/mca/ras/xgrid/.ompi_unignore 
create mode 100644 src/mca/ras/xgrid/Makefile.am create mode 100644 src/mca/ras/xgrid/VERSION create mode 100644 src/mca/ras/xgrid/configure.params create mode 100644 src/mca/ras/xgrid/configure.stub create mode 100644 src/mca/ras/xgrid/src/ras_xgrid.h create mode 100644 src/mca/ras/xgrid/src/ras_xgrid_component.c create mode 100644 src/mca/ras/xgrid/src/ras_xgrid_module.c diff --git a/src/mca/pls/xgrid/.ompi_ignore b/src/mca/pls/xgrid/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/mca/pls/xgrid/.ompi_unignore b/src/mca/pls/xgrid/.ompi_unignore new file mode 100644 index 0000000000..e4f6cc316d --- /dev/null +++ b/src/mca/pls/xgrid/.ompi_unignore @@ -0,0 +1,2 @@ +brbarret +bbarrett diff --git a/src/mca/pls/xgrid/Makefile.am b/src/mca/pls/xgrid/Makefile.am new file mode 100644 index 0000000000..97d71783bb --- /dev/null +++ b/src/mca/pls/xgrid/Makefile.am @@ -0,0 +1,56 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the top-level Makefile.options + +include $(top_ompi_srcdir)/config/Makefile.options + +EXTRA_DIST = VERSION + +include $(top_ompi_srcdir)/config/Makefile.options + +AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \ + -I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include + +xgrid_sources = \ + src/pls_xgrid.h \ + src/pls_xgrid_component.m \ + src/pls_xgrid_module.m \ + src/pls_xgrid_client.h \ + src/pls_xgrid_client.m + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if OMPI_BUILD_pls_xgrid_DSO +component_noinst = +component_install = mca_pls_xgrid.la +else +component_noinst = libmca_pls_xgrid.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_pls_xgrid_la_SOURCES = $(xgrid_sources) +mca_pls_xgrid_la_LIBADD = +mca_pls_xgrid_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pls_xgrid_la_SOURCES = $(xgrid_sources) +libmca_pls_xgrid_la_LIBADD = +libmca_pls_xgrid_la_LDFLAGS = -module -avoid-version diff --git a/src/mca/pls/xgrid/VERSION b/src/mca/pls/xgrid/VERSION new file mode 100644 index 0000000000..ea4eb96e38 --- /dev/null +++ b/src/mca/pls/xgrid/VERSION @@ -0,0 +1,37 @@ +# This is the VERSION file for Open MPI, describing the precise +# version of Open MPI in this distribution. The various components of +# the version number below are combined to form a single version +# number string. + +# major, minor, and release are generally combined in the form +# ... If minor and release are both zero, then +# release is omitted. + +major=1 +minor=0 +release=0 + +# Only one of alpha or beta can be nonzero. If both are zero, alpha +# will be preferred. If alpha is nonzero, it will be appended to the +# version string as "a". If beta is nonzero (and alpha is +# zero), it will be appended to the version string as "b". + +alpha=0 +beta=0 + +# If want_svn=1, then the SVN r number will be included in the overall +# Open MPI version number in some form. + +want_svn=1 + +# If svn_r=-1, then the SVN r numbere will be obtained dynamically at +# run time, either 1) via the "svnversion" command (if this is a +# Subversion checkout) in the form "r", or b) with the date (if +# this is not a Subversion checkout, and the svnversion command cannot +# be used) in the form of "svn". Alternatively, if svn_r is not +# -1, the value of svn_r will be directly appended to the version +# string. 
This happens during "make dist", for example: if the +# distribution tarball is being made from an SVN checkout, the value +# of svn_r in this file is replaced with the output of "svnversion". + +svn_r=-1 diff --git a/src/mca/pls/xgrid/configure.params b/src/mca/pls/xgrid/configure.params new file mode 100644 index 0000000000..e925602e59 --- /dev/null +++ b/src/mca/pls/xgrid/configure.params @@ -0,0 +1,19 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_INIT_FILE=src/pls_xgrid_component.m +PARAM_CONFIG_FILES="Makefile" diff --git a/src/mca/pls/xgrid/configure.stub b/src/mca/pls/xgrid/configure.stub new file mode 100644 index 0000000000..6778e2bfcb --- /dev/null +++ b/src/mca/pls/xgrid/configure.stub @@ -0,0 +1,39 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sinclude(ompi_objc.m4) + +AC_DEFUN([MCA_CONFIGURE_STUB],[ + +AC_PROG_OBJC + +AC_MSG_CHECKING([For XGridFoundation framework]) +save_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -framework XGridFoundation" +AC_TRY_LINK([],[;],[HAPPY="yes"],[HAPPY="no"]) +CFLAGS="$save_CFLAGS" +AC_MSG_RESULT([$HAPPY]) + +if test "$HAPPY" = "no" ; then + AC_MSG_ERROR([*** Can not build xgrid pls]) +fi + +OBJCFLAGS="-F XGridFoundation" +LDFLAGS="-framework XGridFoundation -framework Foundation" + + +])dnl diff --git a/src/mca/pls/xgrid/ompi_objc.m4 b/src/mca/pls/xgrid/ompi_objc.m4 new file mode 100644 index 0000000000..d1ba0084f1 --- /dev/null +++ b/src/mca/pls/xgrid/ompi_objc.m4 @@ -0,0 +1,25 @@ +# -*- autoconf -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_MAKEVAR([OBJC], [AC_PROG_OBJC]) + +AC_DEFUN([AC_PROG_OBJC], [ + AC_ARG_VAR([OBJC], [Objective C compiler command]) + AC_ARG_VAR([OBJCFLAGS], [Objective C compiler flags]) + AC_CHECK_TOOLS(OBJC, [$CCC m4_default([$1], [gcc cc objc])], gcc) + _AM_DEPENDENCIES([OBJC]) +])dnl diff --git a/src/mca/pls/xgrid/src/pls_xgrid.h b/src/mca/pls/xgrid/src/pls_xgrid.h new file mode 100644 index 0000000000..00e920fb52 --- /dev/null +++ b/src/mca/pls/xgrid/src/pls_xgrid.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * NOTE: This header is an Objective-C file. It might not do what + * you intend with a C/C++ compiler + */ + +#import "mca/pls/pls.h" +#import "pls_xgrid_client.h" + +/** + * PLS Component + */ +struct orte_pls_xgrid_component_t { + orte_pls_base_component_t super; + PlsXgridClient *client; + NSAutoreleasePool *pool; +}; +typedef struct orte_pls_xgrid_component_t orte_pls_xgrid_component_t; +extern orte_pls_xgrid_component_t mca_pls_xgrid_component; +extern orte_pls_base_module_1_0_0_t orte_pls_xgrid_module; + +int orte_pls_xgrid_progress(void); diff --git a/src/mca/pls/xgrid/src/pls_xgrid_client.h b/src/mca/pls/xgrid/src/pls_xgrid_client.h new file mode 100644 index 0000000000..85f1f4e4ce --- /dev/null +++ b/src/mca/pls/xgrid/src/pls_xgrid_client.h @@ -0,0 +1,71 @@ +/* -*- ObjC -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#import +#import +#import + +#import "threads/condition.h" +#include "mca/ns/ns_types.h" + +@interface PlsXgridClient : NSObject +{ + NSString *orted; + NSString *controller_hostname; + NSString *controller_password; + + ompi_condition_t startup_cond; + volatile int startup_val; + + /* state of the world... */ + ompi_condition_t state_cond; + ompi_mutex_t state_mutex; + + XGConnection *connection; + XGTwoWayRandomAuthenticator *authenticator; + XGController *controller; + XGGrid *grid; +} + +/* init / finalize */ +-(id) init; +-(void) dealloc; + +/* accessors */ +-(NSString*) getOrted; + +-(void) setOrtedAsCString: (char*) name; +-(void) setControllerPasswordAsCString: (char*) name; +-(void) setControllerHostnameAsCString: (char*) password; + + +/* interface for launch */ +-(int) connect; +-(int) launchJob:(orte_jobid_t) jobid; + +/* delegate for changes */ +-(void) connectionDidOpen:(XGConnection*) connection; +-(void) connectionDidNotOpen:(XGConnection*)connection withError:(NSError*) error; +-(void) connectionDidClose:(XGConnection *) connection; + +@end diff --git a/src/mca/pls/xgrid/src/pls_xgrid_client.m b/src/mca/pls/xgrid/src/pls_xgrid_client.m new file mode 100644 index 0000000000..96afc6dea5 --- /dev/null +++ b/src/mca/pls/xgrid/src/pls_xgrid_client.m @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#import "ompi_config.h" +#import "pls_xgrid_config.h" + +#import + +#import "mca/pls/base/base.h" +#import "include/orte_constants.h" +#import "include/constants.h" +#import "mca/ns/ns.h" +#import "mca/ras/base/ras_base_node.h" +#import "mca/rml/rml.h" +#import "util/path.h" + +#import "pls_xgrid_client.h" + +char **environ; + +@implementation PlsXgridClient + +/* init / finalize */ +-(id) init +{ + if (self = [super init]) { + /* class-specific initialization goes here */ + OBJ_CONSTRUCT(&state_cond, ompi_condition_t); + OBJ_CONSTRUCT(&state_mutex, ompi_mutex_t); + } + return self; +} + + +-(void) dealloc +{ + /* need to shut down connection */ + [connection finalize]; + + OBJ_DESTRUCT(&state_mutex); + OBJ_DESTRUCT(&state_cond); + [super dealloc]; +} + + +/* accessors */ +-(NSString*) getOrted +{ + return orted; +} + + +-(void) setOrtedAsCString: (char*) name +{ + orted = [NSString stringWithCString: name]; +} + + +-(void) setControllerPasswordAsCString: (char*) name +{ + controller_password = [NSString stringWithCString: name]; +} + + +-(void) setControllerHostnameAsCString: (char*) password +{ + controller_hostname = [NSString stringWithCString: password]; +} + + +/* interface for launch */ +-(int) connect +{ + connection = [[[XGConnection alloc] initWithHostname: controller_hostname + portnumber:0] autorelease]; + authenticator = [[[XGTwoWayRandomAuthenticator alloc] init] autorelease]; + + /* this seems to be hard coded */ + [authenticator setUsername:@"one-xgrid-client"]; + [authenticator setPassword:controller_password]; + + [connection setAuthenticator:authenticator]; + [connection setDelegate: self]; + + /* get us connected */ + ompi_mutex_lock(&state_mutex); + [connection open]; + while 
([connection state] == XGConnectionStateOpening) { + ompi_condition_wait(&state_cond, &state_mutex); + } + ompi_mutex_unlock(&state_mutex); + + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: connection name: %s", [[connection name] cString]); + + controller = [[XGController alloc] initWithConnection:connection]; + ompi_progress(); + grid = [controller defaultGrid]; + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: grid name: %s", [[grid name] cString]); + + return ORTE_SUCCESS; +} + + +-(int) launchJob:(orte_jobid_t) jobid +{ + ompi_list_t nodes; + ompi_list_item_t *item; + int ret; + size_t num_nodes; + orte_vpid_t vpid; + int i = 0; + char *orted_path; + + /* find orted */ + orted_path = ompi_path_findv((char*) [orted cString], 0, environ, NULL); + + /* query the list of nodes allocated to the job */ + OBJ_CONSTRUCT(&nodes, ompi_list_t); + ret = orte_ras_base_node_query_alloc(&nodes, jobid); + if (ORTE_SUCCESS != ret) goto cleanup; + + /* allocate vpids for the daemons */ + num_nodes = ompi_list_get_size(&nodes); + if (num_nodes == 0) return OMPI_ERR_BAD_PARAM; + ret = orte_ns.reserve_range(0, num_nodes, &vpid); + if (ORTE_SUCCESS != ret) goto cleanup; + + /* build up the array of task specifications */ + NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary]; + + for (item = ompi_list_get_first(&nodes); + item != ompi_list_get_end(&nodes); + item = ompi_list_get_next(item)) { + orte_ras_base_node_t* node = (orte_ras_base_node_t*)item; + orte_process_name_t* name; + char *name_str, *nsuri, *gpruri; + + ret = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); + if(ORTE_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + ret = orte_ns.get_proc_name_string(&name_str, name); + if (ORTE_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + if (NULL != orte_process_info.ns_replica_uri) { + nsuri = strdup(orte_process_info.ns_replica_uri); + } else { + nsuri = orte_rml.get_uri(); + } + + if (NULL != 
orte_process_info.gpr_replica_uri) { + gpruri = strdup(orte_process_info.gpr_replica_uri); + } else { + gpruri = orte_rml.get_uri(); + } + + NSMutableDictionary *task = [NSMutableDictionary dictionary]; + [task setObject: [NSString stringWithCString: orted_path] + forKey: XGJobSpecificationCommandKey]; + NSArray *taskArguments = + [NSArray arrayWithObjects: @"--no-daemonize", + @"--bootproxy", [NSString stringWithFormat: @"%d", jobid], + @"--name", [NSString stringWithCString: name_str], + @"--nodename", [NSString stringWithFormat: @"xgrid-node-%d", i], + @"--nsreplica", [NSString stringWithCString: nsuri], + @"--gprreplica", [NSString stringWithCString: gpruri], + nil]; + [task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey]; + + [taskSpecifications setObject: task + forKey: [NSString stringWithFormat: @"%d", i]]; + + free(name_str); free(nsuri); free(gpruri); + + vpid++; i++; + } + + /* job specification */ + NSMutableDictionary *jobSpecification = [NSMutableDictionary dictionary]; + [jobSpecification setObject:XGJobSpecificationTypeTaskListValue + forKey:XGJobSpecificationTypeKey]; + [jobSpecification setObject: [NSString stringWithFormat: + @"org.open-mpi.pls.xgrid"] + forKey:XGJobSpecificationSubmissionIdentifierKey]; + [jobSpecification setObject: [NSString stringWithFormat: @"Open MPI Job %d", jobid] + forKey:XGJobSpecificationNameKey]; + [jobSpecification setObject:taskSpecifications + forKey:XGJobSpecificationTaskSpecificationsKey]; + + /* Submit the request and get our monitor */ + XGActionMonitor *actionMonitor = + [controller performSubmitJobActionWithJobSpecification: jobSpecification + gridIdentifier: nil]; + + /* wait until we have some idea if job succeeded or not */ + while (XGActionMonitorOutcomeNone == [actionMonitor outcome]) { + ompi_progress(); + } + + /* we should have a result - find out if it worked */ + if (XGActionMonitorOutcomeSuccess == [actionMonitor outcome]) { + ret = OMPI_SUCCESS; + } else { + NSError *err = 
[actionMonitor error]; + printf("launch failed: %s\n", [[err localizedFailureReason] cString]); + ret = OMPI_ERROR; + } + +cleanup: + while(NULL != (item = ompi_list_remove_first(&nodes))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&nodes); + return ret; +} + + +/* delegate for changes */ +-(void) connectionDidOpen:(XGConnection*) connection +{ + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: got connectionDidOpen message"); + ompi_condition_broadcast(&state_cond); +} + +-(void) connectionDidNotOpen:(XGConnection*) connection withError: (NSError*) error +{ + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: got connectionDidNotOpen message"); + ompi_condition_broadcast(&state_cond); +} + +-(void) connectionDidClose:(XGConnection*) connection; +{ + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: got connectionDidClose message"); + ompi_condition_broadcast(&state_cond); +} + +@end diff --git a/src/mca/pls/xgrid/src/pls_xgrid_component.m b/src/mca/pls/xgrid/src/pls_xgrid_component.m new file mode 100644 index 0000000000..538f18b32e --- /dev/null +++ b/src/mca/pls/xgrid/src/pls_xgrid_component.m @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#import "ompi_config.h" + +#import +#import + +#import "include/orte_constants.h" +#import "util/argv.h" +#import "util/path.h" +#import "util/basename.h" +#import "mca/pls/pls.h" +#import "mca/pls/base/base.h" +#import "mca/pls/xgrid/pls-xgrid-version.h" +#import "mca/base/mca_base_param.h" +#import "mca/rml/rml.h" + +#import "pls_xgrid.h" +#import "pls_xgrid_client.h" + + +/* + * Public string showing the pls ompi_xgrid component version number + */ +const char *mca_pls_xgrid_component_version_string = + "Open MPI xgrid pls MCA component version " MCA_pls_xgrid_VERSION; + +int orte_pls_xgrid_component_open(void); +int orte_pls_xgrid_component_close(void); +orte_pls_base_module_t * orte_pls_xgrid_component_init(int *priority); + + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +orte_pls_xgrid_component_t mca_pls_xgrid_component = { + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a pls v1.0.0 component (which also + implies a specific MCA version) */ + + ORTE_PLS_BASE_VERSION_1_0_0, + + /* Component name and version */ + + "xgrid", + MCA_pls_xgrid_MAJOR_VERSION, + MCA_pls_xgrid_MINOR_VERSION, + MCA_pls_xgrid_RELEASE_VERSION, + + /* Component open and close functions */ + + orte_pls_xgrid_component_open, + orte_pls_xgrid_component_close + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + + true + }, + + /* Initialization / querying functions */ + + orte_pls_xgrid_component_init + } +}; + + +int +orte_pls_xgrid_component_open(void) +{ + mca_base_param_register_string("pls", "xgrid", "orted", NULL, "orted"); + mca_base_param_register_int("pls", "xgrid", "priority", NULL, 20); + + return ORTE_SUCCESS; +} + + +int +orte_pls_xgrid_component_close(void) +{ + return ORTE_SUCCESS; +} + + +orte_pls_base_module_t * 
+orte_pls_xgrid_component_init(int *priority) +{ + int param; + char *string; + int ret; + + if (NULL == getenv("XGRID_CONTROLLER_HOSTNAME") || + NULL == getenv("XGRID_CONTROLLER_PASSWORD")) { + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: controller info not set"); + return NULL; + } + + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: initializing PlsXgridClient"); + mca_pls_xgrid_component.pool = [[NSAutoreleasePool alloc] init]; + mca_pls_xgrid_component.client = [[PlsXgridClient alloc] init]; + + /* setup daemon name */ + param = mca_base_param_find("pls", "xgrid", "orted"); + mca_base_param_lookup_string(param, &string); + [mca_pls_xgrid_component.client setOrtedAsCString: string]; + if (NULL != string) free(string); + + /* setup contact information */ + [mca_pls_xgrid_component.client setControllerPasswordAsCString: + getenv("XGRID_CONTROLLER_PASSWORD")]; + [mca_pls_xgrid_component.client setControllerHostnameAsCString: + getenv("XGRID_CONTROLLER_HOSTNAME")]; + + /* info we need */ + param = mca_base_param_find("pls", "xgrid", "priority"); + mca_base_param_lookup_int(param, priority); + + ompi_progress_register(orte_pls_xgrid_progress); + + ompi_output(orte_pls_base.pls_output, "pls: xgrid: initialized"); + + ret = [mca_pls_xgrid_component.client connect]; + if (ret != ORTE_SUCCESS) { + ompi_output(orte_pls_base.pls_output, + "pls: xgrid: connection failed"); + orte_pls_xgrid_finalize(); + } + + return &orte_pls_xgrid_module; +} + + +int +orte_pls_xgrid_progress(void) +{ + /* tick the event loop */ + [[NSRunLoop currentRunLoop] runUntilDate: + [NSDate dateWithTimeIntervalSinceNow:1]]; +} diff --git a/src/mca/pls/xgrid/src/pls_xgrid_module.m b/src/mca/pls/xgrid/src/pls_xgrid_module.m new file mode 100644 index 0000000000..d164f387b5 --- /dev/null +++ b/src/mca/pls/xgrid/src/pls_xgrid_module.m @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. 
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#import "ompi_config.h" +#import +#import +#import +#import +#import +#import +#import +#import + +#import "include/orte_constants.h" +#import "util/argv.h" +#import "util/output.h" +#import "util/session_dir.h" +#import "event/event.h" +#import "runtime/orte_wait.h" +#import "mca/ns/ns.h" +#import "mca/pls/pls.h" +#import "mca/rml/rml.h" +#import "mca/gpr/gpr.h" +#import "mca/errmgr/errmgr.h" +#import "mca/ras/base/ras_base_node.h" +#import "mca/rmaps/base/rmaps_base_map.h" +#import "mca/rmgr/base/base.h" +#import "mca/soh/soh.h" +#import "mca/soh/base/base.h" +#import "pls_xgrid.h" + +int orte_pls_xgrid_launch(orte_jobid_t jobid); +int orte_pls_xgrid_terminate_job(orte_jobid_t jobid); +int orte_pls_xgrid_terminate_proc(const orte_process_name_t* proc); +int orte_pls_xgrid_finalize(void); + + +orte_pls_base_module_1_0_0_t orte_pls_xgrid_module = { + orte_pls_xgrid_launch, + orte_pls_xgrid_terminate_job, + orte_pls_xgrid_terminate_proc, + orte_pls_xgrid_finalize +}; + +/** + * Launch a daemon (bootproxy) on each node. The daemon will be responsible + * for launching the application. 
+ */ +int +orte_pls_xgrid_launch(orte_jobid_t jobid) +{ + return [mca_pls_xgrid_component.client launchJob:jobid]; +} + + +/** + * Wait for a pending job to complete. + */ + +static void +orte_pls_xgrid_terminate_job_rsp(int status, + orte_process_name_t* peer, + orte_buffer_t* rsp, + orte_rml_tag_t tag, + void* cbdata) +{ + int rc; + if (ORTE_SUCCESS != (rc = orte_rmgr_base_unpack_rsp(rsp))) { + ORTE_ERROR_LOG(rc); + } +} + + +static void +orte_pls_xgrid_terminate_job_cb(int status, + orte_process_name_t* peer, + orte_buffer_t* req, + orte_rml_tag_t tag, + void* cbdata) +{ + /* wait for response */ + int rc; + if(status < 0) { + ORTE_ERROR_LOG(status); + OBJ_RELEASE(req); + return; + } + + if(0 > (rc = orte_rml.recv_buffer_nb(peer, ORTE_RML_TAG_RMGR_CLNT, + 0, orte_pls_xgrid_terminate_job_rsp, NULL))) { + ORTE_ERROR_LOG(rc); + } + OBJ_RELEASE(req); +} + + +/** + * Query the registry for all nodes participating in the job + */ +int +orte_pls_xgrid_terminate_job(orte_jobid_t jobid) +{ + char *keys[2]; + char *jobid_string; + orte_gpr_value_t** values = NULL; + size_t i, j, num_values = 0; + int rc; + + if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); + keys[1] = NULL; + + rc = orte_gpr.get( + ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, + ORTE_NODE_SEGMENT, + NULL, + keys, + &num_values, + &values + ); + if(rc != ORTE_SUCCESS) { + free(jobid_string); + return rc; + } + if(0 == num_values) { + rc = ORTE_ERR_NOT_FOUND; + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + for(i=0; icnt; j++) { + orte_gpr_keyval_t* keyval = value->keyvals[j]; + orte_buffer_t *cmd = OBJ_NEW(orte_buffer_t); + int ret; + if(cmd == NULL) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if(strcmp(keyval->key, keys[0]) != 0) + continue; + + /* construct command */ + ret = orte_rmgr_base_pack_cmd(cmd, ORTE_RMGR_CMD_TERM_JOB, jobid); + 
if(ORTE_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(cmd); + rc = ret; + continue; + } + + /* send a terminate message to the bootproxy on each node */ + if(0 > (ret = orte_rml.send_buffer_nb( + &keyval->value.proc, + cmd, + ORTE_RML_TAG_RMGR_SVC, + 0, + orte_pls_xgrid_terminate_job_cb, + NULL))) { + + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(cmd); + rc = ret; + continue; + } + } + } + +cleanup: + + free(jobid_string); + free(keys[0]); + + if(NULL != values) { + for(i=0; i_.la (for DSO builds) or libmca__.la +# (for static builds). + +if OMPI_BUILD_ras_xgrid_DSO +component_noinst = +component_install = mca_ras_xgrid.la +else +component_noinst = libmca_ras_xgrid.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_ras_xgrid_la_SOURCES = $(xgrid_sources) +mca_ras_xgrid_la_LIBADD = +mca_ras_xgrid_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_ras_xgrid_la_SOURCES = $(xgrid_sources) +libmca_ras_xgrid_la_LIBADD = +libmca_ras_xgrid_la_LDFLAGS = -module -avoid-version diff --git a/src/mca/ras/xgrid/VERSION b/src/mca/ras/xgrid/VERSION new file mode 100644 index 0000000000..ea4eb96e38 --- /dev/null +++ b/src/mca/ras/xgrid/VERSION @@ -0,0 +1,37 @@ +# This is the VERSION file for Open MPI, describing the precise +# version of Open MPI in this distribution. The various components of +# the version number below are combined to form a single version +# number string. + +# major, minor, and release are generally combined in the form +# ... If minor and release are both zero, then +# release is omitted. + +major=1 +minor=0 +release=0 + +# Only one of alpha or beta can be nonzero. If both are zero, alpha +# will be preferred. If alpha is nonzero, it will be appended to the +# version string as "a". If beta is nonzero (and alpha is +# zero), it will be appended to the version string as "b". 
+ +alpha=0 +beta=0 + +# If want_svn=1, then the SVN r number will be included in the overall +# Open MPI version number in some form. + +want_svn=1 + +# If svn_r=-1, then the SVN r numbere will be obtained dynamically at +# run time, either 1) via the "svnversion" command (if this is a +# Subversion checkout) in the form "r", or b) with the date (if +# this is not a Subversion checkout, and the svnversion command cannot +# be used) in the form of "svn". Alternatively, if svn_r is not +# -1, the value of svn_r will be directly appended to the version +# string. This happens during "make dist", for example: if the +# distribution tarball is being made from an SVN checkout, the value +# of svn_r in this file is replaced with the output of "svnversion". + +svn_r=-1 diff --git a/src/mca/ras/xgrid/configure.params b/src/mca/ras/xgrid/configure.params new file mode 100644 index 0000000000..cf74752b5c --- /dev/null +++ b/src/mca/ras/xgrid/configure.params @@ -0,0 +1,19 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_INIT_FILE=src/ras_xgrid_component.c +PARAM_CONFIG_FILES="Makefile" diff --git a/src/mca/ras/xgrid/configure.stub b/src/mca/ras/xgrid/configure.stub new file mode 100644 index 0000000000..1343031fea --- /dev/null +++ b/src/mca/ras/xgrid/configure.stub @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_CONFIGURE_STUB
+# ------------------
+# Configure-time check for the xgrid ras component: verifies that a
+# program can be linked against the XGridFoundation framework and
+# stops configure with an error when it cannot.
+AC_DEFUN([MCA_CONFIGURE_STUB],[
+
+# Link-test an empty program with "-framework XGridFoundation" appended
+# to CFLAGS. CFLAGS is saved first and restored right after the test so
+# the probe does not leak into the rest of configure.
+AC_MSG_CHECKING([For XGridFoundation framework])
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -framework XGridFoundation"
+AC_TRY_LINK([],[;],[HAPPY="yes"],[HAPPY="no"])
+CFLAGS="$save_CFLAGS"
+AC_MSG_RESULT([$HAPPY])
+
+# HAPPY is "no" when the link test failed: the component cannot be built.
+if test "$HAPPY" = "no" ; then
+ AC_MSG_ERROR([*** Can not build xgrid ras])
+fi
+
+])dnl
diff --git a/src/mca/ras/xgrid/src/ras_xgrid.h b/src/mca/ras/xgrid/src/ras_xgrid.h
new file mode 100644
index 0000000000..f8f2cc7833
--- /dev/null
+++ b/src/mca/ras/xgrid/src/ras_xgrid.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * Resource Allocation (xgrid)
+ *
+ * Declares the two exported objects of the xgrid resource allocation
+ * (ras) component: the component descriptor and the module function
+ * table.
+ */
+#ifndef ORTE_RAS_xgrid_H
+#define ORTE_RAS_xgrid_H
+
+#include "mca/ras/ras.h"
+#include "mca/ras/base/base.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+ /* Component descriptor; defined in ras_xgrid_component.c. */
+ OMPI_COMP_EXPORT extern orte_ras_base_component_1_0_0_t mca_ras_xgrid_component;
+ /* Module function table (allocate/deallocate/finalize); defined in
+    ras_xgrid_module.c. */
+ OMPI_COMP_EXPORT extern orte_ras_base_module_t orte_ras_xgrid_module;
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/mca/ras/xgrid/src/ras_xgrid_component.c b/src/mca/ras/xgrid/src/ras_xgrid_component.c
new file mode 100644
index 0000000000..67d9ee9243
--- /dev/null
+++ b/src/mca/ras/xgrid/src/ras_xgrid_component.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include "include/orte_constants.h" +#include "mca/base/base.h" +#include "mca/base/mca_base_param.h" +#include "mca/ras/xgrid/ras-xgrid-version.h" +#include "ras_xgrid.h" + + +/* + * Local functions + */ +static orte_ras_base_module_t *ras_xgrid_init(int*); + + +orte_ras_base_component_1_0_0_t mca_ras_xgrid_component = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + { + /* Indicate that we are a iof v1.0.0 component (which also + implies a specific MCA version) */ + + ORTE_RAS_BASE_VERSION_1_0_0, + + /* Component name and version */ + + "xgrid", + MCA_ras_xgrid_MAJOR_VERSION, + MCA_ras_xgrid_MINOR_VERSION, + MCA_ras_xgrid_RELEASE_VERSION, + + /* Component open and close functions */ + + NULL, + NULL + }, + + /* Next the MCA v1.0.0 component meta data */ + { + /* Whether the component is checkpointable or not */ + false + }, + + ras_xgrid_init +}; + + +static orte_ras_base_module_t *ras_xgrid_init(int* priority) +{ + /* Are we running under a xgrid job? */ + int id = mca_base_param_register_int("ras","xgrid","priority",NULL,100); + mca_base_param_lookup_int(id,priority); + + if (NULL != getenv("XGRID_CONTROLLER_HOSTNAME") && + NULL != getenv("XGRID_CONTROLLER_PASSWORD")) { + ompi_output(orte_ras_base.ras_output, "ras:xgrid: available for selection"); + return &orte_ras_xgrid_module; + } + + /* Sadly, no */ + ompi_output(orte_ras_base.ras_output, "ras:xgrid: NOT available for selection"); + return NULL; +} diff --git a/src/mca/ras/xgrid/src/ras_xgrid_module.c b/src/mca/ras/xgrid/src/ras_xgrid_module.c new file mode 100644 index 0000000000..a0a6caaeaf --- /dev/null +++ b/src/mca/ras/xgrid/src/ras_xgrid_module.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. 
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "orte_config.h"
+
+/* NOTE(review): the names of these three system headers were lost in
+   this copy of the patch. They are set to the headers the code below
+   needs (asprintf, free, strdup) -- confirm against the original. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "include/orte_constants.h"
+#include "include/orte_types.h"
+#include "util/argv.h"
+#include "util/output.h"
+#include "mca/ras/base/base.h"
+#include "mca/ras/base/ras_base_node.h"
+#include "mca/rmgr/base/base.h"
+#include "ras_xgrid.h"
+
+
+/*
+ * Local functions
+ */
+static int allocate(orte_jobid_t jobid);
+static int deallocate(orte_jobid_t jobid);
+static int finalize(void);
+
+static int discover(orte_jobid_t jobid, ompi_list_t* nodelist);
+
+
+/*
+ * Global variable: the module function table handed out by the
+ * component's init function.
+ */
+orte_ras_base_module_t orte_ras_xgrid_module = {
+    allocate,
+    deallocate,
+    finalize
+};
+
+
+/**
+ * Allocate nodes for a job.
+ *
+ * Asks discover() for the node list of the job and passes it to
+ * orte_ras_base_allocate_nodes(). The temporary list and every item
+ * on it are released before returning, on success and on failure.
+ *
+ * @param jobid Job to allocate for.
+ * @return ORTE_SUCCESS, or the error code returned by discover() or
+ *         orte_ras_base_allocate_nodes().
+ */
+#include "mca/gpr/gpr.h"
+static int allocate(orte_jobid_t jobid)
+{
+    int ret;
+    ompi_list_t nodes;
+    ompi_list_item_t* item;
+
+    OBJ_CONSTRUCT(&nodes, ompi_list_t);
+    if (ORTE_SUCCESS != (ret = discover(jobid, &nodes))) {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate: discover failed!");
+        /* The list was constructed above, so it must be torn down on
+           this path too. Drain it first in case anything was added. */
+        while (NULL != (item = ompi_list_remove_first(&nodes))) {
+            OBJ_RELEASE(item);
+        }
+        OBJ_DESTRUCT(&nodes);
+        return ret;
+    }
+    ret = orte_ras_base_allocate_nodes(jobid, &nodes);
+
+    while (NULL != (item = ompi_list_remove_first(&nodes))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_DESTRUCT(&nodes);
+
+    /* All done */
+
+    if (ORTE_SUCCESS == ret) {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate: success");
+    } else {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate: failure (base_allocate_nodes=%d)", ret);
+    }
+
+    return ret;
+}
+
+
+/*
+ * There's really nothing to do here: logs and reports success.
+ */
+static int deallocate(orte_jobid_t jobid)
+{
+    ompi_output(orte_ras_base.ras_output,
+                "ras:xgrid:deallocate: success (nothing to do)");
+    return ORTE_SUCCESS;
+}
+
+
+/*
+ * There's really nothing to do here: logs and reports success.
+ */
+static int finalize(void)
+{
+    ompi_output(orte_ras_base.ras_output,
+                "ras:xgrid:finalize: success (nothing to do)");
+    return ORTE_SUCCESS;
+}
+
+
+/**
+ * Discover the number of available resources. Always exactly what was
+ * asked for (surprise...).
+ *
+ * Creates one single-slot node named "xgrid-node-<i>" for every slot
+ * the job requests, inserts the nodes into the registry with
+ * orte_ras_base_node_insert(), and on success moves them onto
+ * nodelist. On any failure the nodes built so far are released and
+ * nodelist is left untouched.
+ *
+ * @param jobid    Job whose slot count is queried.
+ * @param nodelist Caller-constructed list that receives the nodes.
+ * @return ORTE_SUCCESS; ORTE_ERR_OUT_OF_RESOURCE when a node or one of
+ *         its strings cannot be allocated; otherwise the error code of
+ *         orte_rmgr_base_get_job_slots() or orte_ras_base_node_insert().
+ */
+static int discover(orte_jobid_t jobid, ompi_list_t* nodelist)
+{
+    int ret;
+    orte_ras_base_node_t *node;
+    ompi_list_item_t* item;
+    ompi_list_t new_nodes;
+    size_t num_requested = 0;
+    size_t i;
+    char *hostname;
+    char *arch;
+
+    /* how many slots do we need? */
+    if(ORTE_SUCCESS != (ret = orte_rmgr_base_get_job_slots(jobid, &num_requested))) {
+        return ret;
+    }
+
+    /* create a "node" for each slot */
+    OBJ_CONSTRUCT(&new_nodes, ompi_list_t);
+    for (i = 0 ; i < num_requested ; ++i) {
+        /* When asprintf() fails the pointer it was given is not
+           usable, so hostname is neither read nor freed here. */
+        if (0 > asprintf(&hostname, "xgrid-node-%d", (int) i)) {
+            ret = ORTE_ERR_OUT_OF_RESOURCE;
+            break;
+        }
+
+        /* Acquire both strings before creating the node so a failure
+           can be unwound with plain free() calls. OBJ_NEW is assumed
+           to yield NULL when allocation fails -- TODO confirm. */
+        arch = strdup("unknown");
+        node = (NULL == arch) ? NULL : OBJ_NEW(orte_ras_base_node_t);
+        if (NULL == node) {
+            free(arch);
+            free(hostname);
+            ret = ORTE_ERR_OUT_OF_RESOURCE;
+            break;
+        }
+
+        node->node_name = hostname;
+        node->node_arch = arch;
+        node->node_state = ORTE_NODE_STATE_UP;
+        node->node_cellid = 0;
+        node->node_slots_inuse = 0;
+        node->node_slots_max = 1;
+        node->node_slots = 1;
+        ompi_list_append(&new_nodes, &node->super);
+    }
+
+    /* Add these nodes to the registry, and return all the values.
+       Skipped when building the list already failed. */
+    if (ORTE_SUCCESS == ret) {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate:discover: done -- adding to registry");
+        ret = orte_ras_base_node_insert(&new_nodes);
+    }
+
+    /* Hand the nodes to the caller on success, release them otherwise */
+    for (item = ompi_list_remove_first(&new_nodes);
+         NULL != item; item = ompi_list_remove_first(&new_nodes)) {
+        if (ORTE_SUCCESS == ret) {
+            ompi_list_append(nodelist, item);
+        } else {
+            OBJ_RELEASE(item);
+        }
+    }
+
+    /* All done */
+    if (ORTE_SUCCESS == ret) {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate:discover: success");
+    } else {
+        ompi_output(orte_ras_base.ras_output,
+                    "ras:xgrid:allocate:discover: failed (rc=%d)", ret);
+    }
+    OBJ_DESTRUCT(&new_nodes);
+    return ret;
+}