Eliminate stale code
This commit was SVN r26244.
Этот коммит содержится в:
родитель
abf60337de
Коммит
ed197acaa2
@ -1,60 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Use the top-level Makefile.options
|
||||
|
||||
# Headers generated at configure time live in the build tree; the
# Objective-C compile flags are substituted by configure.
AM_CPPFLAGS  = -I$(top_ompi_builddir)/include
AM_OBJCFLAGS = $(plm_xgrid_OBJCFLAGS)

# Libtool has no Objective-C tag of its own.  The only Objective-C
# compiler used here is GCC on Mac OS X, so libtool is told to treat
# the sources as C.  When CC and OBJC differ, libtool cannot infer the
# tag and automake does not add one, hence the explicit --tag=CC.
AM_LIBTOOLFLAGS = --tag=CC

# Sources shared by the DSO and the static flavor of the component.
xgrid_sources = \
    src/plm_xgrid.h \
    src/plm_xgrid_component.m \
    src/plm_xgrid_module.m \
    src/plm_xgrid_client.h \
    src/plm_xgrid_client.m

# The library is produced in this directory and is called
# mca_<type>_<name>.la for DSO builds, or libmca_<type>_<name>.la for
# static builds.  Exactly one of the two variables below is non-empty.
if MCA_BUILD_orte_plm_xgrid_DSO
component_noinst =
component_install = mca_plm_xgrid.la
else
component_noinst = libmca_plm_xgrid.la
component_install =
endif

# Installed (DSO) flavor.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_plm_xgrid_la_SOURCES = $(xgrid_sources)
mca_plm_xgrid_la_LDFLAGS = -module -avoid-version $(plm_xgrid_LDFLAGS)

# Non-installed (static convenience library) flavor.
noinst_LTLIBRARIES = $(component_noinst)
libmca_plm_xgrid_la_SOURCES = $(xgrid_sources)
libmca_plm_xgrid_la_LIBADD =
libmca_plm_xgrid_la_LDFLAGS = -module -avoid-version $(plm_xgrid_LDFLAGS)
|
@ -1,44 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_plm_xgrid_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_plm_xgrid_CONFIG],[
    AC_CONFIG_FILES([orte/mca/plm/xgrid/Makefile])

    # Sets plm_xgrid_good to 1 when XGrid support was found and to 0
    # otherwise.
    ORTE_CHECK_XGRID([plm_xgrid], [plm_xgrid_good=1], [plm_xgrid_good=0])

    # Enable the component only when the XGrid check succeeded AND
    # orte_without_full_support is 0.  The previous condition tested
    # for failure with both clauses joined by -a, so a failed XGrid
    # check combined with orte_without_full_support != 0 fell through
    # to the branch that can enable the component.
    # NOTE(review): this assumes orte_without_full_support = 0 means
    # that full ORTE support is being built -- confirm against the
    # top-level configure logic.
    #
    # For very dumb reasons involving linking, it is near impossible
    # to build the XGrid components as static libraries.  Disable if
    # that is the case.
    AS_IF([test "$plm_xgrid_good" = "1" && test "$orte_without_full_support" = "0"],
          [AS_IF([test "$compile_mode" = "dso"],
                 [ # plm_xgrid_LDFLAGS will be set by ORTE_CHECK_XGRID
                  plm_xgrid_WRAPPER_EXTRA_LDFLAGS="$plm_xgrid_LDFLAGS"
                  $1],
                 [AC_MSG_WARN([XGrid components must be built as DSOs. Disabling])
                  $2])],
          [$2])

    # set build flags to use in makefile
    AC_SUBST([plm_xgrid_OBJCFLAGS])
    AC_SUBST([plm_xgrid_LDFLAGS])
])dnl
|
@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: This header is an Objective-C file. It might not do what
|
||||
* you intend with a C/C++ compiler
|
||||
*/
|
||||
|
||||
#import "orte/mca/plm/plm.h"
|
||||
#import "plm_xgrid_client.h"
|
||||
|
||||
/**
|
||||
* PLM Component
|
||||
*/
|
||||
/*
 * Component descriptor for the XGrid PLM.  It extends the base PLM
 * component with the two Objective-C objects the component owns while
 * it is selected.
 */
struct orte_plm_xgrid_component_t {
    /* base PLM component; must stay first so the struct can be used
       wherever the base type is expected */
    orte_plm_base_component_t super;
    /* XGrid client object, created by the component query function */
    PlmXGridClient *client;
    /* autorelease pool created by the component query function, just
       before the client is allocated */
    NSAutoreleasePool *pool;
};
typedef struct orte_plm_xgrid_component_t orte_plm_xgrid_component_t;

/* The single component instance and the module it hands out. */
extern orte_plm_xgrid_component_t mca_plm_xgrid_component;
extern orte_plm_base_module_1_0_0_t orte_plm_xgrid_module;

/* Progress callback: ticks the Foundation run loop. */
int orte_plm_xgrid_progress(void);

/* Module finalize entry point.  Declared here because the component
   query function calls it when the controller connection fails, and
   that call previously had no visible prototype. */
int orte_plm_xgrid_finalize(void);
|
@ -1,76 +0,0 @@
|
||||
/* -*- ObjC -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
#import <XgridFoundation/XgridFoundation.h>
|
||||
#import <Foundation/NSString.h>
|
||||
|
||||
#import "opal/threads/condition.h"
|
||||
|
||||
/*
 * Client used by the XGrid PLM to talk to an XGrid controller: it
 * opens the connection, submits one XGrid job per ORTE job to start
 * the orted daemons, and stops / deletes those jobs afterwards.
 */
@interface PlmXGridClient : NSObject
{
    /* name of the daemon executable to launch */
    NSString *orted;
    /* controller contact information; a nil password selects Kerberos
       authentication in -connect */
    NSString *controller_hostname;
    NSString *controller_password;

    /* state of the world... */
    opal_condition_t state_cond;
    opal_mutex_t state_mutex;

    XGConnection *connection;
    XGController *controller;
    XGGrid *grid;
    /* non-zero: delete the submitted XGrid jobs in -dealloc */
    int cleanup;

    /* ORTE jobid (as string) -> XGrid job identifier */
    NSMutableDictionary *active_xgrid_jobs;
}

/* init / finalize */

/* Equivalent to the designated initializer with no hostname, no
   password, no orted name and cleanup enabled. */
-(id) init;

/* Designated initializer.  Any of the C strings may be NULL, in which
   case the corresponding value is left unset. */
-(id) initWithControllerHostname: (char*) hostname
           AndControllerPassword: (char*) password
                        AndOrted: (char*) ortedname
                      AndCleanup: (int) val;
-(void) dealloc;

/* accessors */
-(NSString*) getOrted;

/* Each setter stores a UTF-8 C string as an NSString. */
-(void) setOrtedAsCString: (char*) name;
-(void) setControllerPasswordAsCString: (char*) password;
-(void) setControllerHostnameAsCString: (char*) hostname;
-(void) setCleanUp: (int) val;

/* Kerberos service principal of the connection, or "xgrid/<connection
   name>" when the connection does not provide one. */
-(NSString*)servicePrincipal;

/* interface for launch; each method returns an ORTE status code */
-(int) connect;
-(int) launchOrteds:(orte_job_t*) jdata;
-(int) terminateOrteds;

/* delegate for changes */
-(void) connectionDidOpen:(XGConnection*) connection;
-(void) connectionDidNotOpen:(XGConnection*)connection withError:(NSError*) error;
-(void) connectionDidClose:(XGConnection *) connection;

/* Helper function: the basic orted command-line arguments as an
   array of NSString objects. */
-(NSMutableArray*) getArgumentsForOrtedLaunch;

@end
|
@ -1,454 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#import "orte_config.h"
|
||||
|
||||
#import <stdio.h>
|
||||
|
||||
#import "opal/util/path.h"
|
||||
|
||||
#import "orte/constants.h"
|
||||
#import "orte/mca/rml/rml.h"
|
||||
#import "orte/mca/plm/base/base.h"
|
||||
#import "orte/mca/plm/base/plm_private.h"
|
||||
#import "orte/mca/plm/plm.h"
|
||||
#import "orte/mca/errmgr/errmgr.h"
|
||||
#import "orte/mca/ras/ras_types.h"
|
||||
#import "orte/mca/rmaps/rmaps.h"
|
||||
|
||||
#import "plm_xgrid_client.h"
|
||||
|
||||
|
||||
@implementation PlmXGridClient

/*
 * Ownership note: this file uses manual retain/release.  Every object
 * stored in an instance variable is owned (alloc/init or retain) by
 * the client and released in -dealloc.  Earlier revisions stored
 * autoreleased objects directly in the instance variables, which left
 * them dangling as soon as the enclosing autorelease pool drained.
 */

/* Return a new, owned NSString for a UTF-8 C string, or nil when the
   C string is NULL.  The caller is responsible for releasing it. */
static NSString *
plm_xgrid_copy_cstring(const char *cstring)
{
    if (NULL == cstring) {
        return nil;
    }
    return [[NSString alloc] initWithUTF8String: cstring];
}


/* init / finalize */

/* Convenience initializer: nothing configured, cleanup enabled. */
-(id) init
{
    return [self initWithControllerHostname: NULL
                      AndControllerPassword: NULL
                                   AndOrted: NULL
                                 AndCleanup: 1];
}


/* Designated initializer.  NULL C strings leave the corresponding
   instance variable nil. */
-(id) initWithControllerHostname: (char*) hostname
           AndControllerPassword: (char*) password
                        AndOrted: (char*) ortedname
                      AndCleanup: (int) val
{
    self = [super init];
    if (nil != self) {
        /* class-specific initialization goes here */
        OBJ_CONSTRUCT(&state_cond, opal_condition_t);
        OBJ_CONSTRUCT(&state_mutex, opal_mutex_t);

        controller_password = plm_xgrid_copy_cstring(password);
        controller_hostname = plm_xgrid_copy_cstring(hostname);
        orted = plm_xgrid_copy_cstring(ortedname);
        cleanup = val;

        active_xgrid_jobs = [[NSMutableDictionary alloc] init];
    }
    return self;
}


/* Optionally delete every XGrid job submitted through this client,
   shut the connection down and give up all owned objects. */
-(void) dealloc
{
    /* if supposed to clean up jobs, do so */
    if (cleanup) {
        NSEnumerator *enumerator = [[active_xgrid_jobs allKeys] objectEnumerator];
        NSString *key;

        while (nil != (key = [enumerator nextObject])) {
            XGJob *job = [grid jobForIdentifier: [active_xgrid_jobs objectForKey: key]];
            XGActionMonitor *actionMonitor = [job performDeleteAction];

            /* Without a monitor there is nothing to wait for; polling
               a nil monitor could never observe a final outcome. */
            if (nil == actionMonitor) {
                fprintf(stderr,
                        "orte:plm:xgrid: cleanup failed: no XGrid job for ORTE job %s\n",
                        [key UTF8String]);
                continue;
            }

            while (XGActionMonitorOutcomeNone == [actionMonitor outcome]) {
                opal_progress();
            }

            /* we should have a result - find out if it worked */
            if (XGActionMonitorOutcomeSuccess != [actionMonitor outcome]) {
                NSError *err = [actionMonitor error];
                fprintf(stderr, "orte:plm:xgrid: cleanup failed: %s\n",
                        [[err localizedDescription] UTF8String]);
            }
        }
    }

    /* need to shut down connection.  Detach ourselves first so the
       connection cannot message a deallocated delegate. */
    [connection setDelegate: nil];
    /* NOTE(review): -finalize is kept from the original code; confirm
       that it is the intended way to shut an XGConnection down. */
    [connection finalize];

    [grid release];
    [controller release];
    [connection release];
    [active_xgrid_jobs release];
    [orted release];
    [controller_hostname release];
    [controller_password release];

    OBJ_DESTRUCT(&state_mutex);
    OBJ_DESTRUCT(&state_cond);

    [super dealloc];
}


/* accessors */

/* Name of the daemon executable, or nil if it was never set. */
-(NSString*) getOrted
{
    return orted;
}


/* Set the daemon executable name; NULL clears it. */
-(void) setOrtedAsCString: (char*) name
{
    NSString *replacement = plm_xgrid_copy_cstring(name);

    [orted release];
    orted = replacement;
}


/* Set the controller password; NULL clears it, which makes -connect
   use Kerberos authentication. */
-(void) setControllerPasswordAsCString: (char*) password
{
    NSString *replacement = plm_xgrid_copy_cstring(password);

    [controller_password release];
    controller_password = replacement;
}


/* Set the controller hostname; NULL clears it. */
-(void) setControllerHostnameAsCString: (char*) hostname
{
    NSString *replacement = plm_xgrid_copy_cstring(hostname);

    [controller_hostname release];
    controller_hostname = replacement;
}


/* Select whether -dealloc deletes the submitted XGrid jobs. */
-(void) setCleanUp: (int) val
{
    cleanup = val;
}


/* Service principal reported by the connection, falling back to
   "xgrid/<connection name>" when the connection has none. */
-(NSString *) servicePrincipal
{
    NSString *myServicePrincipal = [connection servicePrincipal];

    if (myServicePrincipal == nil) {
        myServicePrincipal = [NSString stringWithFormat:@"xgrid/%@", [connection name]];
    }

    opal_output_verbose(1, orte_plm_globals.output,
                        "orte:plm:xgrid: Kerberos servicePrincipal: %s",
                        [myServicePrincipal UTF8String]);

    return myServicePrincipal;
}


/* interface for launch */

/* Open the controller connection, wait until it is no longer in the
   "opening" state, and look up the default grid.  Returns
   ORTE_ERR_NOT_AVAILABLE when the connection did not open. */
-(int) connect
{
    /* drop a connection left over from an earlier attempt */
    [connection setDelegate: nil];
    [connection release];
    connection = [[XGConnection alloc] initWithHostname: controller_hostname
                                             portnumber: 0];

    if (nil == controller_password) {
        opal_output_verbose(1, orte_plm_globals.output,
                            "orte:plm:xgrid: Using Kerberos authentication");

        XGGSSAuthenticator *authenticator =
            [[[XGGSSAuthenticator alloc] init] autorelease];
        NSString *principal = [self servicePrincipal];

        opal_output_verbose(1, orte_plm_globals.output,
                            "orte:plm:xgrid: Kerberos principal: %s",
                            [principal UTF8String]);

        [authenticator setServicePrincipal: principal];
        [connection setAuthenticator: authenticator];

    } else {
        opal_output_verbose(1, orte_plm_globals.output,
                            "orte:plm:xgrid: Using password authentication");

        XGTwoWayRandomAuthenticator *authenticator =
            [[[XGTwoWayRandomAuthenticator alloc] init] autorelease];

        /* this seems to be hard coded */
        [authenticator setUsername:@"one-xgrid-client"];
        [authenticator setPassword:controller_password];

        [connection setAuthenticator:authenticator];
    }
    [connection setDelegate: self];

    /* get us connected; the delegate callbacks below broadcast on
       state_cond whenever the connection changes state */
    opal_mutex_lock(&state_mutex);
    [connection open];
    while ([connection state] == XGConnectionStateOpening) {
        opal_condition_wait(&state_cond, &state_mutex);
    }
    opal_mutex_unlock(&state_mutex);

    /* if we're not connected when the condition is triggered, we
       don't have a connection and can't start.  exit. */
    if ([connection state] != XGConnectionStateOpen) {
        return ORTE_ERR_NOT_AVAILABLE;
    }

    opal_output_verbose(1, orte_plm_globals.output,
                        "orte:plm:xgrid: connection name: %s",
                        [[connection name] UTF8String]);

    [controller release];
    controller = [[XGController alloc] initWithConnection:connection];
    /* need to call progress exactly once for some reason to get the
       controller happy enough to allow us to assign the grid */
    opal_progress();

    [grid release];
    grid = [[controller defaultGrid] retain];

    opal_output_verbose(1, orte_plm_globals.output,
                        "plm: xgrid: grid name: %s",
                        [[grid identifier] UTF8String]);

    return ORTE_SUCCESS;
}


/* Submit one XGrid job that starts an orted for every node in the
   job map of jdata, remember the XGrid job identifier and wait for
   the daemons to call back.  Returns an ORTE status code. */
-(int) launchOrteds:(orte_job_t*) jdata
{
    orte_job_map_t *map = NULL;
    int rc = ORTE_SUCCESS;
    char *orted_path = NULL;
    orte_node_t **nodes;
    orte_std_cntr_t nnode;
    char *vpid_string;
    NSMutableDictionary *taskSpecifications;
    NSMutableDictionary *jobSpecification;
    XGActionMonitor *actionMonitor;

    /* Get the map for this job */
    if (NULL == (map = orte_rmaps.get_job_map(jdata->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* get the nodes list */
    nodes = (orte_node_t**)map->nodes->addr;

    /* Shortcut out of here */
    if (0 == map->num_new_daemons) {
        /* have all the daemons we need - launch app */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:xgrid: no new daemons to launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_SUCCESS;
    }

    /* find orted; a missing name or an executable that is not in the
       search path is an error rather than a crash further down */
    if (nil != orted) {
        orted_path = opal_path_findv((char*) [orted UTF8String], 0, environ, NULL);
    }
    if (NULL == orted_path) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* build up the array of task specifications */
    taskSpecifications = [NSMutableDictionary dictionary];
    for (nnode=0 ; nnode < map->num_nodes ; nnode++) {
        opal_output_verbose(1, orte_plm_globals.output,
                            "orte:plm:xgrid: launching on node %s",
                            nodes[nnode]->name);

        /* Create the task */
        NSMutableDictionary *task = [NSMutableDictionary dictionary];

        /* fill in application to start */
        [task setObject: [NSString stringWithUTF8String: orted_path]
                 forKey: XGJobSpecificationCommandKey];

        /* fill in task arguments */
        NSMutableArray *taskArguments = [self getArgumentsForOrtedLaunch];

        [taskArguments addObject: @"-mca"];
        [taskArguments addObject: @"orte_ess_vpid"];
        rc = orte_util_convert_vpid_to_string(&vpid_string,
                                              nodes[nnode]->daemon->name.vpid);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "orte:plm:xgrid: unable to get daemon vpid as string");
            goto cleanup;
        }
        [taskArguments addObject: [NSString stringWithUTF8String: vpid_string]];
        free(vpid_string);

        [taskArguments addObject: @"--nodename"];
        [taskArguments addObject: [NSString stringWithUTF8String: nodes[nnode]->name]];

        [task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey];

        /* Add task to the task specification dictionary */
        [taskSpecifications setObject: task
                               forKey: [NSString stringWithFormat: @"%d", (int) nnode]];
    }

    /* job specification */
    jobSpecification = [NSMutableDictionary dictionary];
    [jobSpecification setObject:XGJobSpecificationTypeTaskListValue
                         forKey:XGJobSpecificationTypeKey];
    [jobSpecification setObject:@"org.open-mpi.plm.xgrid"
                         forKey:XGJobSpecificationSubmissionIdentifierKey];
    [jobSpecification setObject: [NSString stringWithFormat: @"Open MPI Job %u",
                                           (unsigned int) jdata->jobid]
                         forKey:XGJobSpecificationNameKey];
    [jobSpecification setObject:taskSpecifications
                         forKey:XGJobSpecificationTaskSpecificationsKey];

    /* Submit the request and get our monitor */
    actionMonitor =
        [controller performSubmitJobActionWithJobSpecification: jobSpecification
                                                gridIdentifier: [grid identifier]];
    if (nil == actionMonitor) {
        /* no controller (never connected): nothing was submitted */
        fprintf(stderr, "orte:plm:xgrid: launch failed: job could not be submitted\n");
        rc = ORTE_ERROR;
        goto cleanup;
    }

    /* wait until we have some idea if job succeeded or not */
    while (XGActionMonitorOutcomeNone == [actionMonitor outcome]) {
        opal_progress();
    }

    /* we should have a result - find out if it worked */
    if (XGActionMonitorOutcomeSuccess == [actionMonitor outcome]) {
        rc = ORTE_SUCCESS;
    } else {
        NSError *err = [actionMonitor error];
        fprintf(stderr, "orte:plm:xgrid: launch failed: (%d) %s\n",
                (int) [actionMonitor outcome],
                [[err localizedDescription] UTF8String]);
        rc = ORTE_ERROR;
        goto cleanup;
    }

    /* save the XGJob identifier somewhere we can get to it */
    [active_xgrid_jobs setObject: [[actionMonitor results] objectForKey: @"jobIdentifier"]
                          forKey: [NSString stringWithFormat: @"%u",
                                            (unsigned int) jdata->jobid]];

    /* wait for daemons to callback */
    if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:xgrid: daemon launch failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

cleanup:
    /* NOTE(review): assumes opal_path_findv returns a heap string the
       caller must free -- confirm against opal/util/path.h */
    if (NULL != orted_path) {
        free(orted_path);
    }

    opal_output_verbose(1, orte_plm_globals.output,
                        "orte:plm:xgrid:launch: finished, rc=%d\n", rc);

    return rc;
}


/* Stop every XGrid job submitted through this client.  Returns
   ORTE_ERROR if any stop request did not succeed. */
-(int) terminateOrteds
{
    NSEnumerator *enumerator = [[active_xgrid_jobs allKeys] objectEnumerator];
    NSString *key;
    int ret = ORTE_SUCCESS;

    while (nil != (key = [enumerator nextObject])) {
        XGJob *job = [grid jobForIdentifier: [active_xgrid_jobs objectForKey: key]];
        XGActionMonitor *actionMonitor = [job performStopAction];

        /* Without a monitor there is nothing to wait for; polling a
           nil monitor could never observe a final outcome. */
        if (nil == actionMonitor) {
            fprintf(stderr,
                    "orte:plm:xgrid: terminate failed: no XGrid job for ORTE job %s\n",
                    [key UTF8String]);
            ret = ORTE_ERROR;
            continue;
        }

        while (XGActionMonitorOutcomeNone == [actionMonitor outcome]) {
            opal_progress();
        }

        /* we should have a result - find out if it worked */
        if (XGActionMonitorOutcomeSuccess != [actionMonitor outcome]) {
            NSError *err = [actionMonitor error];
            fprintf(stderr, "orte:plm:xgrid: terminate failed: %s\n",
                    [[err localizedDescription] UTF8String]);
            ret = ORTE_ERROR;
        }
    }

    return ret;
}


/* delegate for changes */

/* The connection opened: wake up -connect. */
-(void) connectionDidOpen:(XGConnection*) myConnection
{
    /* this isn't an error condition -- we finally opened the
       connection, so trigger the condition variable we're waiting
       on */
    opal_condition_broadcast(&state_cond);
}


/* The connection failed to open: report why and wake up -connect. */
-(void) connectionDidNotOpen:(XGConnection*) myConnection withError: (NSError*) error
{
    opal_output(orte_plm_globals.output,
                "orte:plm:xgrid: Controller connection did not open: (%ld) %s",
                (long)[error code],
                [[error localizedDescription] UTF8String]);
    opal_condition_broadcast(&state_cond);
}


/* The connection closed: report the reason (if any) and wake up
   anybody waiting on the connection state. */
-(void) connectionDidClose:(XGConnection*) myConnection
{
    NSError *closeError = [myConnection error];

    /* check for success */
    if (closeError != nil) {
        switch ([closeError code]) {
        case 200:
            /* success */
            break;
        case 530:
        case 535:
            opal_output(orte_plm_globals.output,
                        "orte:plm:xgrid: Connection to XGrid controller failed due to authentication error (%ld):",
                        (long)[closeError code]);
            break;
        default:
            opal_output(orte_plm_globals.output,
                        "orte:plm:xgrid: Connection to XGrid controller unexpectedly closed: (%ld) %s",
                        (long)[closeError code],
                        [[closeError localizedDescription] UTF8String]);
            break;
        }
    } else {
        opal_output(orte_plm_globals.output,
                    "orte:plm:xgrid: Connection to XGrid controller unexpectedly closed");
    }

    opal_condition_broadcast(&state_cond);
}


/* Build the basic orted argument vector and return it as a mutable
   array of NSString objects (autoreleased). */
-(NSMutableArray*) getArgumentsForOrtedLaunch
{
    char **argv = NULL;
    int argc = 0;
    int i;

    orte_plm_base_orted_append_basic_args(&argc, &argv,
                                          "env",
                                          NULL,
                                          NULL);

    /* Note that capacity is a starting capacity, not max */
    NSMutableArray *ret = [NSMutableArray arrayWithCapacity: argc];
    for (i = 0 ; i < argc ; ++i) {
        [ret addObject: [NSString stringWithUTF8String: argv[i]]];
    }

    if (NULL != argv) opal_argv_free(argv);

    return ret;
}

@end
|
@ -1,172 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#import "orte_config.h"
|
||||
|
||||
#import <stdlib.h>
|
||||
#import <unistd.h>
|
||||
|
||||
#import "orte/constants.h"
|
||||
#import "opal/util/argv.h"
|
||||
#import "opal/util/path.h"
|
||||
#import "opal/util/basename.h"
|
||||
|
||||
#import "orte/util/proc_info.h"
|
||||
#import "orte/mca/plm/plm.h"
|
||||
#import "orte/mca/plm/base/base.h"
|
||||
#import "orte/mca/plm/base/plm_private.h"
|
||||
#import "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#import "plm_xgrid.h"
|
||||
#import "plm_xgrid_client.h"
|
||||
|
||||
/* Component entry points, defined further down in this file. */
int orte_plm_xgrid_component_open(void);
int orte_plm_xgrid_component_close(void);
int orte_plm_xgrid_component_query(mca_base_module_t **module, int *priority);

/*
 * The one public component instance.  Only the embedded base
 * component is initialized explicitly; the remaining members (client,
 * pool) start out zeroed and are filled in by the query function.
 */
orte_plm_xgrid_component_t mca_plm_xgrid_component = {
    {
        /* Base component: meta information about the component */
        {
            ORTE_PLM_BASE_VERSION_2_0_0,

            /* name, then major / minor / release version */
            "xgrid",
            ORTE_MAJOR_VERSION,
            ORTE_MINOR_VERSION,
            ORTE_RELEASE_VERSION,

            /* open, close and query entry points */
            orte_plm_xgrid_component_open,
            orte_plm_xgrid_component_close,
            orte_plm_xgrid_component_query
        },
        {
            /* This component is not checkpointable */
            MCA_BASE_METADATA_PARAM_NONE
        }
    }
};
|
||||
|
||||
|
||||
/*
 * Component open: register the MCA parameters of the XGrid PLM
 * (orted, priority, delete_job, num_slots).  Always returns
 * ORTE_SUCCESS; the results of the registrations are not examined.
 */
int
orte_plm_xgrid_component_open(void)
{
    /* daemon executable, defaults to "orted" */
    mca_base_param_reg_string(&mca_plm_xgrid_component.super.base_version,
                              "orted",
                              "The command name that the component will invoke for the ORTE daemon",
                              false, false, "orted", NULL);

    /* selection priority, defaults to 20 */
    mca_base_param_reg_int(&mca_plm_xgrid_component.super.base_version,
                           "priority",
                           "Priority of the xgrid plm component",
                           false, false, 20, NULL);

    /* job deletion on completion, enabled by default */
    mca_base_param_reg_int(&mca_plm_xgrid_component.super.base_version,
                           "delete_job",
                           "Delete job from XGrid controller's database on job completion",
                           false, false, 1, NULL);

    /* slot reservation, defaults to 0 */
    mca_base_param_reg_int(&mca_plm_xgrid_component.super.base_version,
                           "num_slots",
                           "Number of slots to reserve for job (including future spawned processes). "
                           "0 will result in number of processes requested in initial launch.",
                           false, false, 0, NULL);

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * Component close: there is nothing to tear down here, so this
 * simply reports success.
 */
int
orte_plm_xgrid_component_close(void)
{
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * Component query: decide whether the XGrid PLM can be used.
 *
 * The component is only available when XGRID_CONTROLLER_HOSTNAME is
 * set in the environment and a connection to that controller can be
 * opened.  On success *module points at the XGrid module and
 * *priority holds the value of the "priority" MCA parameter.  On
 * failure *module is NULL and ORTE_ERROR is returned.
 */
int orte_plm_xgrid_component_query(mca_base_module_t **module, int *priority)
{
    /* Both locals are given defaults so that a failed parameter
       lookup never leaves them uninitialized: NULL selects the
       fallback daemon name below, 1 is the registered default of
       "delete_job". */
    char *string = NULL;
    char *hostname, *password;
    int ret, val = 1, param;

    hostname = getenv("XGRID_CONTROLLER_HOSTNAME");
    if (NULL == hostname) {
        opal_output_verbose(10, orte_plm_globals.output,
                            "orte:plm:xgrid: not available: controller info not set");
        *module = NULL;
        return ORTE_ERROR;
    }

    opal_output_verbose(1, orte_plm_globals.output,
                        "orte:plm:xgrid: initializing PlmXGridClient");
    mca_plm_xgrid_component.pool = [[NSAutoreleasePool alloc] init];
    mca_plm_xgrid_component.client = [[PlmXGridClient alloc] init];

    /* setup daemon name; fall back to the registered default when the
       lookup did not produce a value */
    param = mca_base_param_find("plm", "xgrid", "orted");
    mca_base_param_lookup_string(param, &string);
    [mca_plm_xgrid_component.client setOrtedAsCString:
                                        ((NULL != string) ? string : "orted")];
    if (NULL != string) free(string);

    /* setup contact information */
    password = getenv("XGRID_CONTROLLER_PASSWORD");
    if (NULL != password) {
        [mca_plm_xgrid_component.client setControllerPasswordAsCString: password];
    }
    [mca_plm_xgrid_component.client setControllerHostnameAsCString: hostname];

    /* info we need */
    param = mca_base_param_find("plm", "xgrid", "priority");
    mca_base_param_lookup_int(param, priority);

    param = mca_base_param_find("plm", "xgrid", "delete_job");
    mca_base_param_lookup_int(param, &val);
    [mca_plm_xgrid_component.client setCleanUp: val];

    /* the run loop must be ticked for the connection to make
       progress, so hook it into the progress engine before connecting */
    opal_progress_register(orte_plm_xgrid_progress);

    ret = [mca_plm_xgrid_component.client connect];
    if (ret != ORTE_SUCCESS) {
        opal_output_verbose(10, orte_plm_globals.output,
                            "orte:plm:xgrid: not available: connection failed");
        orte_plm_xgrid_finalize();
        *module = NULL;
        return ORTE_ERROR;
    }

    opal_output_verbose(10, orte_plm_globals.output,
                        "orte:plm:xgrid: initialized");
    *module = (mca_base_module_t *) &orte_plm_xgrid_module;
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * Progress callback registered with opal_progress_register() by the
 * component query function.  It ticks the current Foundation run
 * loop for at most one second so that the XGrid connection can make
 * progress.
 *
 * Returns 0.  The function is declared to return int but previously
 * fell off its end without a return statement, handing an undefined
 * value back to the caller.
 * NOTE(review): 0 is assumed to mean "no events to report" to the
 * progress engine -- confirm against the opal_progress callback
 * contract.
 */
int
orte_plm_xgrid_progress(void)
{
    /* tick the event loop */
    [[NSRunLoop currentRunLoop] runUntilDate:
                                    [NSDate dateWithTimeIntervalSinceNow:1]];
    return 0;
}
|
@ -1,245 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#import "orte_config.h"
|
||||
|
||||
#import <stdlib.h>
|
||||
#import <unistd.h>
|
||||
#import <errno.h>
|
||||
#import <string.h>
|
||||
#import <sys/types.h>
|
||||
#import <sys/stat.h>
|
||||
#import <sys/wait.h>
|
||||
#import <fcntl.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#import <sys/time.h>
|
||||
#endif
|
||||
|
||||
#import "orte/constants.h"
|
||||
#import "opal/util/argv.h"
|
||||
#import "opal/class/opal_pointer_array.h"
|
||||
|
||||
#import "orte/util/show_help.h"
|
||||
#import "orte/util/session_dir.h"
|
||||
#import "opal/mca/event/event.h"
|
||||
#import "orte/runtime/orte_wait.h"
|
||||
#import "orte/mca/plm/plm.h"
|
||||
#import "orte/mca/plm/base/plm_private.h"
|
||||
#import "orte/mca/rml/rml.h"
|
||||
#import "orte/mca/errmgr/errmgr.h"
|
||||
#import "orte/mca/rmaps/rmaps.h"
|
||||
#import "orte/mca/iof/iof.h"
|
||||
|
||||
#import "plm_xgrid.h"
|
||||
|
||||
/* Module entry points implemented in this file. */
int orte_plm_xgrid_init(void);
int orte_plm_xgrid_spawn(orte_job_t *jdata);
int orte_plm_xgrid_terminate_orteds(void);
int orte_plm_xgrid_signal_job(orte_jobid_t job, int32_t signal);
int orte_plm_xgrid_finalize(void);

/*
 * The XGrid PLM module.  The initializer is positional: entries named
 * orte_plm_xgrid_* are implemented by this component, entries named
 * orte_plm_base_* reuse the base PLM implementation.
 */
orte_plm_base_module_1_0_0_t orte_plm_xgrid_module = {
    orte_plm_xgrid_init,
    orte_plm_base_set_hnp_name,
    orte_plm_xgrid_spawn,
    NULL,    /* entry not provided -- NOTE(review): confirm which
                module function this slot is */
    orte_plm_base_orted_terminate_job,
    orte_plm_xgrid_terminate_orteds,
    orte_plm_base_orted_kill_local_procs,
    orte_plm_xgrid_signal_job,
    orte_plm_xgrid_finalize
};


/* Number of "nodes" created so far; used to give every node entry a
   unique name. */
static int node_counter = 0;
|
||||
|
||||
/*
 * Module init: start the base PLM communication.  The status of that
 * call is logged when it is not ORTE_SUCCESS and is returned to the
 * caller either way.
 */
int
orte_plm_xgrid_init(void)
{
    int status = orte_plm_base_comm_start();

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
|
||||
|
||||
|
||||
/*
 * Create one single-slot node entry per process of the job and add
 * the entries to the global node pool.
 *
 * jdata: the job being launched; its total_slots_alloc is increased
 *        by one per created node and oversubscribe_override is set.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_SUPPORTED when an application
 * requests zero processes, ORTE_ERR_OUT_OF_RESOURCE when a node entry
 * cannot be created or stored, or the error reported while resizing
 * the node pool.
 */
static int
orte_plm_xgrid_make_nodes(orte_job_t *jdata)
{
    int num_nodes = 0, i, rc;
    orte_app_context_t *app, **apps;

    /* figure out how many slots we need: one per requested process */
    apps = (orte_app_context_t**)jdata->apps->addr;
    for(i = 0 ; i < jdata->num_apps ; i++) {
        app = apps[i];
        if (0 == app->num_procs) return ORTE_ERR_NOT_SUPPORTED;
        num_nodes += app->num_procs;
    }

    /* Create node entries for the orteds we're going to spawn. */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    for (i = 0 ; i < num_nodes ; ++i) {
        orte_node_t *node = OBJ_NEW(orte_node_t);
        if (NULL == node) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            return rc;
        }

        /* asprintf leaves the target undefined on failure, so reset
           it before the node is released */
        if (0 > asprintf(&node->name, "ompi-xgrid-node-%d", node_counter++)) {
            node->name = NULL;
            OBJ_RELEASE(node);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        node->state = ORTE_NODE_STATE_UP;
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = 1;
        node->slots_alloc = 1;

        /* NOTE(review): a negative result is assumed to mean that the
           node could not be stored -- confirm against
           opal_pointer_array_add */
        node->index = opal_pointer_array_add(orte_node_pool, (void*)node);
        if (0 > node->index) {
            OBJ_RELEASE(node);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* update the total slots in the job */
        jdata->total_slots_alloc += node->slots_alloc;
    }
    jdata->oversubscribe_override = true;

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * Launch a job through xgrid.
 *
 * Steps, in order: create a jobid for the job, add the job object to the
 * global orte_job_data array, create the placeholder nodes, map the job
 * via orte_rmaps.map_job(), ask the xgrid client to launch the orteds, and
 * finally launch the applications with orte_plm_base_launch_apps().
 *
 * If any step fails, the job is reported to the error manager as
 * ORTE_JOB_STATE_FAILED_TO_START before returning.
 *
 * jdata   job to launch
 *
 * Returns ORTE_SUCCESS if every step succeeded, otherwise the error code
 * of the step that failed.
 */
int
orte_plm_xgrid_spawn(orte_job_t *jdata)
{
    int rc;
    bool failed_launch = true;

    /* create a jobid for this job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:xgrid: launching job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* insert the job object into the global pool; a negative return means
     * the job could not be stored, so do not continue the launch */
    if (0 > opal_pointer_array_add(orte_job_data, jdata)) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (ORTE_SUCCESS != (rc = orte_plm_xgrid_make_nodes(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:xgrid: mapping job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:xgrid: setting up I/O for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* launch new daemons */
    rc = [mca_plm_xgrid_component.client launchOrteds: jdata];
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* Daemons are running - launch the applications */
    if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(jdata->jobid))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:xgrid: launch of apps failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

    /* get here if launch went okay */
    failed_launch = false;

 cleanup:
    /* check for failed launch - if so, force terminate */
    if (failed_launch) {
        orte_errmgr.update_state(jdata->jobid, ORTE_JOB_STATE_FAILED_TO_START,
                                 NULL, ORTE_PROC_STATE_UNDEF,
                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

    return rc;
}
|
||||
|
||||
|
||||
/*
 * Shut down the daemons.
 *
 * First calls orte_plm_base_orted_exit() with ORTE_DAEMON_HALT_VM_CMD.
 * Only if that call fails is the xgrid client's terminateOrteds method
 * used as a fallback; a failure of the fallback is logged.
 *
 * Returns ORTE_SUCCESS if either path succeeded, otherwise the error code
 * returned by the fallback.
 */
int
orte_plm_xgrid_terminate_orteds(void)
{
    int status = orte_plm_base_orted_exit(ORTE_DAEMON_HALT_VM_CMD);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    /* the exit command failed - fall back to the xgrid client */
    status = [mca_plm_xgrid_component.client terminateOrteds];
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
|
||||
|
||||
|
||||
/*
 * Deliver a signal to the processes of a job.
 *
 * Forwards the request to orte_plm_base_orted_signal_local_procs() and
 * logs any failure.
 *
 * jobid    job whose processes are to be signalled
 * signal   signal number to pass along
 *
 * Returns the status reported by
 * orte_plm_base_orted_signal_local_procs().
 */
int
orte_plm_xgrid_signal_job(orte_jobid_t jobid, int32_t signal)
{
    /* order them to pass this signal to their local procs */
    int status = orte_plm_base_orted_signal_local_procs(jobid, signal);

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
|
||||
|
||||
|
||||
/*
 * Tear down the xgrid PLM module.
 *
 * Calls orte_plm_base_comm_stop(), then releases the component's client
 * and pool objects and unregisters orte_plm_xgrid_progress from the
 * progress engine.  A failure of orte_plm_base_comm_stop() is logged but
 * does not interrupt the remaining teardown.
 *
 * Always returns ORTE_SUCCESS.
 */
int
orte_plm_xgrid_finalize(void)
{
    /* cleanup any pending recvs */
    int status = orte_plm_base_comm_stop();

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    /* drop the Objective-C objects owned by the component: client first,
     * then the pool */
    [mca_plm_xgrid_component.client release];
    [mca_plm_xgrid_component.pool release];

    opal_progress_unregister(orte_plm_xgrid_progress);

    return ORTE_SUCCESS;
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user