1
1

Ckpt the bproc support. All compiles now except for PLM module

This commit was SVN r18744.
Этот коммит содержится в:
Ralph Castain 2008-06-26 03:48:22 +00:00
родитель dd563f9297
Коммит 9cebe0ca96
20 изменённых файлов: 392 добавлений и 1324 удалений

Просмотреть файл

@ -21,63 +21,66 @@
* See odls_bproc.h for an overview of how it works.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#include <pty.h>
#endif
#include <dirent.h>
#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal_progress.h"
#include "opal/threads/condition.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"
#include "opal/dss/dss.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/orte_constants.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_setup.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/session_dir.h"
#include "orte/util/univ_info.h"
#include "orte/mca/odls/base/odls_private.h"
#include "odls_bproc.h"
static int orte_odls_bproc_launch_local_procs(opal_buffer_t *data);
static int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state);
static int orte_odls_bproc_signal_local_procs(const orte_process_name_t *proc, int32_t signal);
/**
* Initialization of the bproc_orted module with all the needed function pointers
*/
orte_odls_base_module_t orte_odls_bproc_module = {
orte_odls_bproc_subscribe_launch_data,
orte_odls_bproc_get_add_procs_data,
orte_odls_base_default_get_add_procs_data,
orte_odls_bproc_launch_local_procs,
orte_odls_bproc_kill_local_procs,
orte_odls_bproc_signal_local_procs
orte_odls_bproc_signal_local_procs,
orte_odls_base_default_deliver_message,
orte_odls_base_default_require_sync,
orte_odls_base_default_collect_data
};
static int odls_bproc_make_dir(char *directory);
static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context);
orte_std_cntr_t app_context);
static void odls_bproc_delete_dir_tree(char * path);
static int odls_bproc_remove_dir(void);
static void odls_bproc_send_cb(int status, orte_process_name_t * peer,
orte_buffer_t* buffer, int tag, void* cbdata);
opal_buffer_t* buffer, int tag, void* cbdata);
static int odls_bproc_setup_stdio(orte_process_name_t *proc_name,
int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context, bool connect_stdin);
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map)
{
return ORTE_ERR_NOT_IMPLEMENTED;
}
int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context, bool connect_stdin);
/* Local globals */
static char *user = NULL;
static char *frontend = NULL;
/**
* Creates the passed directory. If the directory already exists, it and its
@ -115,39 +118,36 @@ static char *
odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context)
{
char *path = NULL, *user = NULL, *job = NULL;
char *path = NULL, *job = NULL;
int rc;
/* ensure that system info is set */
orte_sys_info();
if (NULL == orte_universe_info.name) { /* error condition */
ORTE_ERROR_LOG(ORTE_ERROR);
return NULL;
}
rc = orte_ns.convert_jobid_to_string(&job, jobid);
rc = orte_util_convert_jobid_to_string(&job, jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return NULL;
}
/* get the username set by the bproc pls. We need to get it from here
/* get the username set by the bproc plm. We need to get it from here
* because on many bproc systems the method we use to get the username
* from the system on the backend fails and we only get the uid. */
rc = mca_base_param_register_string("pls", "bproc", "username", NULL,
orte_system_info.user);
mca_base_param_lookup_string(rc,&user);
* from the system on the backend fails and we only get the uid
*/
mca_base_param_reg_string_name("orte", "plm_bproc_username",
"Name of the user on the remote node",
false, false, NULL, &user);
if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
user, orte_universe_info.name,
job, (int) app_context, proc_rank)) {
if (0 > asprintf(&frontend, OPAL_PATH_SEP"%s"OPAL_PATH_SEP"openmpi-bproc-%s",
orte_process_info.tmpdir_base, user)) {
ORTE_ERROR_LOG(ORTE_ERROR);
path = NULL;
}
if(0 < mca_odls_bproc_component.debug) {
opal_output(0, "odls bproc io setup. Path: %s\n", path);
if (0 > asprintf(&path, "%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
frontend, job, (int) app_context, proc_rank)) {
ORTE_ERROR_LOG(ORTE_ERROR);
path = NULL;
}
OPAL_OUTPUT_VERBOSE((0, orte_odls_globals.output,
"odls bproc io setup. Path: %s\n", path));
free(user);
free(job);
return path;
@ -199,26 +199,6 @@ odls_bproc_delete_dir_tree(char * path)
static int
odls_bproc_remove_dir()
{
char *frontend = NULL, *user = NULL, *filename = NULL;
int id;
/* get the username set by the bproc pls. We need to get it from here
* because on many bproc systems the method we use to get the username
* from the system on the backend fails and we only get the uid. */
id = mca_base_param_register_string("pls", "bproc", "username", NULL,
orte_system_info.user);
mca_base_param_lookup_string(id,&user);
asprintf(&filename, "openmpi-bproc-%s", user );
if( NULL == filename ) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERROR;
}
frontend = opal_os_path(false, "tmp", filename, NULL );
free(filename); /* Always free the filename */
if (NULL == frontend) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERROR;
}
/* we do our best to clean up the directory tree, but we ignore errors*/
odls_bproc_delete_dir_tree(frontend);
free(frontend);
@ -236,7 +216,7 @@ odls_bproc_remove_dir()
*/
static void
odls_bproc_send_cb(int status, orte_process_name_t * peer,
orte_buffer_t* buffer, int tag, void* cbdata)
opal_buffer_t* buffer, int tag, void* cbdata)
{
OBJ_RELEASE(buffer);
}
@ -430,196 +410,49 @@ cleanup:
}
/*
 * Subscribe to the launch data of a job.
 *
 * This whole function runs inside a GPR compound command, so the
 * subscription is not actually made until the orted executes that
 * compound command.
 *
 * @param job     job whose launch data is being requested
 * @param cbfunc  callback stored in the subscription
 * @return ORTE_SUCCESS, or the error code of the first step that failed
 */
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc)
{
    char* keys[] = {
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_NODE_NAME_KEY,
    };
    int num_keys = (int)(sizeof(keys) / sizeof(keys[0]));
    char *segment;
    orte_gpr_value_t *values[1];
    orte_gpr_subscription_t sub = ORTE_GPR_SUBSCRIPTION_EMPTY;
    orte_gpr_trigger_t trig = ORTE_GPR_TRIGGER_EMPTY;
    orte_gpr_subscription_t *subs = &sub;
    orte_gpr_trigger_t *trigs = &trig;
    int k, rc;

    /* get the job segment name */
    rc = orte_schema.get_job_segment_name(&segment, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* attach ourselves to the "standard" orted trigger */
    rc = orte_schema.get_std_trigger_name(&(trig.name),
                                          ORTED_LAUNCH_STAGE_GATE_TRIGGER, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_segment;
    }

    /* ask for return of all data required for launching local processes */
    sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
    rc = orte_schema.get_std_subscription_name(&(sub.name),
                                               ORTED_LAUNCH_STG_SUB, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_trigger;
    }

    sub.cnt = 1;
    sub.values = values;
    rc = orte_gpr.create_value(&(values[0]), ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
                               segment, num_keys, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_subscription;
    }

    /* one keyval per requested key */
    for (k = 0; k < num_keys; k++) {
        rc = orte_gpr.create_keyval(&(values[0]->keyvals[k]),
                                    keys[k], ORTE_UNDEF, NULL);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto release_value;
        }
    }

    sub.cbfunc = cbfunc;

    /* do the subscription; a failure is logged and then falls through
     * the same cleanup path as success */
    rc = orte_gpr.subscribe(1, &subs, 1, &trigs);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

release_value:
    OBJ_RELEASE(values[0]);
free_subscription:
    free(sub.name);
free_trigger:
    free(trig.name);
free_segment:
    free(segment);
    return rc;
}
/**
* Setup io for the current node, then tell orterun we are ready for the actual
* processes.
* @retval ORTE_SUCCESS
* @retval error
*/
int
orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ)
int orte_odls_bproc_launch_local_procs(opal_buffer_t *data)
{
odls_bproc_child_t *child;
orte_odls_child_t *child;
opal_list_item_t* item;
orte_gpr_value_t *value, **values;
orte_gpr_keyval_t *kval;
char *node_name;
int rc;
orte_std_cntr_t i, j, kv, kv2, *sptr;
int src = 0;
orte_buffer_t *ack;
opal_buffer_t *ack;
bool connect_stdin;
orte_jobid_t jobid;
int cycle = 0;
/* first, retrieve the job number we are to launch from the
* returned data - we can extract the jobid directly from the
* subscription name we created
*/
if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/**
* hack for bproc4, change process group so that we do not receive signals
* from the parent/front-end process, as bproc4 does not currently allow the
* process to intercept the signal
*/
setpgid(0,0);
/* loop through the returned data to find the global info and
* the info for processes going onto this node
*/
values = (orte_gpr_value_t**)(data->values)->addr;
for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */
if (NULL != values[j]) {
i++;
value = values[j];
/* this must have come from one of the process containers, so it must
* contain data for a proc structure - see if it belongs to this node
*/
for (kv=0; kv < value->cnt; kv++) {
kval = value->keyvals[kv];
if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) {
/* Most C-compilers will bark if we try to directly compare the string in the
* kval data area against a regular string, so we need to "get" the data
* so we can access it */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if this is our node...must also protect against a zero-length string */
if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) {
/* ...harvest the info into a new child structure */
child = OBJ_NEW(odls_bproc_child_t);
for (kv2 = 0; kv2 < value->cnt; kv2++) {
kval = value->keyvals[kv2];
if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) {
/* copy the name into the child object */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
continue;
}
if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
child->app_idx = *sptr; /* save the index into the app_context objects */
continue;
}
} /* kv2 */
/* protect operation on the global list of children */
OPAL_THREAD_LOCK(&mca_odls_bproc_component.mutex);
opal_list_append(&mca_odls_bproc_component.children, &child->super);
opal_condition_signal(&mca_odls_bproc_component.cond);
OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.mutex);
}
}
} /* for kv */
} /* for j */
/* construct the list of children we are to launch */
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &jobid))) {
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:bproc:launch:local failed to construct child list on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
goto cleanup;
}
/* set up the io files for our children */
for(item = opal_list_get_first(&mca_odls_bproc_component.children);
item != opal_list_get_end(&mca_odls_bproc_component.children);
/* set up the io files for our children */
for(item = opal_list_get_first(&orte_odls_globals.children);
item != opal_list_get_end(&orte_odls_globals.children);
item = opal_list_get_next(item)) {
child = (odls_bproc_child_t *) item;
if(0 < mca_odls_bproc_component.debug) {
opal_output(0, "orte_odls_bproc_launch: setting up io for "
"[%lu,%lu,%lu] proc rank %lu\n",
ORTE_NAME_ARGS((child->name)),
child->name->vpid);
}
child = (orte_odls_child_t *) item;
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:bproc:launch:local setting up io for %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)));
/* only setup to forward stdin if it is rank 0, otherwise connect
* to /dev/null */
* to /dev/null
*/
if(0 == child->name->vpid) {
connect_stdin = true;
} else {
@ -638,7 +471,7 @@ orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_env
}
/* message to indicate that we are ready */
ack = OBJ_NEW(orte_buffer_t);
ack = OBJ_NEW(opal_buffer_t);
rc = orte_dss.pack(ack, &src, 1, ORTE_INT);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -29,84 +29,22 @@
#include "orte_config.h"
#include <sys/bproc.h>
#include "opal/mca/mca.h"
#include "opal/threads/condition.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/odls/odls.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/*
* Module open / close
*/
int orte_odls_bproc_component_open(void);
int orte_odls_bproc_component_close(void);
int orte_odls_bproc_finalize(void);
orte_odls_base_module_t* orte_odls_bproc_init(int *priority);
int orte_odls_bproc_component_query(mca_base_module_t **module, int *priority);
/*
* Startup / Shutdown
*/
int orte_odls_bproc_finalize(void);
ORTE_MODULE_DECLSPEC extern orte_odls_base_component_t mca_odls_bproc_component;
/*
* Interface
*/
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc);
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map);
int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ);
int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state);
int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc_name, int32_t signal);
END_C_DECLS
/**
* ODLS bproc_orted component: the base ODLS component followed by the
* settings and shared state kept by the bproc ODLS.
*/
struct orte_odls_bproc_component_t {
orte_odls_base_component_t super;
/**< The base class */
int debug;
/**< If greater than 0, print debugging information */
int priority;
/**< The priority of this component. This will be returned if we determine
* that bproc is available and running on this node. */
opal_mutex_t lock;
/**< Lock used to prevent some race conditions */
opal_condition_t cond;
/**< Condition used to wake up waiting threads */
opal_list_t children;
/**< List of children on this node */
};
/**
* Convenience typedef so the struct can be named without the struct keyword
*/
typedef struct orte_odls_bproc_component_t orte_odls_bproc_component_t;
/*
* List object to locally store the process names and pids of
* our children. This can subsequently be used to order termination
* or pass signals without looking the info up again.
*
* The constructor sets name to NULL, app_idx to -1 and alive to false;
* it does not touch pid. The destructor frees name when it is non-NULL.
*/
typedef struct odls_bproc_child_t {
opal_list_item_t super; /* required to place this on a list */
orte_process_name_t *name; /* the OpenRTE name of the proc (freed by the destructor) */
pid_t pid; /* local pid of the proc */
orte_std_cntr_t app_idx; /* index of the app_context for this proc (-1 until set) */
bool alive; /* is this proc alive? */
} odls_bproc_child_t;
/* declare the class so OBJ_NEW(odls_bproc_child_t) can be used */
OBJ_CLASS_DECLARATION(odls_bproc_child_t);
ORTE_MODULE_DECLSPEC extern orte_odls_bproc_component_t mca_odls_bproc_component;
extern orte_odls_base_module_t orte_odls_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_ODLS_BPROC_H_ */

Просмотреть файл

@ -21,30 +21,17 @@
* Takes care of the component stuff for the MCA.
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/odls/base/odls_private.h"
#include "odls_bproc.h"
/*
 * Instance the child list object.
 *
 * Constructor: puts a new child into its "nothing known yet" state
 * (not alive, no app_context index, no name). The pid field is not set here.
 */
static void odls_bproc_child_constructor(odls_bproc_child_t *ptr)
{
    ptr->alive   = false;
    ptr->app_idx = -1;
    ptr->name    = NULL;
}
/*
 * Destructor: frees the process name if one was ever attached to the child.
 */
static void odls_bproc_child_destructor(odls_bproc_child_t *ptr)
{
    if (NULL == ptr->name) {
        return;
    }
    free(ptr->name);
}
OBJ_CLASS_INSTANCE(odls_bproc_child_t,
opal_list_item_t,
odls_bproc_child_constructor,
odls_bproc_child_destructor);
extern orte_odls_base_module_t orte_odls_bproc_module;
/**
* The bproc component data structure used to store all the relevant data
@ -65,17 +52,14 @@ orte_odls_bproc_component_t mca_odls_bproc_component = {
ORTE_RELEASE_VERSION,
/* Component open and close functions */
orte_odls_bproc_component_open,
orte_odls_bproc_component_close
orte_odls_bproc_component_close,
orte_odls_bproc_component_query
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */
orte_odls_bproc_init,
orte_odls_bproc_finalize
}
};
/**
@ -84,42 +68,26 @@ orte_odls_bproc_component_t mca_odls_bproc_component = {
*/
int orte_odls_bproc_component_open(void)
{
/* initialize globals: construct the lock, the condition variable and the
 * list of children held in mca_odls_bproc_component */
OBJ_CONSTRUCT(&mca_odls_bproc_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_odls_bproc_component.cond, opal_condition_t);
OBJ_CONSTRUCT(&mca_odls_bproc_component.children, opal_list_t);
/* lookup parameters: register the "priority" and "debug" integer
 * parameters, passing the component's priority and debug fields as the
 * places to receive the values (100 and 0 look like the defaults -
 * TODO confirm against the mca_base_param_reg_int signature) */
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
"priority", NULL, false, false, 100,
&mca_odls_bproc_component.priority);
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
"debug", "If > 0 prints library debugging information",
false, false, 0, &mca_odls_bproc_component.debug);
/* the results of the registration calls are not checked; open always
 * reports success */
return ORTE_SUCCESS;
}
/**
* Initializes the module. We do not want to run unless we are not the seed,
* bproc is running, and we are not on the master node.
* Initializes the module.
*/
orte_odls_base_module_t *orte_odls_bproc_init(int *priority)
int orte_odls_bproc_component_query(mca_base_module_t **module, int *priority)
{
int ret;
struct bproc_version_t version;
/* the base open/select logic protects us against operation when
* we are NOT in a daemon, so we don't have to check that here
*/
/* check to see if BProc is running here */
ret = bproc_version(&version);
if (ret != 0) {
return NULL;
}
*priority = mca_odls_bproc_component.priority;
return &orte_odls_bproc_module;
*priority = 30;
*module = (mca_base_module_t *)&orte_odls_bproc_module;
return ORTE_SUCCESS;
}
/**
@ -127,8 +95,10 @@ orte_odls_base_module_t *orte_odls_bproc_init(int *priority)
*/
int orte_odls_bproc_component_close(void)
{
OBJ_DESTRUCT(&mca_odls_bproc_component.lock);
OBJ_DESTRUCT(&mca_odls_bproc_component.cond);
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
/* cleanup state */
while (NULL != (item = opal_list_remove_first(&orte_odls_globals.children))) {
OBJ_RELEASE(item);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -19,8 +19,8 @@
* @file:
*/
#ifndef ORTE_ODLS_H
#define ORTE_ODLS_H
#ifndef ORTE_ODLS_DEFAULT_H
#define ORTE_ODLS_DEFAULT_H
#include "orte_config.h"

Просмотреть файл

@ -32,9 +32,9 @@ endif
sources = \
plm_bproc.h \
plm_bproc_component.c \
plm_bproc.c \
plm_bproc_state.c \
plm_bproc_component.c
plm_bproc_state.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)

Просмотреть файл

@ -21,10 +21,11 @@
*/
/**
* @file:
* Part of the bproc launcher. See pls_bproc.h for an overview of how it works.
* Part of the bproc launcher. See plm_bproc.h for an overview of how it works.
*/
#include "orte_config.h"
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
@ -60,72 +61,60 @@
#include "opal/util/trace.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/schema/schema_types.h"
#include "orte/mca/smr/smr.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/params.h"
#include "orte/mca/pls/base/pls_private.h"
#include "pls_bproc.h"
#include "orte/mca/plm/base/plm_private.h"
#include "plm_bproc.h"
static bool daemons_launched;
static bool bynode;
#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS
int orte_pls_bproc_launch_threaded(orte_jobid_t);
#endif
/*
 * Forward declarations of the module entry points. The module function
 * table that follows refers to these by their plm_bproc_* names, and
 * plm_bproc_init() is only defined after that table, so the prototypes must
 * use the same names.
 */
static int plm_bproc_init(void);
static int plm_bproc_launch_job(orte_job_t *jdata);
static int plm_bproc_terminate_job(orte_jobid_t jobid);
static int plm_bproc_terminate_orteds(void);
static int plm_bproc_signal_job(orte_jobid_t jobid, int32_t signal);
static int plm_bproc_finalize(void);
/* NOTE(review): kept as found; nothing in the visible part of this file
 * defines or calls these two - confirm whether they can be dropped */
static int plm_tm_connect(void);
static int plm_tm_disconnect(void);
/**
* Initialization of the bproc module with all the needed function pointers
*/
orte_pls_base_module_t orte_pls_bproc_module = {
#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS
orte_pls_bproc_launch_threaded,
#else
orte_pls_bproc_launch,
#endif
orte_pls_bproc_terminate_job,
orte_pls_bproc_terminate_orteds,
orte_pls_bproc_terminate_proc,
orte_pls_bproc_signal_job,
orte_pls_bproc_signal_proc,
orte_pls_bproc_cancel_operation,
orte_pls_bproc_finalize
orte_plm_base_module_t orte_plm_tm_module = {
plm_bproc_init,
orte_plm_base_set_hnp_name,
plm_bproc_launch_job,
NULL,
plm_bproc_terminate_job,
plm_bproc_terminate_orteds,
plm_bproc_signal_job,
plm_bproc_finalize
};
static int orte_pls_bproc_node_list(orte_job_map_t *map,
int *node_array, int * num_nodes,
int num_procs);
static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io,
int node_rank, int app_context);
static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data);
static void orte_pls_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data);
#ifdef MCA_pls_bproc_scyld
/* compatibility functions for scyld bproc and pre 3.2.0 LANL bproc */
static int bproc_vexecmove_io(int nnodes, int *nodes, int *pids,
struct bproc_io_t *io, int iolen, const char *cmd,
char * const argv[], char * envp[]);
static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
char * const argv[], char * envp[]);
#endif
static void orte_pls_bproc_setup_env(char *** env);
static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp);
static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
orte_vpid_t vpid_start, int app_context);
/**
 * Init the module.
 *
 * Calls orte_plm_base_comm_start(). A failure is logged with
 * ORTE_ERROR_LOG and its code is handed back to the caller unchanged.
 *
 * @retval ORTE_SUCCESS if orte_plm_base_comm_start() succeeded
 * @retval error        the code returned by orte_plm_base_comm_start()
 */
static int plm_bproc_init(void)
{
    int status = orte_plm_base_comm_start();

    if (ORTE_SUCCESS == status) {
        return ORTE_SUCCESS;
    }

    ORTE_ERROR_LOG(status);
    return status;
}
/**
* Creates a list of nodes from a job map that should participate in the next launch cycle.
@ -134,10 +123,11 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
* @param num_nodes a pointer to the place where we will store the number of nodes in the array
* @param num_procs the number of processes that a node must have to be placed on the list
*/
static int orte_pls_bproc_node_list(orte_job_map_t *map, int *node_array, int *num_nodes, int num_procs)
static int bproc_node_list(orte_job_map_t *map, int *node_array, int *num_nodes, int num_procs)
{
opal_list_item_t *item;
orte_mapped_node_t *node;
orte_node_t **nodes;
orte_std_cntr_t i;
OPAL_TRACE(1);
@ -146,15 +136,13 @@ static int orte_pls_bproc_node_list(orte_job_map_t *map, int *node_array, int *n
memset((void*)node_array, -1, sizeof(int) * map->num_nodes);
/* build the node list */
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
if (node->num_procs >= num_procs) {
node_array[(*num_nodes)++] = atoi(node->nodename);
nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->num_nodes; i++) {
if (nodes[i]->num_procs >= num_procs) {
node_array[(*num_nodes)++] = atoi(nodes[i]->name);
}
}
return ORTE_SUCCESS;
}
@ -168,21 +156,12 @@ static int orte_pls_bproc_node_list(orte_job_map_t *map, int *node_array, int *n
* @retval ORTE_SUCCESS
* @retval error
*/
static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io,
int node_rank, int app_context) {
static int bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io,
int node_rank, int app_context) {
char *frontend = NULL, *path = NULL, *job = NULL;
int rc, i;
OPAL_TRACE(1);
/* ensure that system info is set */
orte_sys_info();
if (NULL == orte_system_info.user) { /* error condition */
return ORTE_ERROR;
}
if (NULL == orte_universe_info.name) { /* error condition */
return ORTE_ERROR;
}
rc = orte_ns.convert_jobid_to_string(&job, jobid);
if(ORTE_SUCCESS != rc) {
@ -204,7 +183,7 @@ static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
if (mca_pls_bproc_component.debug) {
if (mca_plm_bproc_component.debug) {
opal_output(0, "mpirun bproc io setup. Path: %s\n", path);
}
io[i].fd = i;
@ -245,7 +224,7 @@ static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io,
* @param status tells why the process died
* @param data a pointer to the process's name
*/
static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) {
static void orte_plm_bproc_waitpid_cb(pid_t wpid, int status, void *data) {
orte_process_name_t * proc = (orte_process_name_t*) data;
int rc;
@ -270,7 +249,7 @@ static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) {
* @param status tells why the daemon died
* @param data a pointer to the node the daemon was on
*/
static void orte_pls_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data) {
static void orte_plm_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data) {
OPAL_TRACE(1);
@ -295,19 +274,19 @@ static void orte_pls_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data)
ORTE_ERROR_LOG(rc);
}
}
OPAL_THREAD_LOCK(&mca_pls_bproc_component.lock);
if(0 < mca_pls_bproc_component.num_daemons) {
mca_pls_bproc_component.num_daemons--;
OPAL_THREAD_LOCK(&mca_plm_bproc_component.lock);
if(0 < mca_plm_bproc_component.num_daemons) {
mca_plm_bproc_component.num_daemons--;
}
opal_condition_signal(&mca_pls_bproc_component.condition);
OPAL_THREAD_UNLOCK(&mca_pls_bproc_component.lock);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "in orte_pls_bproc_waitpid_daemon_cb, %d daemons left\n",
mca_pls_bproc_component.num_daemons);
opal_condition_signal(&mca_plm_bproc_component.condition);
OPAL_THREAD_UNLOCK(&mca_plm_bproc_component.lock);
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "in orte_plm_bproc_waitpid_daemon_cb, %d daemons left\n",
mca_plm_bproc_component.num_daemons);
}
}
#ifdef MCA_pls_bproc_scyld
#ifdef MCA_plm_bproc_scyld
/**
* compatibility function for scyld bproc and pre 3.2.0 LANL bproc. See the
* bproc documentation for details
@ -331,12 +310,12 @@ static int bproc_vexecmove_io(int nnodes, int *nodes, int *pids,
opal_setenv("BPROC_RANK", rank, true, &envp);
bproc_execmove_io(nodes[i], io, iolen, cmd, argv, envp);
/* if we get here, there was an error */
opal_show_help("help-pls-bproc.txt", "bproc-vexecmove-launch", true,
opal_show_help("help-plm-bproc.txt", "bproc-vexecmove-launch", true,
cmd, nodes[i], errno);
ORTE_ERROR_LOG(ORTE_ERROR);
exit(-1);
} else if(-1 == pids[i]) {
opal_show_help("help-pls-bproc.txt", "bproc-vexecmove-fork", true,
opal_show_help("help-plm-bproc.txt", "bproc-vexecmove-fork", true,
errno);
ORTE_ERROR_LOG(ORTE_ERROR);
return -1;
@ -359,7 +338,7 @@ static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
* Sets up the passed environment for processes launched by the bproc launcher.
* @param env a pointer to the environment to setup
*/
static void orte_pls_bproc_setup_env(char *** env)
static void orte_plm_bproc_setup_env(char *** env)
{
char ** merged;
char * var;
@ -386,7 +365,7 @@ static void orte_pls_bproc_setup_env(char *** env)
/* make sure the username used to create the bproc directory is the same on
* the backend as the frontend */
var = mca_base_param_environ_variable("pls","bproc","username");
var = mca_base_param_environ_variable("plm","bproc","username");
opal_setenv(var, orte_system_info.user, true, env);
free(var);
@ -435,7 +414,7 @@ static void orte_pls_bproc_setup_env(char *** env)
* @retval ORTE_SUCCESS
* @retval error
*/
static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
static int orte_plm_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
int * daemon_list = NULL;
int num_daemons = 0;
int rc, i;
@ -450,15 +429,15 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
orte_std_cntr_t idx;
struct stat buf;
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;
orte_plm_daemon_info_t *dmn;
opal_list_item_t *item;
struct timeval joblaunchstart, launchstart, launchstop;
OPAL_TRACE(1);
if (orte_pls_base.timing) {
if (orte_plm_base.timing) {
if (0 != gettimeofday(&joblaunchstart, NULL)) {
opal_output(0, "pls_bproc: could not obtain start time");
opal_output(0, "plm_bproc: could not obtain start time");
}
}
@ -514,7 +493,7 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
}
/* setup the daemon environment */
orte_pls_bproc_setup_env(envp);
orte_plm_bproc_setup_env(envp);
/* direct the daemons to drop contact files so the local procs
* can learn how to contact them - this is used for routing
@ -528,7 +507,7 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
* push that value into their environment */
stride = 1;
asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride");
var = mca_base_param_environ_variable("plm", "bproc", "stride");
opal_setenv(var, param, true, envp);
free(param);
free(var);
@ -542,10 +521,10 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
}
argc = 0;
opal_argv_append(&argc, &argv, mca_pls_bproc_component.orted);
opal_argv_append(&argc, &argv, mca_plm_bproc_component.orted);
/* check for debug flags */
#if 0
if (mca_pls_bproc_component.debug) {
if (mca_plm_bproc_component.debug) {
opal_argv_append(&argc, &argv, "--debug");
opal_argv_append(&argc, &argv, "--debug-daemons");
}
@ -567,20 +546,20 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
opal_argv_append(&argc, &argv, "--no-daemonize");
/* find orted */
if(0 == stat(mca_pls_bproc_component.orted, &buf)) {
orted_path = strdup(mca_pls_bproc_component.orted);
if(0 == stat(mca_plm_bproc_component.orted, &buf)) {
orted_path = strdup(mca_plm_bproc_component.orted);
} else {
orted_path = opal_path_findv(mca_pls_bproc_component.orted, 0, environ, NULL);
orted_path = opal_path_findv(mca_plm_bproc_component.orted, 0, environ, NULL);
if(NULL == orted_path) {
orted_path = opal_os_path( false, opal_install_dirs.bindir, mca_pls_bproc_component.orted, NULL );
orted_path = opal_os_path( false, opal_install_dirs.bindir, mca_plm_bproc_component.orted, NULL );
if( (NULL != orted_path) || (0 != stat(orted_path, &buf)) ) {
char *path = getenv("PATH");
if (NULL == path) {
path = ("PATH is empty!");
}
opal_show_help("help-pls-bproc.txt", "no-orted", true,
mca_pls_bproc_component.orted,
mca_pls_bproc_component.orted, path, opal_install_dirs.bindir);
opal_show_help("help-plm-bproc.txt", "no-orted", true,
mca_plm_bproc_component.orted,
mca_plm_bproc_component.orted, path, opal_install_dirs.bindir);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -588,50 +567,50 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
}
}
if(0 < mca_pls_bproc_component.debug) {
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "PLS_BPROC DEBUG: launching %d daemons. cmd: %s ",
num_daemons, orted_path);
}
/* launch the daemons */
if (orte_pls_base.timing) {
if (orte_plm_base.timing) {
if (0 != gettimeofday(&launchstart, NULL)) {
opal_output(0, "pls_bproc: could not obtain start time");
opal_output(0, "plm_bproc: could not obtain start time");
}
}
if (mca_pls_bproc_component.do_not_launch) {
if (mca_plm_bproc_component.do_not_launch) {
for (i=0; i < num_daemons; i++) pids[i] = i+1;
rc = num_daemons;
} else {
rc = bproc_vexecmove(num_daemons, daemon_list, pids, orted_path, argv, *envp);
}
if (orte_pls_base.timing) {
if (orte_plm_base.timing) {
if (0 != gettimeofday(&launchstop, NULL)) {
opal_output(0, "pls_bproc: could not obtain stop time");
opal_output(0, "plm_bproc: could not obtain stop time");
} else {
opal_output(0, "pls_bproc: daemon launch time is %ld usec",
opal_output(0, "plm_bproc: daemon launch time is %ld usec",
(launchstop.tv_sec - launchstart.tv_sec)*1000000 +
(launchstop.tv_usec - launchstart.tv_usec));
}
}
if(rc != num_daemons) {
opal_show_help("help-pls-bproc.txt", "daemon-launch-number", true,
opal_show_help("help-plm-bproc.txt", "daemon-launch-number", true,
num_daemons, rc, orted_path);
rc = ORTE_ERROR;
goto cleanup;
}
if(0 < mca_pls_bproc_component.debug) {
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "PLS_BPROC DEBUG: %d daemons launched. First pid: %d\n",
rc, *pids);
}
for(i = 0; i < num_daemons; i++) {
if(0 >= pids[i]) {
opal_show_help("help-pls-bproc.txt", "daemon-launch-bad-pid", true,
opal_show_help("help-plm-bproc.txt", "daemon-launch-bad-pid", true,
daemon_list[i], pids[i], errno, orted_path);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
@ -642,13 +621,13 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
rc = orte_pls_bproc_set_node_pid(ORTE_PROC_MY_NAME->cellid, param, map->job, pids[i]);
rc = orte_plm_bproc_set_node_pid(ORTE_PROC_MY_NAME->cellid, param, map->job, pids[i]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn = OBJ_NEW(orte_plm_daemon_info_t);
rc = orte_ns.create_process_name(&(dmn->name), ORTE_PROC_MY_NAME->cellid, 0,
daemon_vpid_start + i);
if(ORTE_SUCCESS != rc) {
@ -665,7 +644,7 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
}
/* store the daemon info */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -673,9 +652,9 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
* daemon info so that short-lived apps don't cause mpirun to
* try and terminate the orteds before we record them
*/
if (!mca_pls_bproc_component.do_not_launch) {
if (!mca_plm_bproc_component.do_not_launch) {
for (i=0; i < num_daemons; i++) {
rc = orte_wait_cb(pids[i], orte_pls_bproc_waitpid_daemon_cb,
rc = orte_wait_cb(pids[i], orte_plm_bproc_waitpid_daemon_cb,
&daemon_list[i]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -705,17 +684,17 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
if(-1 == src[0]) {
/* one of the daemons has failed to properly launch. The error is sent
* by orte_pls_bproc_waitpid_daemon_cb */
* by orte_plm_bproc_waitpid_daemon_cb */
if(-1 == src[1]) { /* did not die on a signal */
opal_show_help("help-pls-bproc.txt", "daemon-died-no-signal", true,
opal_show_help("help-plm-bproc.txt", "daemon-died-no-signal", true,
src[2], src[3]);
} else { /* died on a signal */
opal_show_help("help-pls-bproc.txt", "daemon-died-signal", true,
opal_show_help("help-plm-bproc.txt", "daemon-died-signal", true,
src[2], src[3], src[1]);
}
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
orte_pls_bproc_terminate_job(map->job, &orte_abort_timeout, NULL);
orte_plm_bproc_terminate_job(map->job, &orte_abort_timeout, NULL);
goto cleanup;
}
}
@ -724,11 +703,11 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) {
/* indicate that the daemons have now launched */
daemons_launched = true;
if (orte_pls_base.timing) {
if (orte_plm_base.timing) {
if (0 != gettimeofday(&launchstop, NULL)) {
opal_output(0, "pls_bproc: could not obtain stop time");
opal_output(0, "plm_bproc: could not obtain stop time");
} else {
opal_output(0, "pls_bproc: total job launch time is %ld usec",
opal_output(0, "plm_bproc: total job launch time is %ld usec",
(launchstop.tv_sec - joblaunchstart.tv_sec)*1000000 +
(launchstop.tv_usec - joblaunchstart.tv_usec));
}
@ -754,7 +733,7 @@ cleanup:
static int
orte_pls_bproc_node_failed(orte_gpr_notify_message_t *msg)
orte_plm_bproc_node_failed(orte_gpr_notify_message_t *msg)
{
orte_jobid_t job;
@ -778,10 +757,10 @@ orte_pls_bproc_node_failed(orte_gpr_notify_message_t *msg)
orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target);
/* terminate all jobs in the job family */
orte_pls_bproc_terminate_job(job, &orte_abort_timeout, NULL);
orte_plm_bproc_terminate_job(job, &orte_abort_timeout, NULL);
/* kill the daemons */
orte_pls_bproc_terminate_job(0, &orte_abort_timeout, NULL);
orte_plm_bproc_terminate_job(0, &orte_abort_timeout, NULL);
/* shouldn't ever get here.. */
exit(1);
@ -803,7 +782,7 @@ orte_pls_bproc_node_failed(orte_gpr_notify_message_t *msg)
* @retval ORTE_SUCCESS
* @retval error
*/
static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
static int orte_plm_bproc_launch_app(orte_job_map_t* map, int num_slots,
orte_vpid_t vpid_start, int app_context) {
int *node_array, num_nodes, cycle;
int rc, i, j, stride;
@ -822,7 +801,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
/* set up app context */
asprintf(&param, "%d", app_context);
var = mca_base_param_environ_variable("pls", "bproc", "app_context");
var = mca_base_param_environ_variable("plm", "bproc", "app_context");
opal_setenv(var, param, true, &env);
free(param);
free(var);
@ -850,7 +829,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
}
/* and push that value into the process' environment */
asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride");
var = mca_base_param_environ_variable("plm", "bproc", "stride");
opal_setenv(var, param, true, &env);
free(param);
free(var);
@ -883,20 +862,20 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
i = 1;
num_processes = map->vpid_range;
rc = orte_pls_bproc_node_list(map, node_array, &num_nodes, i);
rc = orte_plm_bproc_node_list(map, node_array, &num_nodes, i);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_output_verbose(1, orte_pls_base.pls_output,
opal_output_verbose(1, orte_plm_base.plm_output,
"launching app %s", map->apps[app_context]->app);
while(0 != num_nodes) {
if (0 < mca_pls_bproc_component.debug) {
opal_output_verbose(1, orte_pls_base.pls_output,
if (0 < mca_plm_bproc_component.debug) {
opal_output_verbose(1, orte_plm_base.plm_output,
"\tlaunching cycle %d", i);
for (dbg=0; dbg<num_nodes; dbg++) {
opal_output_verbose(1, orte_pls_base.pls_output,
opal_output_verbose(1, orte_plm_base.plm_output,
"\t\tlaunching on node %d", node_array[dbg]);
}
}
@ -909,13 +888,13 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
goto cleanup;
}
rc = orte_pls_bproc_setup_io(map->job, bproc_io, i - 1, app_context);
rc = orte_plm_bproc_setup_io(map->job, bproc_io, i - 1, app_context);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: launching %d processes:", num_nodes);
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "plm_bproc: launching %d processes:", num_nodes);
}
/* allocate space for bproc to return the pids */
@ -926,7 +905,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
goto cleanup;
}
if (mca_pls_bproc_component.do_not_launch) {
if (mca_plm_bproc_component.do_not_launch) {
for (j=0; j < num_nodes; j++) pids[j] = j+1;
rc = num_nodes;
} else {
@ -935,12 +914,12 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
map->apps[app_context]->argv, env);
}
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: %d processes launched. First pid: %d",
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "plm_bproc: %d processes launched. First pid: %d",
rc, *pids);
}
if(rc != num_nodes) {
opal_show_help("help-pls-bproc.txt", "proc-launch-number", true,
opal_show_help("help-plm-bproc.txt", "proc-launch-number", true,
num_nodes, rc, map->apps[app_context]->app);
rc = ORTE_ERROR;
goto cleanup;
@ -948,7 +927,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
for(j = 0; j < num_nodes; j++) {
if(0 >= pids[j]) {
opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true,
opal_show_help("help-plm-bproc.txt", "proc-launch-bad-pid", true,
node_array[j], pids[j], errno, map->apps[app_context]->app);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
@ -960,13 +939,13 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
ORTE_ERROR_LOG(rc);
goto cleanup;
}
orte_pls_bproc_set_proc_pid(proc_name, pids[j], node_array[j]);
orte_plm_bproc_set_proc_pid(proc_name, pids[j], node_array[j]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (!mca_pls_bproc_component.do_not_launch) {
rc = orte_wait_cb(pids[j], orte_pls_bproc_waitpid_cb, proc_name);
if (!mca_plm_bproc_component.do_not_launch) {
rc = orte_wait_cb(pids[j], orte_plm_bproc_waitpid_cb, proc_name);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -998,7 +977,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots,
}
}
rc = orte_pls_bproc_node_list(map, node_array, &num_nodes, i);
rc = orte_plm_bproc_node_list(map, node_array, &num_nodes, i);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -1017,7 +996,7 @@ cleanup:
}
/**
* The main bproc launcher. See pls_bproc.h for a high level overview of how
* The main bproc launcher. See plm_bproc.h for a high level overview of how
* the bproc launching works.
* Here we:
* -# Launch the daemons on the backend nodes.
@ -1029,7 +1008,7 @@ cleanup:
* @retval ORTE_SUCCESS
* @retval error
*/
int orte_pls_bproc_launch(orte_jobid_t jobid) {
int orte_plm_bproc_launch(orte_jobid_t jobid) {
orte_job_map_t* map;
orte_mapped_node_t *map_node;
orte_vpid_t vpid_launch;
@ -1044,8 +1023,8 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
OPAL_TRACE(1);
/* make sure the pls_bproc receive function has been started */
if (ORTE_SUCCESS != (rc = orte_pls_bproc_comm_start())) {
/* make sure the plm_bproc receive function has been started */
if (ORTE_SUCCESS != (rc = orte_plm_bproc_comm_start())) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -1126,7 +1105,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
while (NULL != (ras_node = (orte_ras_node_t*)opal_list_remove_first(&nodelist))) {
if (num_slots != ras_node->node_slots) {
/* mismatch - error out */
opal_show_help("help-pls-bproc.txt", "mismatched-slots", true);
opal_show_help("help-plm-bproc.txt", "mismatched-slots", true);
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
rc = ORTE_ERR_NOT_SUPPORTED;
goto cleanup;
@ -1136,8 +1115,8 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
OBJ_DESTRUCT(&nodelist);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: --- starting to launch procs ---");
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "plm_bproc: --- starting to launch procs ---");
}
/* save the daemon environment */
@ -1145,19 +1124,19 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* for each application context, setup its env */
for(i=0; i < map->num_apps; i++) {
orte_pls_bproc_setup_env(&map->apps[i]->env);
orte_plm_bproc_setup_env(&map->apps[i]->env);
}
/* tell the smr which nodes to monitor so we can be notified
when the node's state changes, useful for aborting when
a bproc node up and dies */
if (ORTE_SUCCESS != (rc = orte_smr.begin_monitoring(map, orte_pls_bproc_node_failed, NULL))) {
if (ORTE_SUCCESS != (rc = orte_smr.begin_monitoring(map, orte_plm_bproc_node_failed, NULL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* launch the daemons on all nodes which have processes assigned to them */
rc = orte_pls_bproc_launch_daemons(map, &daemon_env);
rc = orte_plm_bproc_launch_daemons(map, &daemon_env);
opal_argv_free(daemon_env);
if(ORTE_SUCCESS != rc) {
@ -1174,7 +1153,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup;
}
rc = orte_pls_bproc_launch_app(map, num_slots, vpid_launch, context);
rc = orte_plm_bproc_launch_app(map, num_slots, vpid_launch, context);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -1187,7 +1166,7 @@ cleanup:
OBJ_RELEASE(map);
if (mca_pls_bproc_component.do_not_launch) {
if (mca_plm_bproc_component.do_not_launch) {
/* indicate that we failed to launch, but do so silently */
return ORTE_ERR_SILENT;
}
@ -1197,25 +1176,25 @@ cleanup:
/**
* Terminate all processes associated with this job */
int orte_pls_bproc_terminate_job(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs) {
int orte_plm_bproc_terminate_job(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs) {
pid_t* pids;
orte_std_cntr_t i, num_pids;
int rc;
OPAL_TRACE(1);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "orte_pls_bproc: terminating job %ld", jobid);
if(0 < mca_plm_bproc_component.debug) {
opal_output(0, "orte_plm_bproc: terminating job %ld", jobid);
}
/* kill application process */
if(ORTE_SUCCESS != (rc = orte_pls_bproc_get_proc_pids(jobid, &pids, &num_pids, attrs)))
if(ORTE_SUCCESS != (rc = orte_plm_bproc_get_proc_pids(jobid, &pids, &num_pids, attrs)))
return rc;
for(i=0; i<num_pids; i++) {
if(mca_pls_bproc_component.debug) {
opal_output(0, "orte_pls_bproc: killing proc: %d\n", pids[i]);
if(mca_plm_bproc_component.debug) {
opal_output(0, "orte_plm_bproc: killing proc: %d\n", pids[i]);
}
kill(pids[i], mca_pls_bproc_component.terminate_sig);
kill(pids[i], mca_plm_bproc_component.terminate_sig);
}
if(NULL != pids)
free(pids);
@ -1227,7 +1206,7 @@ int orte_pls_bproc_terminate_job(orte_jobid_t jobid, struct timeval *timeout, op
/**
* Terminate the orteds for a given job
*/
int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs)
int orte_plm_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs)
{
int rc;
opal_list_t daemons;
@ -1237,13 +1216,13 @@ int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout,
/* construct the list of active daemons on this job */
OBJ_CONSTRUCT(&daemons, opal_list_t);
if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_get_active_daemons(&daemons, jobid, attrs))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* now tell them to die! */
if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons, timeout))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(&daemons, timeout))) {
ORTE_ERROR_LOG(rc);
}
@ -1258,15 +1237,15 @@ CLEANUP:
/**
* Terminate a specific process.
*/
int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name) {
int orte_plm_bproc_terminate_proc(const orte_process_name_t* proc_name) {
int rc;
pid_t pid;
OPAL_TRACE(1);
if(ORTE_SUCCESS != (rc = orte_pls_bproc_get_proc_pid(proc_name, &pid)))
if(ORTE_SUCCESS != (rc = orte_plm_bproc_get_proc_pid(proc_name, &pid)))
return rc;
if(kill(pid, mca_pls_bproc_component.terminate_sig) != 0) {
if(kill(pid, mca_plm_bproc_component.terminate_sig) != 0) {
switch(errno) {
case EINVAL:
return ORTE_ERR_BAD_PARAM;
@ -1284,7 +1263,7 @@ int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name) {
/**
* Signal all processes associated with this job
*/
int orte_pls_bproc_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs) {
int orte_plm_bproc_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs) {
pid_t* pids;
orte_std_cntr_t i, num_pids;
int rc;
@ -1292,11 +1271,11 @@ int orte_pls_bproc_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *a
OPAL_TRACE(1);
/* signal application process */
if(ORTE_SUCCESS != (rc = orte_pls_bproc_get_proc_pids(jobid, &pids, &num_pids, attrs)))
if(ORTE_SUCCESS != (rc = orte_plm_bproc_get_proc_pids(jobid, &pids, &num_pids, attrs)))
return rc;
for(i=0; i<num_pids; i++) {
if(mca_pls_bproc_component.debug) {
opal_output(0, "orte_pls_bproc: signaling proc: %d\n", pids[i]);
if(mca_plm_bproc_component.debug) {
opal_output(0, "orte_plm_bproc: signaling proc: %d\n", pids[i]);
}
kill(pids[i], (int)signal);
}
@ -1310,13 +1289,13 @@ int orte_pls_bproc_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *a
/**
* Signal a specific process.
*/
int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t signal) {
int orte_plm_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t signal) {
int rc;
pid_t pid;
OPAL_TRACE(1);
if(ORTE_SUCCESS != (rc = orte_pls_bproc_get_proc_pid(proc_name, &pid)))
if(ORTE_SUCCESS != (rc = orte_plm_bproc_get_proc_pid(proc_name, &pid)))
return rc;
if(kill(pid, (int)signal) != 0) {
switch(errno) {
@ -1336,13 +1315,13 @@ int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t sig
/**
* Cancel an operation involving comm to an orted
*/
int orte_pls_bproc_cancel_operation(void)
int orte_plm_bproc_cancel_operation(void)
{
int rc;
OPAL_TRACE(1);
if (ORTE_SUCCESS != (rc = orte_pls_base_orted_cancel_operation())) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_cancel_operation())) {
ORTE_ERROR_LOG(rc);
}
@ -1353,7 +1332,7 @@ int orte_pls_bproc_cancel_operation(void)
/**
* Module cleanup
*/
int orte_pls_bproc_finalize(void)
int orte_plm_bproc_finalize(void)
{
return ORTE_SUCCESS;
}
@ -1365,16 +1344,16 @@ int orte_pls_bproc_finalize(void)
#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS
struct orte_pls_bproc_stack_t {
struct orte_plm_bproc_stack_t {
opal_condition_t cond;
opal_mutex_t mutex;
bool complete;
orte_jobid_t jobid;
int rc;
};
typedef struct orte_pls_bproc_stack_t orte_pls_bproc_stack_t;
typedef struct orte_plm_bproc_stack_t orte_plm_bproc_stack_t;
static void orte_pls_bproc_stack_construct(orte_pls_bproc_stack_t* stack)
static void orte_plm_bproc_stack_construct(orte_plm_bproc_stack_t* stack)
{
OBJ_CONSTRUCT(&stack->mutex, opal_mutex_t);
OBJ_CONSTRUCT(&stack->cond, opal_condition_t);
@ -1382,40 +1361,40 @@ static void orte_pls_bproc_stack_construct(orte_pls_bproc_stack_t* stack)
stack->complete = false;
}
static void orte_pls_bproc_stack_destruct(orte_pls_bproc_stack_t* stack)
static void orte_plm_bproc_stack_destruct(orte_plm_bproc_stack_t* stack)
{
OBJ_DESTRUCT(&stack->mutex);
OBJ_DESTRUCT(&stack->cond);
}
static OBJ_CLASS_INSTANCE(
orte_pls_bproc_stack_t,
orte_plm_bproc_stack_t,
opal_object_t,
orte_pls_bproc_stack_construct,
orte_pls_bproc_stack_destruct);
orte_plm_bproc_stack_construct,
orte_plm_bproc_stack_destruct);
static void orte_pls_bproc_launch_cb(int fd, short event, void* args)
static void orte_plm_bproc_launch_cb(int fd, short event, void* args)
{
orte_pls_bproc_stack_t *stack = (orte_pls_bproc_stack_t*)args;
stack->rc = orte_pls_bproc_launch(stack->jobid);
orte_plm_bproc_stack_t *stack = (orte_plm_bproc_stack_t*)args;
stack->rc = orte_plm_bproc_launch(stack->jobid);
OPAL_THREAD_LOCK(&stack->mutex);
stack->complete = true;
opal_condition_signal(&stack->cond);
OPAL_THREAD_UNLOCK(&stack->mutex);
}
int orte_pls_bproc_launch_threaded(orte_jobid_t jobid)
int orte_plm_bproc_launch_threaded(orte_jobid_t jobid)
{
struct timeval tv = { 0, 0 };
struct opal_event event;
struct orte_pls_bproc_stack_t stack;
struct orte_plm_bproc_stack_t stack;
OBJ_CONSTRUCT(&stack, orte_pls_bproc_stack_t);
OBJ_CONSTRUCT(&stack, orte_plm_bproc_stack_t);
stack.jobid = jobid;
opal_evtimer_set(&event, orte_pls_bproc_launch_cb, &stack);
opal_evtimer_set(&event, orte_plm_bproc_launch_cb, &stack);
opal_evtimer_add(&event, &tv);
OPAL_THREAD_LOCK(&stack.mutex);

Просмотреть файл

@ -20,131 +20,37 @@
*/
/**
* @file:
* Header file for the bproc launcher. This launcher is actually split into 2
* modules: pls_bproc & pls_bproc_orted. The general idea behind this launcher is:
* -# pls_bproc is called by orterun. It figures out the process mapping and
* launches orted's on the nodes
* -# pls_bproc_orted is called by orted. This module initializes either a pty or
* pipes, places symlinks to them in well-known points of the filesystem, and
* sets up the io forwarding. It then sends an ack back to orterun.
* -# pls_bproc waits for an ack to come back from the orteds, then does several
* parallel launches of the application processes. The number of launches is
* equal to the maximum number of processes on a node. For example, if there
* were 2 processes assigned to node 1, and 1 process assigned to node 2, we
* would do a parallel launch that launches one process on each node, then
* another which launches another process on node 1.
*/
#ifndef ORTE_PLS_BPROC_H_
#define ORTE_PLS_BPROC_H_
#ifndef ORTE_PLM_BPROC_H_
#define ORTE_PLM_BPROC_H_
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/constants.h"
#include <sys/bproc.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "orte/mca/plm/base/base.h"
#include "opal/threads/condition.h"
#include "orte/class/orte_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/pls/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Module open / close
*/
int orte_pls_bproc_component_open(void);
int orte_pls_bproc_component_close(void);
/*
* Startup / Shutdown
*/
orte_pls_base_module_t* orte_pls_bproc_init(int *priority);
int orte_pls_bproc_finalize(void);
/*
* Interface
*/
int orte_pls_bproc_launch(orte_jobid_t);
int orte_pls_bproc_terminate_job(orte_jobid_t, struct timeval *timeout, opal_list_t*);
int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name);
int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t*);
int orte_pls_bproc_signal_job(orte_jobid_t, int32_t, opal_list_t*);
int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t);
int orte_pls_bproc_cancel_operation(void);
/* Utility routine to get/set process pid */
ORTE_DECLSPEC int orte_pls_bproc_set_proc_pid(const orte_process_name_t*, pid_t, int);
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pid(const orte_process_name_t*, pid_t*);
/**
* Utility routine to retrieve all process pids w/in a specified job.
*/
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t** pids,
orte_std_cntr_t* num_pids,
opal_list_t *attrs);
BEGIN_C_DECLS
/**
* Utility routine to get/set daemon pid
* PLM bproc Component
*/
ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid);
ORTE_DECLSPEC int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids);
/* utility functions for abort communications */
int orte_pls_bproc_comm_start(void);
int orte_pls_bproc_comm_stop(void);
void orte_pls_bproc_recv(int status, orte_process_name_t* sender,
orte_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/**
* PLS bproc Component
*/
struct orte_pls_bproc_component_t {
orte_pls_base_component_t super;
struct orte_plm_bproc_component_t {
/**< The base class */
char * orted;
orte_plm_base_component_t super;
/**< The orted executable. This can be an absolute path, or if not found
* we will look for it in the user's path */
int debug;
/**< If greater than 0 print debugging information */
int priority;
/**< The priority of this component. This will be returned if we determine
* that bproc is available and running on this node, */
int terminate_sig;
/**< The signal that gets sent to a process to kill it. */
opal_mutex_t lock;
/**< Lock used to prevent some race conditions */
opal_condition_t condition;
/**< Condition that is signaled when all the daemons have died */
bool recv_issued;
/**< Indicates that the comm recv for reporting abnormal proc termination
* has been issued
*/
bool do_not_launch;
/**< for test purposes, do everything but the actual launch */
orte_std_cntr_t num_daemons;
/**< track the number of daemons being launched so we can tell when
* all have reported in */
char * orted;
};
/**
* Convenience typedef
*/
typedef struct orte_pls_bproc_component_t orte_pls_bproc_component_t;
typedef struct orte_plm_bproc_component_t orte_plm_bproc_component_t;
ORTE_DECLSPEC orte_pls_bproc_component_t mca_pls_bproc_component;
ORTE_DECLSPEC orte_pls_base_module_t orte_pls_bproc_module;
ORTE_DECLSPEC extern orte_plm_bproc_component_t mca_plm_bproc_component;
ORTE_DECLSPEC extern orte_plm_base_module_t orte_plm_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_PLS_BPROC_H_ */
END_C_DECLS
#endif /* ORTE_PLM_BPROC_H_ */

Просмотреть файл

@ -22,73 +22,65 @@
* Takes care of the component stuff for the MCA.
*/
#include "orte_config.h"
#include "orte/mca/errmgr/errmgr.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "pls_bproc.h"
#include "plm_bproc.h"
/*
* Public string showing the plm ompi_bproc component version number
*/
const char *mca_plm_bproc_component_version_string =
"Open MPI bproc plm MCA component version " ORTE_VERSION;
static int plm_bproc_open(void);
static int plm_bproc_close(void);
static int orte_plm_bproc_component_query(mca_base_module_t **module, int *priority);
/**
* The bproc component data structure used to store all the relevant data about
* this component.
*/
orte_pls_bproc_component_t mca_pls_bproc_component = {
orte_plm_bproc_component_t mca_plm_bproc_component = {
{
{
ORTE_PLS_BASE_VERSION_1_3_0,
"bproc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_pls_bproc_component_open, /* component open */
orte_pls_bproc_component_close /* component close */
ORTE_PLM_BASE_VERSION_1_0_0,
"bproc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* Component open and close functions */
plm_bproc_open,
plm_bproc_close,
orte_plm_bproc_component_query
},
{
false /* checkpoint / restart */
},
orte_pls_bproc_init /* component init */
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
* Opens the pls_bproc component, setting all the needed mca parameters and
* Opens the plm_bproc component, setting all the needed mca parameters and
* finishes setting up the component struct.
*/
int orte_pls_bproc_component_open(void) {
int rc;
static int plm_bproc_component_open(void) {
mca_base_component_t *c = &mca_plm_bproc_component.super.base_version;
/* init parameters */
mca_base_component_t *c = &mca_pls_bproc_component.super.pls_version;
mca_base_param_reg_int(c, "priority", NULL, false, false, 100,
&mca_pls_bproc_component.priority);
mca_base_param_reg_int(c, "debug",
"If > 0 prints library debugging information",
false, false, 0, &mca_pls_bproc_component.debug);
mca_base_param_reg_int(c, "terminate_sig",
"Signal sent to processes to terminate them", false,
false, 9, &mca_pls_bproc_component.terminate_sig);
mca_base_param_reg_string(c, "orted", "Path to where orted is installed",
false, false, "orted", &mca_pls_bproc_component.orted);
mca_base_param_reg_int(c, "nolaunch", NULL, false, false, (int)false,
&rc);
if ((int)false == rc) {
mca_pls_bproc_component.do_not_launch = false;
} else {
mca_pls_bproc_component.do_not_launch = true;
}
mca_pls_bproc_component.recv_issued = false;
OBJ_CONSTRUCT(&mca_pls_bproc_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_pls_bproc_component.condition, opal_condition_t);
mca_base_param_reg_string(c, "orted", "Path to where orted is installed",
false, false, "orted", &mca_plm_bproc_component.orted);
return ORTE_SUCCESS;
}
/**
* Closes the pls_bproc component
* Closes the plm_bproc component
*/
int orte_pls_bproc_component_close(void) {
OBJ_DESTRUCT(&mca_pls_bproc_component.lock);
OBJ_DESTRUCT(&mca_pls_bproc_component.condition);
static int plm_bproc_component_close(void) {
return ORTE_SUCCESS;
}
@ -96,28 +88,26 @@ int orte_pls_bproc_component_close(void) {
* Initializes the module. We do not want to run unless we are the seed, bproc
* is running, and we are the master node.
*/
orte_pls_base_module_t* orte_pls_bproc_init(int *priority) {
static int orte_plm_bproc_component_query(mca_base_module_t **module, int *priority)
{
int ret;
struct bproc_version_t version;
/* are we the seed */
if(orte_process_info.seed == false)
return NULL;
/* okay, we are in an HNP - now check to see if BProc is running here */
if (!mca_pls_bproc_component.do_not_launch) {
ret = bproc_version(&version);
if (ret != 0) {
return NULL;
}
/* see if BProc is running here */
ret = bproc_version(&version);
if (ret != 0) {
*module = NULL;
return ORTE_ERR_NOT_AVAILABLE;
}
/* only launch from the master node */
if (bproc_currnode() != BPROC_NODE_MASTER) {
return NULL;
*module = NULL;
return ORTE_ERR_NOT_AVAILABLE;
}
*priority = mca_pls_bproc_component.priority;
return &orte_pls_bproc_module;
*priority = 20;
*module = (mca_base_module_t *) &orte_plm_bproc_module;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -35,9 +35,9 @@ endif
AM_CPPFLAGS= $(ras_bjs_CPPFLAGS)
proxy_SOURCES = \
ras_bjs.c \
ras_bjs.h \
ras_bjs_component.c
ras_bjs_component.c \
ras_bjs.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)

Просмотреть файл

@ -16,30 +16,34 @@
* $HEADER$
*/
#include "orte_config.h"
#include <errno.h>
#include "orte/constants.h"
#include "orte/types.h"
#include <unistd.h>
#include <string.h>
#include <sys/bproc.h>
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/argv.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_bjs.h"
/* API functions */
static int allocate(opal_list_t *nodes);
static int finalize(void);
orte_ras_base_module_t orte_ras_bjs_module = {
allocate,
finalize
};
/**
* Query the bproc node status
*/
static int orte_ras_bjs_node_state(int node)
static int bjs_node_state(int node)
{
#if defined BPROC_API_VERSION && BPROC_API_VERSION >= 4
char nodestatus[BPROC_STATE_LEN + 1];
@ -67,124 +71,44 @@ static int orte_ras_bjs_node_state(int node)
}
/**
* Parse the NODELIST to determine the number of process
* slots/processors available on the node.
*/
static size_t orte_ras_bjs_node_slots(char* node_name)
static int allocate(opal_list_t *nodes)
{
static char** nodelist = NULL;
char** ptr;
size_t count = 0;
if(nodelist == NULL)
nodelist = opal_argv_split(getenv("NODELIST"), ',');
ptr = nodelist;
while(ptr && *ptr) {
if(strcmp(*ptr, node_name) == 0)
count++;
ptr++;
}
return count;
}
/**
* Resolve the node name to node number.
*/
static int orte_ras_bjs_node_resolve(char* node_name, int* node_num)
{
/* for now we expect this to be the node number */
if(NULL == node_name || sscanf(node_name, "%d", node_num) != 1)
return ORTE_ERROR;
return ORTE_SUCCESS;
}
/**
* Discover the available resources.
* - validate any nodes specified via hostfile/commandline
* - check for additional nodes that have already been allocated
*/
static int orte_ras_bjs_discover(
opal_list_t* nodelist,
orte_app_context_t** context,
size_t num_context)
{
char* nodes;
char* nodelist;
char* ptr;
opal_list_item_t* item;
opal_list_t new_nodes;
orte_node_t *node;
int rc;
/* query the nodelist from the registry */
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* validate that any user supplied nodes actually exist, etc. */
item = opal_list_get_first(nodelist);
while(item != opal_list_get_end(nodelist)) {
opal_list_item_t* next = opal_list_get_next(item);
int node_num;
orte_ras_node_t* node = (orte_ras_node_t*)item;
if(ORTE_SUCCESS != orte_ras_bjs_node_resolve(node->node_name, &node_num)) {
opal_list_remove_item(nodelist,item);
OBJ_DESTRUCT(item);
item = next;
continue;
}
if(orte_ras_bjs_node_state(node_num) != ORTE_NODE_STATE_UP) {
opal_list_remove_item(nodelist,item);
OBJ_DESTRUCT(item);
item = next;
continue;
}
if(bproc_access(node_num, BPROC_X_OK) != 0) {
opal_list_remove_item(nodelist,item);
OBJ_DESTRUCT(item);
item = next;
continue;
}
/* try and determine the number of available slots */
if(node->node_slots == 0) {
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
}
item = next;
}
/* parse the node list and check node status/access */
nodes = getenv("NODES");
if (NULL == nodes) {
nodelist = getenv("NODES");
if (NULL == nodelist) {
return ORTE_ERR_NOT_AVAILABLE;
}
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
while(NULL != (ptr = strsep(&nodes,","))) {
orte_ras_node_t *node;
while(NULL != (ptr = strsep(&nodelist,","))) {
orte_node_state_t node_state;
int node_num;
/* is this node already in the list */
for(item = opal_list_get_first(nodelist);
item != opal_list_get_end(nodelist);
for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
node = (orte_ras_node_t*)item;
if(strcmp(node->node_name, ptr) == 0)
node = (orte_node_t*)item;
if(strcmp(node->name, ptr) == 0)
break;
}
if(item != opal_list_get_end(nodelist))
/* if it is in the list, then just increment the slot count */
if(item != opal_list_get_end(nodes)) {
node->slots++;
continue;
}
/* convert to an int node number */
if(sscanf(ptr, "%d", &node_num) != 1) {
continue;
}
if(ORTE_NODE_STATE_UP != (node_state = orte_ras_bjs_node_state(node_num))) {
if(ORTE_NODE_STATE_UP != (node_state = bjs_node_state(node_num))) {
opal_output(0, "error: a specified node (%d) is not up.\n", node_num);
rc = ORTE_ERROR;
goto cleanup;
@ -196,96 +120,21 @@ static int orte_ras_bjs_discover(
}
/* create a new node entry */
node = OBJ_NEW(orte_ras_node_t);
node->node_name = strdup(ptr);
node->node_state = node_state;
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
opal_list_append(&new_nodes, &node->super);
node = OBJ_NEW(orte_node_t);
node->name = strdup(ptr);
node->state = node_state;
node->slots = 1;
opal_list_append(nodes, &node->super);
}
/* add any newly discovered nodes to the registry */
if(opal_list_get_size(&new_nodes)) {
rc = orte_ras_base_node_insert(&new_nodes);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
/* append them to the nodelist */
while(NULL != (item = opal_list_remove_first(&new_nodes)))
opal_list_append(nodelist, item);
cleanup:
OBJ_DESTRUCT(&new_nodes);
return rc;
}
/**
 * Discover available (pre-allocated) nodes and allocate them to the job.
 *
 * The function fetches the job's application contexts through
 * orte_rmgr.get_app_context(), passes them to orte_ras_bjs_discover() to
 * fill a local node list, and hands that list to
 * orte_ras_base_allocate_nodes(). Every failure is reported through
 * ORTE_ERROR_LOG and all paths leave through the cleanup label.
 *
 * @param jobid       job the nodes are allocated to
 * @param attributes  not referenced by this function
 * @return the status of the last call that was made (ORTE_SUCCESS when all
 *         three steps succeeded)
 */
static int orte_ras_bjs_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
opal_list_t nodes;
opal_list_item_t* item;
int rc;
/* context/num_context start out empty so the cleanup code below is safe
 * to run even when get_app_context() fails before filling them in */
orte_app_context_t **context = NULL;
orte_std_cntr_t i, num_context = 0;
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_rmgr.get_app_context(jobid, &context, &num_context);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(ORTE_SUCCESS != (rc = orte_ras_bjs_discover(&nodes, context, num_context))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
rc = orte_ras_base_allocate_nodes(jobid, &nodes);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
/* shared exit path: reached both on success and after any failure above */
cleanup:
/* drain the local list, releasing each entry, before destructing it */
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
/* release each application context, then the array that held them */
for(i=0; i<num_context; i++) {
OBJ_RELEASE(context[i]);
}
if (NULL != context) {
free(context);
}
return rc;
}
static int orte_ras_bjs_deallocate(orte_jobid_t jobid)
static int finalize(void)
{
return ORTE_SUCCESS;
}
/* Module finalize entry point: nothing is torn down here, so it always
 * reports success. */
static int orte_ras_bjs_finalize(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/* Module function table. The initializer is positional: the first and the
 * last two entries (allocate, deallocate, finalize) are this file's own
 * functions, while the four entries in between are orte_ras_base_*
 * functions that are not defined in this file. */
orte_ras_base_module_t orte_ras_bjs_module = {
orte_ras_bjs_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_base_node_query_alloc,
orte_ras_base_node_lookup,
orte_ras_bjs_deallocate,
orte_ras_bjs_finalize
};

Просмотреть файл

@ -24,27 +24,12 @@
#define ORTE_RAS_BJS_H
#include "orte/mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/**
* RAS Component
*/
struct orte_ras_bjs_component_t {
orte_ras_base_component_t super;
int debug;
int priority;
char *schedule_policy;
};
typedef struct orte_ras_bjs_component_t orte_ras_bjs_component_t;
ORTE_DECLSPEC extern orte_ras_bjs_component_t mca_ras_bjs_component;
ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_bjs_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_bjs_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#endif

Просмотреть файл

@ -17,115 +17,65 @@
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "ras_bjs.h"
/*
* Local functions
*/
static int orte_ras_bjs_open(void);
static int orte_ras_bjs_close(void);
static orte_ras_base_module_t* orte_ras_bjs_init(int* priority);
static int ras_bjs_open(void);
static int ras_bjs_component_query(mca_base_module_t **module, int *priority);
orte_ras_bjs_component_t mca_ras_bjs_component = {
orte_ras_base_component_t mca_ras_bjs_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_3_0,
/* Indicate that we are a ras v2.0.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_2_0_0,
"bjs", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_bjs_open, /* component open */
orte_ras_bjs_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_bjs_init
/* Component open and close functions */
ras_bjs_open, /* component open */
NULL, /* component close */
ras_bjs_component_query
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/**
* Convenience functions to look up MCA parameter values.
*/
/* Register the integer MCA parameter "ras" / "bjs" / param_name with the
 * given default and return the value found by the subsequent lookup. The
 * result is pre-seeded with default_value, so that is what comes back when
 * the lookup stores nothing. */
static int orte_ras_bjs_param_register_int(const char* param_name, int default_value)
{
    int value = default_value;
    int index;

    index = mca_base_param_register_int("ras", "bjs", param_name, NULL, default_value);
    mca_base_param_lookup_int(index, &value);

    return value;
}
/**
 * Register a string MCA parameter and return the value found by the
 * subsequent lookup.
 *
 * @param a              forwarded as the first argument of
 *                       mca_base_param_register_string()
 * @param b              forwarded as the second argument
 * @param c              forwarded as the third argument
 * @param default_value  default handed to the registration call
 * @return the string stored by mca_base_param_lookup_string(), or NULL when
 *         the lookup stores nothing
 */
static char* orte_ras_bjs_param_register_string(
    const char * a, const char *b, const char *c,
    const char* default_value)
{
    /* Start from NULL: if the lookup leaves its output untouched the caller
     * gets a well-defined NULL instead of an indeterminate pointer (the
     * integer variant pre-seeds its result in the same spirit). */
    char *param_value = NULL;
    int id = mca_base_param_register_string(a, b, c, NULL, default_value);

    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/**
* component open/close/init function
*/
static int orte_ras_bjs_open(void)
static int ras_bjs_open(void)
{
mca_ras_bjs_component.debug = orte_ras_bjs_param_register_int("debug",1);
mca_ras_bjs_component.priority = orte_ras_bjs_param_register_int("priority",75);
/* JMS To be changed post-beta to LAM's C/N command line notation */
mca_ras_bjs_component.schedule_policy =
orte_ras_bjs_param_register_string("ras", "base", "schedule_policy", "slot");
return ORTE_SUCCESS;
}
static orte_ras_base_module_t *orte_ras_bjs_init(int* priority)
static int ras_bjs_component_query(mca_base_module_t **module, int *priority)
{
/* if we are not an HNP, then we must not be selected */
if (!orte_process_info.seed) {
return NULL;
}
#if 0
if(getenv("NODES") == NULL) {
return NULL;
*module = NULL;
return ORTE_ERR_NOT_AVAILABLE;
}
#endif
*priority = mca_ras_bjs_component.priority;
return &orte_ras_bjs_module;
}
/**
* Close all subsystems.
*/
static int orte_ras_bjs_close(void)
{
*priority = 10;
*module = (mca_base_module_t *) &orte_ras_bjs_module;
return ORTE_SUCCESS;
}

Просмотреть файл

Просмотреть файл

@ -1 +0,0 @@
rhc

Просмотреть файл

@ -1,54 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ras_lsf_bproc_DSO
component_noinst =
component_install = mca_ras_lsf_bproc.la
else
component_noinst = libmca_ras_lsf_bproc.la
component_install =
endif
AM_CPPFLAGS= $(ras_lsf_bproc_CPPFLAGS)
proxy_SOURCES = \
ras_lsf_bproc.c \
ras_lsf_bproc.h \
ras_lsf_bproc_component.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
mca_ras_lsf_bproc_la_LIBADD = \
$(ras_lsf_bproc_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
mca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
libmca_ras_lsf_bproc_la_LIBADD = $(ras_lsf_bproc_LIBS)
libmca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)

Просмотреть файл

@ -1,38 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ras_lsf_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_ras_lsf_bproc_CONFIG],[
OMPI_CHECK_BPROC([ras_lsf_bproc], [ras_lsf_bproc_good=1],
[ras_lsf_bproc_good=1], [ras_lsf_bproc_good=0])
# if check worked, set wrapper flags if so.
# Evaluate succeed / fail
AS_IF([test "$ras_lsf_bproc_good" = "1"],
[ras_lsf_bproc_WRAPPER_EXTRA_LDFLAGS="$ras_lsf_bproc_LDFLAGS"
ras_lsf_bproc_WRAPPER_EXTRA_LIBS="$ras_lsf_bproc_LIBS"
$1],
[$2])
# set build flags to use in makefile
AC_SUBST([ras_lsf_bproc_CPPFLAGS])
AC_SUBST([ras_lsf_bproc_LDFLAGS])
AC_SUBST([ras_lsf_bproc_LIBS])
])dnl

Просмотреть файл

@ -1,24 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

@ -1,55 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "orte/orte_constants.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_lsf_bproc.h"
/* Allocate entry point of the lsf_bproc module. It is a stub: neither
 * argument is used and success is reported unconditionally. */
static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/* Deallocate entry point of the lsf_bproc module. It is a stub: the job id
 * is not used and success is reported unconditionally. */
static int orte_ras_lsf_bproc_deallocate(orte_jobid_t jobid)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/* Finalize entry point of the lsf_bproc module: nothing is torn down, so it
 * always reports success. */
static int orte_ras_lsf_bproc_finalize(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/* Module function table. The initializer is positional: the first and the
 * last two entries (allocate, deallocate, finalize) are this file's own
 * stub functions, while the four entries in between are orte_ras_base_*
 * functions that are not defined in this file. */
orte_ras_base_module_t orte_ras_lsf_bproc_module = {
orte_ras_lsf_bproc_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_base_node_query_alloc,
orte_ras_base_node_lookup,
orte_ras_lsf_bproc_deallocate,
orte_ras_lsf_bproc_finalize
};

Просмотреть файл

@ -1,49 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Allocation (LSF over BPROC)
*/
#ifndef ORTE_RAS_LSF_BPROC_H
#define ORTE_RAS_LSF_BPROC_H
#include "orte/mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * RAS Component
 *
 * Component type for the lsf_bproc RAS component: the generic RAS base
 * component followed by two component-specific integer settings.
 */
struct orte_ras_lsf_bproc_component_t {
orte_ras_base_component_t super; /* generic RAS component; first member */
int debug; /* debug setting */
int priority; /* priority setting */
};
typedef struct orte_ras_lsf_bproc_component_t orte_ras_lsf_bproc_component_t;
ORTE_DECLSPEC extern orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_lsf_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,111 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "ras_lsf_bproc.h"
/*
* Local functions
*/
static int orte_ras_lsf_bproc_open(void);
static int orte_ras_lsf_bproc_close(void);
static orte_ras_base_module_t* orte_ras_lsf_bproc_init(int* priority);
/* Component descriptor for the "lsf_bproc" RAS component. Only the embedded
 * base component is initialized here; the debug and priority members are
 * not listed in this initializer and are assigned later by
 * orte_ras_lsf_bproc_open(). */
orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_3_0,
"lsf_bproc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_lsf_bproc_open, /* component open */
orte_ras_lsf_bproc_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
/* Component init function */
orte_ras_lsf_bproc_init
}
};
/**
* Convenience functions to look up MCA parameters
*/
/* Register the integer MCA parameter "ras" / "lsf_bproc" / param_name with
 * the given default and return the value found by the subsequent lookup.
 * The result is pre-seeded with default_value, so that is what comes back
 * when the lookup stores nothing. */
static int orte_ras_lsf_bproc_param_register_int(const char* param_name, int default_value)
{
    int value = default_value;
    int index;

    index = mca_base_param_register_int("ras", "lsf_bproc", param_name, NULL, default_value);
    mca_base_param_lookup_int(index, &value);

    return value;
}
/**
* component open/close/init function
*/
/* Component open: registers the "debug" (default 1) and "priority"
 * (default -1) parameters, stores their values in the component structure
 * and reports success. */
static int orte_ras_lsf_bproc_open(void)
{
    /* register in the same order as before: debug first, then priority */
    const int debug_setting = orte_ras_lsf_bproc_param_register_int("debug",1);
    const int priority_setting = orte_ras_lsf_bproc_param_register_int("priority",-1);

    mca_ras_lsf_bproc_component.debug = debug_setting;
    mca_ras_lsf_bproc_component.priority = priority_setting;

    return ORTE_SUCCESS;
}
/* Component init. When this process is the HNP (orte_process_info.seed is
 * set) the component's priority is written to *priority; otherwise
 * *priority is left untouched. In both cases NULL is returned.
 * NOTE(review): no module pointer is ever handed back, so this component
 * presumably can never be selected -- confirm that this is intentional. */
static orte_ras_base_module_t *orte_ras_lsf_bproc_init(int* priority)
{
    /* if we are not an HNP, then we must not be selected */
    if (orte_process_info.seed) {
        *priority = mca_ras_lsf_bproc_component.priority;
    }

    return NULL;
}
/**
 * Component close: there is nothing to shut down, so success is reported
 * unconditionally.
 */
static int orte_ras_lsf_bproc_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}