1
1
This commit was SVN r18889.
Этот коммит содержится в:
Ralph Castain 2008-07-11 15:40:25 +00:00
родитель 7834201f69
Коммит 58964b2bf8
4 изменённых файла: 57 добавлений и 81 удаление

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/util/opal_environ.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/show_help.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "opal/mca/base/mca_base_param.h"
@ -309,55 +310,38 @@ static int lsf_set_name(void)
{
int rc;
int id;
char* name_string = NULL;
int lsf_nodeid;
/* start by getting our jobid, and vpid (which is the
starting vpid for the list of daemons) */
id = mca_base_param_register_string("orte", "ess", "name", NULL, NULL);
mca_base_param_lookup_string(id, &name_string);
if (name_string != NULL) {
if (ORTE_SUCCESS !=
(rc = orte_util_convert_string_to_process_name(&ORTE_PROC_MY_NAME, name_string))) {
ORTE_ERROR_LOG(rc);
free(name_string);
return rc;
}
free(name_string);
} else {
orte_jobid_t jobid;
orte_vpid_t vpid;
char* jobid_string;
char* vpid_string;
orte_jobid_t jobid;
orte_vpid_t vpid;
char* jobid_string;
char* vpid_string;
id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
mca_base_param_lookup_string(id, &jobid_string);
if (NULL == jobid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS !=
(rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS !=
(rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
ORTE_PROC_MY_NAME->jobid = jobid;
ORTE_PROC_MY_NAME->vpid = vpid;
id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
mca_base_param_lookup_string(id, &jobid_string);
if (NULL == jobid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS !=
(rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS !=
(rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
ORTE_PROC_MY_NAME->jobid = jobid;
ORTE_PROC_MY_NAME->vpid = vpid;
/* fix up the base name and make it the "real" name */
lsf_nodeid = atoi(getenv("LSF_PM_TASKID"));

Просмотреть файл

@ -28,17 +28,17 @@
BEGIN_C_DECLS
struct orte_plm_lsf_component_t {
orte_plm_base_component_t super;
int priority;
bool timing;
char *orted;
};
typedef struct orte_plm_lsf_component_t orte_plm_lsf_component_t;
struct orte_plm_lsf_component_t {
orte_plm_base_component_t super;
int priority;
bool timing;
char *orted;
};
typedef struct orte_plm_lsf_component_t orte_plm_lsf_component_t;
/* Globally exported variables */
ORTE_DECLSPEC extern orte_plm_lsf_component_t mca_plm_lsf_component;
extern orte_plm_base_module_t orte_plm_lsf_module;
/* Globally exported variables */
ORTE_DECLSPEC extern orte_plm_lsf_component_t mca_plm_lsf_component;
extern orte_plm_base_module_t orte_plm_lsf_module;
END_C_DECLS

Просмотреть файл

@ -29,9 +29,9 @@
#include <lsf/lsbatch.h>
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "opal/util/argv.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
@ -95,7 +95,6 @@ orte_plm_lsf_component_t mca_plm_lsf_component = {
static int plm_lsf_open(void)
{
int tmp, value;
mca_base_component_t *comp = &mca_plm_lsf_component.super.base_version;
mca_base_param_reg_int(comp, "priority", "Default selection priority",
@ -106,15 +105,6 @@ static int plm_lsf_open(void)
false, false, "orted",
&mca_plm_lsf_component.orted);
tmp = mca_base_param_reg_int_name("orte", "timing",
"Request that critical timing loops be measured",
false, false, 0, &value);
if (value != 0) {
mca_plm_lsf_component.timing = true;
} else {
mca_plm_lsf_component.timing = false;
}
return ORTE_SUCCESS;
}
@ -131,7 +121,7 @@ static int orte_plm_lsf_component_query(mca_base_module_t **module, int *priorit
/* check if lsf is running here */
if (NULL == getenv("LSB_JOBID") || lsb_init("ORTE launcher") < 0) {
/* nope, not here */
opal_output_verbose(10, orte_plm_base.plm_output,
opal_output_verbose(10, orte_plm_globals.output,
"plm:lsf: NOT available for selection");
*module = NULL;
return ORTE_ERROR;

Просмотреть файл

@ -55,12 +55,12 @@
#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/argv.h"
#include "orte/util/show_help.h"
#include "opal/util/opal_environ.h"
#include "opal/util/path.h"
#include "opal/util/basename.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_wakeup.h"
#include "orte/runtime/orte_wait.h"
@ -88,7 +88,7 @@ static int plm_lsf_finalize(void);
/*
* Global variable
*/
orte_plm_base_module_1_3_0_t orte_plm_lsf_module = {
orte_plm_base_module_t orte_plm_lsf_module = {
plm_lsf_init,
orte_plm_base_set_hnp_name,
plm_lsf_launch_job,
@ -130,7 +130,6 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
int argc;
int rc;
char** env = NULL;
char* var;
char **nodelist_argv;
int nodelist_argc;
char *vpid_string;
@ -147,7 +146,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
/* default to declaring the daemons failed*/
failed_job = ORTE_PROC_MY_NAME->jobid;
if (mca_plm_lsf_component.timing) {
if (orte_timing) {
if (0 != gettimeofday(&joblaunchstart, NULL)) {
opal_output(0, "plm_lsf: could not obtain job start time");
}
@ -257,8 +256,8 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
don't support different --prefix'es for different nodes in
the SLURM plm) */
cur_prefix = NULL;
for (i=0; i < map->num_apps; i++) {
char * app_prefix_dir = map->apps[i]->prefix_dir;
for (i=0; i < jdata->num_apps; i++) {
char * app_prefix_dir = apps[i]->prefix_dir;
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
@ -284,7 +283,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
/* setup environment */
env = opal_argv_copy(orte_launch_environ);
if (mca_plm_lsf_component.timing) {
if (orte_timing) {
if (0 != gettimeofday(&launchstart, NULL)) {
opal_output(0, "plm_lsf: could not obtain start time");
}
@ -306,7 +305,10 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
/* wait for daemons to callback */
if (ORTE_SUCCESS !=
(rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
ORTE_ERROR_LOG(rc);
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:lsf: daemon launch failed for job %s on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
goto cleanup;
}
@ -314,14 +316,17 @@ launch_apps:
/* daemons succeeded - any failure now would be from apps */
failed_job = active_job;
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
ORTE_ERROR_LOG(rc);
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:lsf: launch of apps failed for job %s on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
goto cleanup;
}
/* declare the launch a success */
failed_launch = false;
if (mca_plm_lsf_component.timing) {
if (orte_timing) {
if (0 != gettimeofday(&launchstop, NULL)) {
opal_output(0, "plm_lsf: could not obtain stop time");
} else {
@ -340,9 +345,6 @@ launch_apps:
}
cleanup:
if (NULL != map) {
OBJ_RELEASE(map);
}
if (NULL != argv) {
opal_argv_free(argv);
}