1
1

- change from mca_base_param_register_* to mca_base_param_reg_*

- update include files / fix minor bugs

This commit was SVN r7746.
Этот коммит содержится в:
Thara Angskun 2005-10-13 12:58:31 +00:00
родитель 1d2035cd85
Коммит 73fff4ea2c
3 изменённых файлов: 100 добавлений и 123 удалений

Просмотреть файл

@ -48,6 +48,7 @@ struct orte_pls_poe_component_t {
int debug;
char* orted;
char* class;
char* resource_allocation;
char* hostfile;
char* cmdfile;
char* mp_stdoutmode;

Просмотреть файл

@ -85,36 +85,6 @@ orte_pls_poe_component_t mca_pls_poe_component = {
}
};
/**
 * orte_pls_poe_param_reg_int - register an integer parameter and read it back
 *
 * The parameter is registered under the ("pls", "poe", param_name) name
 * triple and then looked up by the index that the registration returned.
 *
 * @param param_name    parameter name [INPUT]
 * @param default_value value passed to the registration as the default; it is
 *                      also what is returned when the lookup does not
 *                      overwrite the result [INPUT]
 * @return the value left in the result variable after the lookup
 */
int orte_pls_poe_param_reg_int(char * param_name, int default_value)
{
    /* Seed the result with the default so it is defined whatever the lookup does. */
    int value = default_value;
    int index = mca_base_param_register_int("pls", "poe", param_name, NULL, default_value);

    /* The lookup status is intentionally not inspected; value keeps the default on failure. */
    mca_base_param_lookup_int(index, &value);
    return value;
}
/**
 * orte_pls_poe_param_reg_string - register a string parameter and read it back
 *
 * The parameter is registered under the ("pls", "poe", param_name) name
 * triple and then looked up by the index that the registration returned.
 *
 * @param param_name    parameter name [INPUT]
 * @param default_value default value handed to the registration [INPUT]
 * @return the string stored by mca_base_param_lookup_string(), or NULL when
 *         the lookup did not store anything
 *
 * NOTE(review): ownership of the returned string is decided by
 * mca_base_param_lookup_string(); confirm whether callers must free it.
 */
char* orte_pls_poe_param_reg_string(char* param_name, char* default_value)
{
    /*
     * Start from NULL: the lookup's status is not checked, so without this
     * an indeterminate pointer would be returned whenever the lookup leaves
     * the output argument untouched.
     */
    char *param_value = NULL;
    int id;

    id = mca_base_param_register_string("pls","poe",param_name,NULL,default_value);
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/**
orte_pls_poe_component_open - open component and register all parameters
@return error number
@ -122,21 +92,48 @@ orte_pls_poe_component_open - open component and register all parameters
int orte_pls_poe_component_open(void)
{
char *param;
mca_base_component_t *c = &mca_pls_poe_component.super.pls_version;
mca_pls_poe_component.mp_retry = orte_pls_poe_param_reg_int("mp_retry", 0);
mca_pls_poe_component.mp_retrycount = orte_pls_poe_param_reg_int("mp_retrycount", 0);
mca_pls_poe_component.mp_infolevel = orte_pls_poe_param_reg_int("mp_infolevel", 0);
mca_pls_poe_component.mp_labelio = orte_pls_poe_param_reg_string("mp_labelio","no");
mca_pls_poe_component.mp_stdoutmode = orte_pls_poe_param_reg_string("mp_stdoutmode","unordered");
mca_base_param_reg_int(c, "mp_retry",
"specifies the interval (in seconds) to wait before repeating the node request",
true, false, 0, &mca_pls_poe_component.mp_retry);
mca_base_param_reg_int(c, "mp_retrycount",
"specifies the number of times the Partition Manager should make the request before returning",
true, false, 0, &mca_pls_poe_component.mp_retrycount);
mca_base_param_reg_int(c, "mp_infolevel",
"specify the level of messages you want from POE (0-6)",
true, false, 0, &mca_pls_poe_component.mp_infolevel);
mca_base_param_reg_string(c, "mp_labelio",
"Whether or not to label message output with task identifiers (yes or no)",
true, false, "no", &mca_pls_poe_component.mp_labelio);
mca_base_param_reg_string(c, "mp_stdoutmode",
"standard output mode (ordered, unordered or taskID)",
true, false, "unordered", &mca_pls_poe_component.mp_stdoutmode);
mca_pls_poe_component.debug = orte_pls_poe_param_reg_int("debug",0);
mca_pls_poe_component.verbose = orte_pls_poe_param_reg_int("verbose",0);
mca_pls_poe_component.priority = orte_pls_poe_param_reg_int("priority", 100);
mca_pls_poe_component.orted = orte_pls_poe_param_reg_string("orted","orted");
mca_pls_poe_component.class = orte_pls_poe_param_reg_string("class","interactive");
mca_pls_poe_component.env = orte_pls_poe_param_reg_string("progenv","env");
param = orte_pls_poe_param_reg_string("progpoe","poe");
mca_base_param_reg_int(c, "debug",
"Whether or not to enable debugging output for the poe pls component (0 or 1)",
false, false, 0, &mca_pls_poe_component.debug);
mca_base_param_reg_int(c, "verbose",
"Verbose level",
true, false, 0, &mca_pls_poe_component.verbose);
mca_base_param_reg_int(c, "priority",
"Priority of the poe pls component",
false , false, 100, &mca_pls_poe_component.priority);
mca_base_param_reg_string(c, "orted",
"The command name that the poe pls component will invoke for the ORTE daemon",
false, false, "orted", &mca_pls_poe_component.orted);
mca_base_param_reg_string(c, "class",
"class (interactive or batch)",
true, false, "interactive", &mca_pls_poe_component.class);
mca_base_param_reg_string(c, "resource_allocation",
"resource_allocation mode (hostfile or automatic)",
false, false, "hostfile", &mca_pls_poe_component.resource_allocation);
mca_base_param_reg_string(c, "progenv",
"The command name that setup environment",
false, false, "env", &mca_pls_poe_component.env);
mca_base_param_reg_string(c, "progpoe",
"The POE command",
false, false, "poe", &param);
mca_pls_poe_component.argv = opal_argv_split(param, ' ');
mca_pls_poe_component.argc = opal_argv_count(mca_pls_poe_component.argv);
if (mca_pls_poe_component.argc > 0) {
@ -146,6 +143,8 @@ int orte_pls_poe_component_open(void)
mca_pls_poe_component.path = NULL;
return ORTE_ERR_BAD_PARAM;
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -24,20 +24,27 @@
#include <fcntl.h>
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "include/orte_constants.h"
#include "mca/pls/pls.h"
#include "mca/gpr/gpr.h"
#include "mca/rmaps/base/base.h"
#include "mca/rmaps/base/rmaps_base_map.h"
#include "mca/base/mca_base_param.h"
#include "mca/ns/ns.h"
#include "mca/rml/rml.h"
#include "mca/errmgr/errmgr.h"
#include "mca/soh/soh.h"
#include "util/univ_info.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "pls_poe.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/poe/pls_poe.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rmaps/base/rmaps_base_map.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/soh/soh.h"
#include "orte/util/univ_info.h"
#include "orte/util/session_dir.h"
#include "orte/runtime/orte_wait.h"
extern char **environ;
@ -57,10 +64,10 @@ orte_pls_base_module_1_0_0_t orte_pls_poe_module = {
};
/**
__poe_set_handler_default - set signal handler to default
poe_set_handler_default - set signal handler to default
@param sig signal [IN]
*/
static void __poe_set_handler_default(int sig)
static void poe_set_handler_default(int sig)
{
struct sigaction act;
@ -71,14 +78,14 @@ static void __poe_set_handler_default(int sig)
}
/**
__poe_argv_append_int - append integer variable to argument variable
poe_argv_append_int - append integer variable to argument variable
@param argc argument count [OUT]
@param argv argument variable [OUT]
@param varname variable name [IN]
@param min minimum value [IN]
@param argname argument name [IN]
*/
static inline int __poe_argv_append_int(int *argc, char ***argv, int varname, int min, char *argname)
static inline int poe_argv_append_int(int *argc, char ***argv, int varname, int min, char *argname)
{
char *tmp_string;
if(varname >= min) {
@ -95,6 +102,9 @@ static inline int __poe_argv_append_int(int *argc, char ***argv, int varname, in
/**
@warning - THIS FUNCTION IS NOT USED. IT WILL BE USED WHEN FAULT-TOLERANCE FEATURE IS NEEDED
*/
#ifdef __FOR_LATER
int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
{
opal_list_t nodes, mapping_list;
@ -121,7 +131,6 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
if (mca_pls_poe_component.verbose > 10) opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__);
if((mca_pls_poe_component.hostfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE;
if((mca_pls_poe_component.cmdfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE;
@ -265,9 +274,9 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
opal_argv_append(&argc, &argv, "6");
opal_argv_append(&argc, &argv, "-stdoutmode");
opal_argv_append(&argc, &argv, "ordered");
rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry");
rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount");
rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
if (mca_pls_poe_component.verbose) {
@ -285,7 +294,6 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
execv(mca_pls_poe_component.path, argv);
opal_output(0, "orte_pls_poe: execv failed with errno=%d\n", errno);
exit(-1);
} else {
}
cleanup:
@ -303,29 +311,26 @@ cleanup:
return rc;
}
#endif
/**
__poe_wait_job - call back when POE finish
poe_wait_job - call back when POE finish
@param pid pid
@param status status
@param cbdata call back data
@return error number
*/
int __poe_wait_job(pid_t pid, int status, void* cbdata)
static void poe_wait_job(pid_t pid, int status, void* cbdata)
{
opal_list_t map;
opal_list_item_t* item;
int rc;
if(mca_pls_poe_component.verbose > 10) {
opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__);
}
/* query allocation for the job */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(item = opal_list_get_first(&map);
@ -344,15 +349,10 @@ int __poe_wait_job(pid_t pid, int status, void* cbdata)
}
}
OBJ_DESTRUCT(&map);
cleanup:
if(mca_pls_poe_component.verbose>10) {
opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc);
}
return rc;
}
/**
__poe_create_cmd_file - create POE command file
poe_create_cmd_file - create POE command file
@param cfp command file pointer [IN]
@param context context [IN]
@param proc proc [IN]
@ -360,25 +360,19 @@ __poe_create_cmd_file - create POE command file
@param vpid_range vpid range [IN]
@return error number
*/
static int __poe_create_cmd_file(
static int poe_create_cmd_file(
FILE *cfp,
orte_app_context_t* context,
orte_rmaps_base_proc_t* proc,
orte_vpid_t vpid_start,
orte_vpid_t vpid_range)
{
pid_t pid;
int rc;
int i;
char* param;
char* uri;
char **environ_copy;
if(mca_pls_poe_component.verbose > 10) {
opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__);
}
/* setup base environment */
environ_copy = NULL;
param = mca_base_param_environ_variable("rmgr","bootproxy","jobid");
@ -441,19 +435,15 @@ static int __poe_create_cmd_file(
/* POE gets upset if the file does not end with a newline. */
fprintf(cfp,"\n");
if(mca_pls_poe_component.verbose>10) {
opal_output(0, "%s: --- END ---\n", __FUNCTION__);
}
return ORTE_SUCCESS;
}
/**
__poe_launch_interactive - launch an interactive job
poe_launch_interactive - launch an interactive job
@param jobid JOB Identifier [IN]
@return error number
*/
static inline int __poe_launch_interactive(orte_jobid_t jobid)
static inline int poe_launch_interactive(orte_jobid_t jobid)
{
opal_list_t map, nodes, mapping_list;
opal_list_item_t* item;
@ -462,15 +452,12 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
FILE *hfp, *cfp;
char** argv;
int argc;
int rc, status, pid;
int rc, pid;
sigset_t sigs;
if(mca_pls_poe_component.verbose > 10) {
opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__);
}
if( (NULL==(mca_pls_poe_component.cmdfile=tempnam(NULL,NULL))) ||
(NULL==(cfp=fopen(mca_pls_poe_component.cmdfile,"w"))) ) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
@ -479,11 +466,11 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if(ORTE_SUCCESS != rc) { goto cleanup; }
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_nodes = opal_list_get_size(&nodes);
if(num_nodes > 0) {
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
/* Create a temporary hostlist file if the user requested hostfile allocation */
@ -501,7 +488,7 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
}
rc = orte_rmgr_base_get_job_slots(jobid, &num_procs);
if(ORTE_SUCCESS != rc) { return rc; }
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(jobid,&map);
@ -518,7 +505,7 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item;
size_t i;
for(i=0; i<map2->num_procs; i++) {
rc = __poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range);
rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range);
if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
}
}
@ -529,12 +516,12 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
argv = opal_argv_copy(mca_pls_poe_component.argv);
argc = mca_pls_poe_component.argc;
if(num_nodes > 0) {
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
opal_argv_append(&argc, &argv, "-hostfile");
opal_argv_append(&argc, &argv, mca_pls_poe_component.hostfile);
opal_argv_append(&argc, &argv, "-resd");
opal_argv_append(&argc, &argv, "no");
rc=__poe_argv_append_int(&argc, &argv, num_nodes, 1, "-nodes");
rc=poe_argv_append_int(&argc, &argv, num_nodes, 1, "-nodes");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
}
@ -547,17 +534,17 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
opal_argv_append(&argc, &argv, "-stdoutmode");
opal_argv_append(&argc, &argv, mca_pls_poe_component.mp_stdoutmode);
rc=__poe_argv_append_int(&argc, &argv, num_procs, 1, "-procs");
rc=poe_argv_append_int(&argc, &argv, num_procs, 1, "-procs");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry");
rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount");
rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_infolevel, 0, "-infolevel");
rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_infolevel, 0, "-infolevel");
if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
if(mca_pls_poe_component.verbose>10) {
opal_output(0, "%s:POE cmdline %s\n", __FUNCTION__, opal_argv_join(argv, ' '));
opal_output(0, "POE cmdline %s\n", opal_argv_join(argv, ' '));
}
/* Start job with POE */
@ -568,20 +555,21 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if(pid == 0) {
__poe_set_handler_default(SIGTERM);
__poe_set_handler_default(SIGINT);
__poe_set_handler_default(SIGHUP);
__poe_set_handler_default(SIGCHLD);
__poe_set_handler_default(SIGPIPE);
poe_set_handler_default(SIGTERM);
poe_set_handler_default(SIGINT);
poe_set_handler_default(SIGHUP);
poe_set_handler_default(SIGCHLD);
poe_set_handler_default(SIGPIPE);
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
execv(mca_pls_poe_component.path, argv);
opal_output(0, "orte_pls_poe: execv failed with errno=%d\n", errno);
exit(-1);
} else {
orte_wait_cb(pid, __poe_wait_job, NULL);
orte_wait_cb(pid, poe_wait_job, NULL);
}
cleanup:
while(NULL != (item = opal_list_remove_first(&map))) {
OBJ_RELEASE(item);
@ -597,9 +585,6 @@ cleanup:
}
OBJ_DESTRUCT(&mapping_list);
if(mca_pls_poe_component.verbose>10) {
opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc);
}
return rc;
}
@ -612,7 +597,7 @@ pls_poe_launch - launch a POE job
/*
 * Launch the job identified by jobid according to the configured
 * mca_pls_poe_component.class value.
 *
 * Returns the interactive launcher's result when the class matches
 * "interactive"; otherwise returns ORTE_ERR_NOT_IMPLEMENTED.
 */
static int pls_poe_launch(orte_jobid_t jobid)
{
/* Only the first 11 characters are compared, so any class value that
   starts with "interactive" takes this branch. */
if(!strncmp(mca_pls_poe_component.class,"interactive",11)) {
return __poe_launch_interactive(jobid);
/* NOTE(review): this second return is unreachable. It looks like the
   renamed form of the call above -- confirm which helper name is current
   and drop the other line. */
return poe_launch_interactive(jobid);
}
return ORTE_ERR_NOT_IMPLEMENTED;
}
@ -634,15 +619,7 @@ pls_poe_finalize - clean up tempolary files
*/
static int pls_poe_finalize(void)
{
/* Removes the command file and the host file named in the component
   state, then reports success unconditionally. */
/* Entry trace, emitted only when the component verbosity exceeds 10. */
if (mca_pls_poe_component.verbose > 10) {
opal_output(0, "%s: --- BEGIN ---\n", __FUNCTION__);
}
/* The unlink() results are ignored, so a missing file is not an error here.
   NOTE(review): cmdfile and hostfile are passed without a NULL check --
   confirm both are always set before finalize runs. */
unlink(mca_pls_poe_component.cmdfile);
unlink(mca_pls_poe_component.hostfile);
/* Exit trace, under the same verbosity threshold as the entry trace. */
if (mca_pls_poe_component.verbose > 10) {
opal_output(0, "%s: --- END ---\n", __FUNCTION__);
}
return ORTE_SUCCESS;
}