From 73fff4ea2c6078ae9256937b7f8800ab9d6806ac Mon Sep 17 00:00:00 2001 From: Thara Angskun Date: Thu, 13 Oct 2005 12:58:31 +0000 Subject: [PATCH] - change from mca_base_param_register_* to mca_base_param_reg_* - update include files / fix minor bugs This commit was SVN r7746. --- orte/mca/pls/poe/pls_poe.h | 1 + orte/mca/pls/poe/pls_poe_component.c | 85 ++++++++--------- orte/mca/pls/poe/pls_poe_module.c | 137 +++++++++++---------------- 3 files changed, 100 insertions(+), 123 deletions(-) diff --git a/orte/mca/pls/poe/pls_poe.h b/orte/mca/pls/poe/pls_poe.h index 3f9329c231..dd6111cd50 100644 --- a/orte/mca/pls/poe/pls_poe.h +++ b/orte/mca/pls/poe/pls_poe.h @@ -48,6 +48,7 @@ struct orte_pls_poe_component_t { int debug; char* orted; char* class; + char* resource_allocation; char* hostfile; char* cmdfile; char* mp_stdoutmode; diff --git a/orte/mca/pls/poe/pls_poe_component.c b/orte/mca/pls/poe/pls_poe_component.c index 7bd8061bc2..dbd34ff419 100644 --- a/orte/mca/pls/poe/pls_poe_component.c +++ b/orte/mca/pls/poe/pls_poe_component.c @@ -85,36 +85,6 @@ orte_pls_poe_component_t mca_pls_poe_component = { } }; -/** -orte_pls_poe_param_reg_int - register and lookup a integer parameter -@param param_name parameter name [INPUT] -@param default_value default value [INPUT] -@return parameter value -*/ -int orte_pls_poe_param_reg_int(char * param_name, int default_value) -{ - int id, param_value; - id = mca_base_param_register_int("pls","poe",param_name,NULL,default_value); - param_value = default_value; - mca_base_param_lookup_int(id,&param_value); - return param_value; -} - -/** -orte_pls_poe_param_reg_string - register and lookup a string parameter -@param param_name parameter name [INPUT] -@param default_value default value [INPUT] -@return parameter value -*/ -char* orte_pls_poe_param_reg_string(char* param_name, char* default_value) -{ - char *param_value; - int id; - id = mca_base_param_register_string("pls","poe",param_name,NULL,default_value); - 
mca_base_param_lookup_string(id, &param_value); - return param_value; -} - /** orte_pls_poe_component_open - open component and register all parameters @return error number @@ -122,21 +92,48 @@ orte_pls_poe_component_open - open component and register all parameters int orte_pls_poe_component_open(void) { char *param; + mca_base_component_t *c = &mca_pls_poe_component.super.pls_version; - mca_pls_poe_component.mp_retry = orte_pls_poe_param_reg_int("mp_retry", 0); - mca_pls_poe_component.mp_retrycount = orte_pls_poe_param_reg_int("mp_retrycount", 0); - mca_pls_poe_component.mp_infolevel = orte_pls_poe_param_reg_int("mp_infolevel", 0); - mca_pls_poe_component.mp_labelio = orte_pls_poe_param_reg_string("mp_labelio","no"); - mca_pls_poe_component.mp_stdoutmode = orte_pls_poe_param_reg_string("mp_stdoutmode","unordered"); + mca_base_param_reg_int(c, "mp_retry", + "specifies the interval (in seconds) to wait before repeating the node request", + true, false, 0, &mca_pls_poe_component.mp_retry); + mca_base_param_reg_int(c, "mp_retrycount", + "specifies the number of times the Partition Manager should make the request before returning", + true, false, 0, &mca_pls_poe_component.mp_retrycount); + mca_base_param_reg_int(c, "mp_infolevel", + "specify the level of messages you want from POE (0-6)", + true, false, 0, &mca_pls_poe_component.mp_infolevel); + mca_base_param_reg_string(c, "mp_labelio", + "Whether or not to label message output with task identifiers (yes or no)", + true, false, "no", &mca_pls_poe_component.mp_labelio); + mca_base_param_reg_string(c, "mp_stdoutmode", + "standard output mode (ordered, unordered or taskID)", + true, false, "unordered", &mca_pls_poe_component.mp_stdoutmode); - mca_pls_poe_component.debug = orte_pls_poe_param_reg_int("debug",0); - mca_pls_poe_component.verbose = orte_pls_poe_param_reg_int("verbose",0); - mca_pls_poe_component.priority = orte_pls_poe_param_reg_int("priority", 100); - mca_pls_poe_component.orted = 
orte_pls_poe_param_reg_string("orted","orted"); - mca_pls_poe_component.class = orte_pls_poe_param_reg_string("class","interactive"); - mca_pls_poe_component.env = orte_pls_poe_param_reg_string("progenv","env"); - - param = orte_pls_poe_param_reg_string("progpoe","poe"); + mca_base_param_reg_int(c, "debug", + "Whether or not to enable debugging output for the poe pls component (0 or 1)", + false, false, 0, &mca_pls_poe_component.debug); + mca_base_param_reg_int(c, "verbose", + "Verbose level", + true, false, 0, &mca_pls_poe_component.verbose); + mca_base_param_reg_int(c, "priority", + "Priority of the poe pls component", + false , false, 100, &mca_pls_poe_component.priority); + mca_base_param_reg_string(c, "orted", + "The command name that the poe pls component will invoke for the ORTE daemon", + false, false, "orted", &mca_pls_poe_component.orted); + mca_base_param_reg_string(c, "class", + "class (interactive or batch)", + true, false, "interactive", &mca_pls_poe_component.class); + mca_base_param_reg_string(c, "resource_allocation", + "resource_allocation mode (hostfile or automatic)", + false, false, "hostfile", &mca_pls_poe_component.resource_allocation); + mca_base_param_reg_string(c, "progenv", + "The command name that setup environment", + false, false, "env", &mca_pls_poe_component.env); + mca_base_param_reg_string(c, "progpoe", + "The POE command", + false, false, "poe", &param); mca_pls_poe_component.argv = opal_argv_split(param, ' '); mca_pls_poe_component.argc = opal_argv_count(mca_pls_poe_component.argv); if (mca_pls_poe_component.argc > 0) { @@ -146,6 +143,8 @@ int orte_pls_poe_component_open(void) mca_pls_poe_component.path = NULL; return ORTE_ERR_BAD_PARAM; } + + return ORTE_SUCCESS; } diff --git a/orte/mca/pls/poe/pls_poe_module.c b/orte/mca/pls/poe/pls_poe_module.c index 040084f387..ebcb8c15d0 100644 --- a/orte/mca/pls/poe/pls_poe_module.c +++ b/orte/mca/pls/poe/pls_poe_module.c @@ -24,20 +24,27 @@ #include #include +#ifdef HAVE_UNISTD_H +#include <unistd.h>
+#endif #include "include/orte_constants.h" -#include "mca/pls/pls.h" -#include "mca/gpr/gpr.h" -#include "mca/rmaps/base/base.h" -#include "mca/rmaps/base/rmaps_base_map.h" -#include "mca/base/mca_base_param.h" -#include "mca/ns/ns.h" -#include "mca/rml/rml.h" -#include "mca/errmgr/errmgr.h" -#include "mca/soh/soh.h" -#include "util/univ_info.h" +#include "opal/mca/base/mca_base_param.h" #include "opal/util/argv.h" #include "opal/util/opal_environ.h" -#include "pls_poe.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/poe/pls_poe.h" +#include "orte/mca/ns/ns.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/sds/base/base.h" +#include "orte/mca/soh/soh.h" +#include "orte/util/univ_info.h" +#include "orte/util/session_dir.h" +#include "orte/runtime/orte_wait.h" extern char **environ; @@ -57,10 +64,10 @@ orte_pls_base_module_1_0_0_t orte_pls_poe_module = { }; /** -__poe_set_handler_default - set signal handler to default +poe_set_handler_default - set signal handler to default @param sig signal [IN] */ -static void __poe_set_handler_default(int sig) +static void poe_set_handler_default(int sig) { struct sigaction act; @@ -71,14 +78,14 @@ static void __poe_set_handler_default(int sig) } /** -__poe_argv_append_int - append integer variable to argument variable +poe_argv_append_int - append integer variable to argument variable @param argc argument count [OUT] @param argv argument variable [OUT] @param varname variable name [IN] @param min minimum value [IN] @param argname argument name [IN] */ -static inline int __poe_argv_append_int(int *argc, char ***argv, int varname, int min, char *argname) +static inline int poe_argv_append_int(int *argc, char ***argv, int varname, int min, char *argname) { char *tmp_string; if(varname >= min) { @@ -95,6 +102,9 @@ static inline 
int __poe_argv_append_int(int *argc, char ***argv, int varname, in /** @warning - THIS FUNCTION IS NOT USED. IT WILL BE USED WHEN FAULT-TOLERANCE FEATURE IS NEEDED */ + +#ifdef __FOR_LATER + int pls_poe_launch_interactive_orted(orte_jobid_t jobid) { opal_list_t nodes, mapping_list; @@ -121,7 +131,6 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid) * - need to know if we are launching on a subset of the allocated nodes * All other mapping responsibilities fall to orted in the fork PLS */ - if (mca_pls_poe_component.verbose > 10) opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__); if((mca_pls_poe_component.hostfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; if((mca_pls_poe_component.cmdfile=tempnam(NULL,NULL))==NULL) return ORTE_ERR_OUT_OF_RESOURCE; @@ -265,9 +274,9 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "6"); opal_argv_append(&argc, &argv, "-stdoutmode"); opal_argv_append(&argc, &argv, "ordered"); - rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry"); + rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount"); + rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } if (mca_pls_poe_component.verbose) { @@ -285,8 +294,7 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid) execv(mca_pls_poe_component.path, argv); opal_output(0, "orte_pls_poe: execv failed with errno=%d\n", errno); exit(-1); - } else { - } + } cleanup: while(NULL != (item = opal_list_remove_first(&nodes))) { @@ -303,29 +311,26 @@ cleanup: return rc; } +#endif + /** -__poe_wait_job - call back when POE finish +poe_wait_job - call back when POE finish @param pid pid @param status status @param cbdata call back data 
@return error number */ -int __poe_wait_job(pid_t pid, int status, void* cbdata) +static void poe_wait_job(pid_t pid, int status, void* cbdata) { opal_list_t map; opal_list_item_t* item; int rc; - if(mca_pls_poe_component.verbose > 10) { - opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__); - } - /* query allocation for the job */ OBJ_CONSTRUCT(&map, opal_list_t); rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); - goto cleanup; } for(item = opal_list_get_first(&map); @@ -344,15 +349,10 @@ int __poe_wait_job(pid_t pid, int status, void* cbdata) } } OBJ_DESTRUCT(&map); -cleanup: - if(mca_pls_poe_component.verbose>10) { - opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc); - } - return rc; } /** -__poe_create_cmd_file - create POE command file +poe_create_cmd_file - create POE command file @param cfp command file pointer [IN] @param context context [IN] @param proc proc [IN] @@ -360,25 +360,19 @@ __poe_create_cmd_file - create POE command file @param vpid_range vpid range [IN] @return error number */ -static int __poe_create_cmd_file( +static int poe_create_cmd_file( FILE *cfp, orte_app_context_t* context, orte_rmaps_base_proc_t* proc, orte_vpid_t vpid_start, orte_vpid_t vpid_range) { - pid_t pid; - int rc; int i; char* param; char* uri; char **environ_copy; - if(mca_pls_poe_component.verbose > 10) { - opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__); - } - /* setup base environment */ environ_copy = NULL; param = mca_base_param_environ_variable("rmgr","bootproxy","jobid"); @@ -441,19 +435,15 @@ static int __poe_create_cmd_file( /* POE will upset if the file doesn't contain end of line. 
*/ fprintf(cfp,"\n"); - if(mca_pls_poe_component.verbose>10) { - opal_output(0, "%s: --- END ---\n", __FUNCTION__); - } - return ORTE_SUCCESS; } /** -__poe_launch_interactive - launch an interactive job +poe_launch_interactive - launch an interactive job @param jobid JOB Identifier [IN] @return error number */ -static inline int __poe_launch_interactive(orte_jobid_t jobid) +static inline int poe_launch_interactive(orte_jobid_t jobid) { opal_list_t map, nodes, mapping_list; opal_list_item_t* item; @@ -462,15 +452,12 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) FILE *hfp, *cfp; char** argv; int argc; - int rc, status, pid; + int rc, pid; sigset_t sigs; - if(mca_pls_poe_component.verbose > 10) { - opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__); - } - if( (NULL==(mca_pls_poe_component.cmdfile=tempnam(NULL,NULL))) || (NULL==(cfp=fopen(mca_pls_poe_component.cmdfile,"w"))) ) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } @@ -479,11 +466,11 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) OBJ_CONSTRUCT(&nodes, opal_list_t); OBJ_CONSTRUCT(&mapping_list, opal_list_t); rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid); - if(ORTE_SUCCESS != rc) { goto cleanup; } + if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } num_nodes = opal_list_get_size(&nodes); - if(num_nodes > 0) { + if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) { /* Create a tempolary hostlist file if user specify */ @@ -501,7 +488,7 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) } rc = orte_rmgr_base_get_job_slots(jobid, &num_procs); - if(ORTE_SUCCESS != rc) { return rc; } + if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } OBJ_CONSTRUCT(&map, opal_list_t); rc = orte_rmaps_base_get_map(jobid,&map); @@ -518,7 +505,7 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item; size_t i; for(i=0; 
i<map2->num_procs; i++) { - rc = __poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range); + rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } } } @@ -529,12 +516,12 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) argv = opal_argv_copy(mca_pls_poe_component.argv); argc = mca_pls_poe_component.argc; - if(num_nodes > 0) { + if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) { opal_argv_append(&argc, &argv, "-hostfile"); opal_argv_append(&argc, &argv, mca_pls_poe_component.hostfile); opal_argv_append(&argc, &argv, "-resd"); opal_argv_append(&argc, &argv, "no"); - rc=__poe_argv_append_int(&argc, &argv, num_nodes, 1, "-nodes"); + rc=poe_argv_append_int(&argc, &argv, num_nodes, 1, "-nodes"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } } @@ -547,17 +534,17 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "-stdoutmode"); opal_argv_append(&argc, &argv, mca_pls_poe_component.mp_stdoutmode); - rc=__poe_argv_append_int(&argc, &argv, num_procs, 1, "-procs"); + rc=poe_argv_append_int(&argc, &argv, num_procs, 1, "-procs"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry"); + rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retry, 0, "-retry"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount"); + rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_retrycount, 0, "-retrycount"); if(ORTE_SUCCESS!=rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc=__poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_infolevel, 0, "-infolevel"); + rc=poe_argv_append_int(&argc, &argv, mca_pls_poe_component.mp_infolevel, 0, "-infolevel"); if(ORTE_SUCCESS!=rc) { 
ORTE_ERROR_LOG(rc); goto cleanup; } if(mca_pls_poe_component.verbose>10) { - opal_output(0, "%s:POE cmdline %s\n", __FUNCTION__, opal_argv_join(argv, ' ')); + opal_output(0, "POE cmdline %s\n", opal_argv_join(argv, ' ')); } /* Start job with POE */ @@ -568,20 +555,21 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid) return ORTE_ERR_OUT_OF_RESOURCE; } if(pid == 0) { - __poe_set_handler_default(SIGTERM); - __poe_set_handler_default(SIGINT); - __poe_set_handler_default(SIGHUP); - __poe_set_handler_default(SIGCHLD); - __poe_set_handler_default(SIGPIPE); + poe_set_handler_default(SIGTERM); + poe_set_handler_default(SIGINT); + poe_set_handler_default(SIGHUP); + poe_set_handler_default(SIGCHLD); + poe_set_handler_default(SIGPIPE); sigprocmask(0, 0, &sigs); sigprocmask(SIG_UNBLOCK, &sigs, 0); execv(mca_pls_poe_component.path, argv); opal_output(0, "orte_pls_poe: execv failed with errno=%d\n", errno); exit(-1); } else { - orte_wait_cb(pid, __poe_wait_job, NULL); + orte_wait_cb(pid, poe_wait_job, NULL); } + cleanup: while(NULL != (item = opal_list_remove_first(&map))) { OBJ_RELEASE(item); @@ -597,9 +585,6 @@ cleanup: } OBJ_DESTRUCT(&mapping_list); - if(mca_pls_poe_component.verbose>10) { - opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc); - } return rc; } @@ -612,7 +597,7 @@ pls_poe_launch - launch a POE job static int pls_poe_launch(orte_jobid_t jobid) { if(!strncmp(mca_pls_poe_component.class,"interactive",11)) { - return __poe_launch_interactive(jobid); + return poe_launch_interactive(jobid); } return ORTE_ERR_NOT_IMPLEMENTED; } @@ -634,15 +619,7 @@ pls_poe_finalize - clean up tempolary files */ static int pls_poe_finalize(void) { - if (mca_pls_poe_component.verbose > 10) { - opal_output(0, "%s: --- BEGIN ---\n", __FUNCTION__); - } - unlink(mca_pls_poe_component.cmdfile); unlink(mca_pls_poe_component.hostfile); - - if (mca_pls_poe_component.verbose > 10) { - opal_output(0, "%s: --- END ---\n", __FUNCTION__); - } return ORTE_SUCCESS; }