From 65a35d92effe72b24b3e4e3914dc941d4abd5a91 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 1 Jun 2014 17:59:06 +0000 Subject: [PATCH] Cleanup compile issues - missing updates to some plm components and the slurm ras component This commit was SVN r31921. --- orte/mca/plm/alps/plm_alps_module.c | 9 +++---- orte/mca/plm/lsf/plm_lsf_module.c | 7 +++--- orte/mca/plm/slurm/plm_slurm_module.c | 9 +++---- orte/mca/plm/tm/plm_tm_module.c | 15 +++++++----- orte/mca/ras/slurm/ras_slurm_module.c | 34 ++++++++++++++++++--------- 5 files changed, 46 insertions(+), 28 deletions(-) diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index edc4e0cd78..03403bb632 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -169,7 +169,7 @@ static int plm_alps_launch_job(orte_job_t *jdata) opal_argv_append (&env_count, &app->env, "PMI_NO_PREINITIALIZE=1"); } - if (ORTE_JOB_CONTROL_RESTART & jdata->controls) { + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { /* this is a restart situation - skip to the mapping stage */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } else { @@ -206,7 +206,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ - if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) { + if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); @@ -304,7 +304,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* if the daemon already exists on this node, then * don't include it */ - if (node->daemon_launched) { + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { continue; } @@ -378,7 +378,7 @@ static void launch_daemons(int fd, short args, void *cbdata) if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, i))) { continue; } - app_prefix_dir = app->prefix_dir; + orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING); /* Check for already set cur_prefix_dir -- if different, complain */ if (NULL != app_prefix_dir) { @@ -398,6 +398,7 @@ static void launch_daemons(int fd, short args, void *cbdata) cur_prefix); } } + free(app_prefix_dir); } } diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index f50dbed055..9ac1e1dfb7 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -141,7 +141,7 @@ int plm_lsf_init(void) */ static int plm_lsf_launch_job(orte_job_t *jdata) { - if (ORTE_JOB_CONTROL_RESTART & jdata->controls) { + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { /* this is a restart situation - skip to the mapping stage */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } else { @@ -235,7 +235,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* if the daemon already exists on this node, then * don't include it */ - if (node->daemon_launched) { + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { continue; } @@ -299,7 +299,7 @@ static void launch_daemons(int fd, short args, void *cbdata) if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } - app_prefix_dir = app->prefix_dir; + orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING); /* Check for already set cur_prefix_dir -- if different, complain */ if (NULL != app_prefix_dir) { @@ -319,6 +319,7 @@ static void launch_daemons(int fd, short args, void *cbdata) "%s plm:lsf: Set prefix:%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_prefix)); } + free(app_prefix_dir); } } diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index c9290a55cf..49086a04a7 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -157,7 +157,7 @@ static int plm_slurm_init(void) */ static int plm_slurm_launch_job(orte_job_t *jdata) { - if (ORTE_JOB_CONTROL_RESTART & jdata->controls) { + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { /* this is a restart situation - skip to the mapping stage */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } else { @@ -198,7 +198,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ - if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) { + if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); @@ -298,7 +298,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* if the daemon already exists on this node, then * don't include it */ - if (node->daemon_launched) { + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { continue; } @@ -375,7 +375,7 @@ static void launch_daemons(int fd, short args, void *cbdata) if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, n))) { continue; } - app_prefix_dir = app->prefix_dir; + orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING); /* Check for already set cur_prefix_dir -- if different, complain */ if (NULL != app_prefix_dir) { @@ -396,6 +396,7 @@ static void launch_daemons(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_prefix)); } + free(app_prefix_dir); } } diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index 288c1e695b..3372151bb3 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -150,7 +150,7 @@ static int plm_tm_init(void) static int plm_tm_launch_job(orte_job_t *jdata) { - if (ORTE_JOB_CONTROL_RESTART & jdata->controls) { + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { /* this is a restart situation - skip to the mapping stage */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } else { @@ -188,13 +188,14 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_job_t *daemons, *jdata; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; int32_t launchid, *ldptr; + char *prefix_dir = NULL; jdata = state->jdata; /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ - if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & jdata->controls) { + if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); OBJ_RELEASE(state); @@ -270,7 +271,7 @@ static void launch_daemons(int fd, short args, void *cbdata) } /* if this daemon already exists, don't launch it! */ - if (node->daemon_launched) { + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { continue; } @@ -330,14 +331,15 @@ static void launch_daemons(int fd, short args, void *cbdata) there */ app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0); - if (NULL != app->prefix_dir) { + orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&prefix_dir, OPAL_STRING); + if (NULL != prefix_dir) { char *newenv; for (i = 0; NULL != env && NULL != env[i]; ++i) { /* Reset PATH */ if (0 == strncmp("PATH=", env[i], 5)) { asprintf(&newenv, "%s/%s:%s", - app->prefix_dir, bin_base, env[i] + 5); + prefix_dir, bin_base, env[i] + 5); OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:tm: resetting PATH: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -349,7 +351,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* Reset LD_LIBRARY_PATH */ else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) { asprintf(&newenv, "%s/%s:%s", - app->prefix_dir, lib_base, env[i] + 16); + prefix_dir, lib_base, env[i] + 16); OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:tm: resetting LD_LIBRARY_PATH: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -358,6 +360,7 @@ static void launch_daemons(int fd, short args, void *cbdata) free(newenv); } } + free(prefix_dir); } /* Iterate through each of the nodes and spin diff --git a/orte/mca/ras/slurm/ras_slurm_module.c b/orte/mca/ras/slurm/ras_slurm_module.c index 6bec9a3471..43343066fa 100644 --- a/orte/mca/ras/slurm/ras_slurm_module.c +++ b/orte/mca/ras/slurm/ras_slurm_module.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -739,6 +739,7 @@ static void recv_data(int fd, short args, void *cbdata) orte_app_context_t *app; orte_jobid_t jobid; orte_job_t *jdata; + char **dash_host = NULL; opal_output_verbose(2, orte_ras_base_framework.framework_output, "%s ras:slurm: dynamic allocation - data recvd", @@ -802,6 +803,8 @@ static void recv_data(int fd, short args, void *cbdata) idx = -1; sjob = -1; nodelist = NULL; + /* release the current dash_host as that contained the *desired* allocation */ + orte_remove_attribute(&app->attributes, ORTE_APP_DASH_HOST); for (i=1; NULL != alloc[i]; i++) { if (ORTE_SUCCESS != parse_alloc_msg(alloc[i], &idx, &sjob, &nodelist, &tpn)) { orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd); @@ -820,9 +823,6 @@ static void recv_data(int fd, short args, void *cbdata) opal_pointer_array_set_item(&jtrk->apps, idx, aptrk); } aptrk->sjob = sjob; - /* release the current dash_host as that contained the *desired* allocation */ - opal_argv_free(app->dash_host); - app->dash_host = NULL; /* since the nodelist/tpn may contain regular expressions, parse them */ if (ORTE_SUCCESS != (rc = orte_ras_slurm_discover(nodelist, tpn, &ndtmp))) { ORTE_ERROR_LOG(rc); @@ -835,7 +835,7 @@ static void recv_data(int fd, short args, void *cbdata) */ while (NULL != (item = opal_list_remove_first(&ndtmp))) { nd = (orte_node_t*)item; - opal_argv_append_nosize(&app->dash_host, nd->name); + opal_argv_append_nosize(&dash_host, nd->name); /* check for duplicates */ found = false; for (itm = opal_list_get_first(&nds); @@ -861,6 +861,12 @@ static void recv_data(int fd, short args, void *cbdata) /* cleanup */ opal_argv_free(alloc); OBJ_DESTRUCT(&ndtmp); + if (NULL != dash_host) { + tpn = opal_argv_join(dash_host, ','); + orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, (void*)tpn, OPAL_STRING); + opal_argv_free(dash_host); + free(tpn); + } if (opal_list_is_empty(&nds)) { /* if we get here, then we were able to contact slurm, @@ -908,6 +914,7 @@ static int dyn_allocate(orte_job_t *jdata) int i; struct timeval tv; local_jobtracker_t *jtrk; + int64_t i64, *i64ptr; if (NULL == mca_ras_slurm_component.config_file) { opal_output(0, "Cannot perform dynamic allocation as no Slurm configuration file provided"); @@ -956,6 +963,7 @@ static int dyn_allocate(orte_job_t *jdata) free(tmp); /* for each app, add its allocation request info */ + i64ptr = &i64; for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; @@ -969,8 +977,8 @@ static int dyn_allocate(orte_job_t *jdata) opal_argv_append_nosize(&cmd, tmp); free(tmp); /* if we were given a minimum number of nodes, pass it along */ - if (0 < app->min_number_of_nodes) { - asprintf(&tmp, "N=%ld", (long int)app->min_number_of_nodes); + if (orte_get_attribute(&app->attributes, ORTE_APP_MIN_NODES, (void**)&i64ptr, OPAL_INT64)) { + asprintf(&tmp, "N=%ld", (long int)i64); opal_argv_append_nosize(&cmd, tmp); free(tmp); } @@ -985,7 +993,7 @@ static int dyn_allocate(orte_job_t *jdata) free(tmp); } /* add the mandatory/optional flag */ - if (app->mandatory) { + if (orte_get_attribute(&app->attributes, ORTE_APP_MANDATORY, NULL, OPAL_BOOL)) { opal_argv_append_nosize(&cmd, "flag=mandatory"); } else { opal_argv_append_nosize(&cmd, "flag=optional"); @@ -1071,13 +1079,17 @@ static char* get_node_list(orte_app_context_t *app) int j; char **total_host = NULL; char *nodes; + char **dash_host, *dh; - if (NULL == app->dash_host) { + if (!orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&dh, OPAL_STRING)) { return NULL; } - for (j=0; NULL != app->dash_host[j]; j++) { - opal_argv_append_unique_nosize(&total_host, app->dash_host[j], false); + dash_host = opal_argv_split(dh, ','); + free(dh); + for (j=0; NULL != dash_host[j]; j++) { + opal_argv_append_unique_nosize(&total_host, dash_host[j], false); } + opal_argv_free(dash_host); if (NULL == total_host) { return NULL; }