1
1

Clean up compile issues - missing updates to some plm components and the slurm ras component

This commit was SVN r31921.
Этот коммит содержится в:
Ralph Castain 2014-06-01 17:59:06 +00:00
родитель 4b0c3dcd29
Коммит 65a35d92ef
5 изменённых файлов: 46 добавлений и 28 удалений

Просмотреть файл

@ -169,7 +169,7 @@ static int plm_alps_launch_job(orte_job_t *jdata)
opal_argv_append (&env_count, &app->env, "PMI_NO_PREINITIALIZE=1");
}
if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
@ -206,7 +206,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* if we are launching debugger daemons, then just go
* do it - no new daemons will be launched
*/
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) {
if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
@ -304,7 +304,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* if the daemon already exists on this node, then
* don't include it
*/
if (node->daemon_launched) {
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
continue;
}
@ -378,7 +378,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, i))) {
continue;
}
app_prefix_dir = app->prefix_dir;
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING);
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
@ -398,6 +398,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
cur_prefix);
}
}
free(app_prefix_dir);
}
}

Просмотреть файл

@ -141,7 +141,7 @@ int plm_lsf_init(void)
*/
static int plm_lsf_launch_job(orte_job_t *jdata)
{
if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
@ -235,7 +235,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* if the daemon already exists on this node, then
* don't include it
*/
if (node->daemon_launched) {
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
continue;
}
@ -299,7 +299,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
}
app_prefix_dir = app->prefix_dir;
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING);
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
@ -319,6 +319,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
"%s plm:lsf: Set prefix:%s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_prefix));
}
free(app_prefix_dir);
}
}

Просмотреть файл

@ -157,7 +157,7 @@ static int plm_slurm_init(void)
*/
static int plm_slurm_launch_job(orte_job_t *jdata)
{
if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
@ -198,7 +198,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* if we are launching debugger daemons, then just go
* do it - no new daemons will be launched
*/
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) {
if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
@ -298,7 +298,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* if the daemon already exists on this node, then
* don't include it
*/
if (node->daemon_launched) {
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
continue;
}
@ -375,7 +375,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, n))) {
continue;
}
app_prefix_dir = app->prefix_dir;
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING);
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
@ -396,6 +396,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
cur_prefix));
}
free(app_prefix_dir);
}
}

Просмотреть файл

@ -150,7 +150,7 @@ static int plm_tm_init(void)
static int plm_tm_launch_job(orte_job_t *jdata)
{
if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
@ -188,13 +188,14 @@ static void launch_daemons(int fd, short args, void *cbdata)
orte_job_t *daemons, *jdata;
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
int32_t launchid, *ldptr;
char *prefix_dir = NULL;
jdata = state->jdata;
/* if we are launching debugger daemons, then just go
* do it - no new daemons will be launched
*/
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & jdata->controls) {
if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
@ -270,7 +271,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
}
/* if this daemon already exists, don't launch it! */
if (node->daemon_launched) {
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
continue;
}
@ -330,14 +331,15 @@ static void launch_daemons(int fd, short args, void *cbdata)
there
*/
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
if (NULL != app->prefix_dir) {
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&prefix_dir, OPAL_STRING);
if (NULL != prefix_dir) {
char *newenv;
for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s",
app->prefix_dir, bin_base, env[i] + 5);
prefix_dir, bin_base, env[i] + 5);
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:tm: resetting PATH: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -349,7 +351,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s",
app->prefix_dir, lib_base, env[i] + 16);
prefix_dir, lib_base, env[i] + 16);
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:tm: resetting LD_LIBRARY_PATH: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -358,6 +360,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
free(newenv);
}
}
free(prefix_dir);
}
/* Iterate through each of the nodes and spin

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -739,6 +739,7 @@ static void recv_data(int fd, short args, void *cbdata)
orte_app_context_t *app;
orte_jobid_t jobid;
orte_job_t *jdata;
char **dash_host = NULL;
opal_output_verbose(2, orte_ras_base_framework.framework_output,
"%s ras:slurm: dynamic allocation - data recvd",
@ -802,6 +803,8 @@ static void recv_data(int fd, short args, void *cbdata)
idx = -1;
sjob = -1;
nodelist = NULL;
/* release the current dash_host as that contained the *desired* allocation */
orte_remove_attribute(&app->attributes, ORTE_APP_DASH_HOST);
for (i=1; NULL != alloc[i]; i++) {
if (ORTE_SUCCESS != parse_alloc_msg(alloc[i], &idx, &sjob, &nodelist, &tpn)) {
orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd);
@ -820,9 +823,6 @@ static void recv_data(int fd, short args, void *cbdata)
opal_pointer_array_set_item(&jtrk->apps, idx, aptrk);
}
aptrk->sjob = sjob;
/* release the current dash_host as that contained the *desired* allocation */
opal_argv_free(app->dash_host);
app->dash_host = NULL;
/* since the nodelist/tpn may contain regular expressions, parse them */
if (ORTE_SUCCESS != (rc = orte_ras_slurm_discover(nodelist, tpn, &ndtmp))) {
ORTE_ERROR_LOG(rc);
@ -835,7 +835,7 @@ static void recv_data(int fd, short args, void *cbdata)
*/
while (NULL != (item = opal_list_remove_first(&ndtmp))) {
nd = (orte_node_t*)item;
opal_argv_append_nosize(&app->dash_host, nd->name);
opal_argv_append_nosize(&dash_host, nd->name);
/* check for duplicates */
found = false;
for (itm = opal_list_get_first(&nds);
@ -861,6 +861,12 @@ static void recv_data(int fd, short args, void *cbdata)
/* cleanup */
opal_argv_free(alloc);
OBJ_DESTRUCT(&ndtmp);
if (NULL != dash_host) {
tpn = opal_argv_join(dash_host, ',');
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, (void*)tpn, OPAL_STRING);
opal_argv_free(dash_host);
free(tpn);
}
if (opal_list_is_empty(&nds)) {
/* if we get here, then we were able to contact slurm,
@ -908,6 +914,7 @@ static int dyn_allocate(orte_job_t *jdata)
int i;
struct timeval tv;
local_jobtracker_t *jtrk;
int64_t i64, *i64ptr;
if (NULL == mca_ras_slurm_component.config_file) {
opal_output(0, "Cannot perform dynamic allocation as no Slurm configuration file provided");
@ -956,6 +963,7 @@ static int dyn_allocate(orte_job_t *jdata)
free(tmp);
/* for each app, add its allocation request info */
i64ptr = &i64;
for (i=0; i < jdata->apps->size; i++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
@ -969,8 +977,8 @@ static int dyn_allocate(orte_job_t *jdata)
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
/* if we were given a minimum number of nodes, pass it along */
if (0 < app->min_number_of_nodes) {
asprintf(&tmp, "N=%ld", (long int)app->min_number_of_nodes);
if (orte_get_attribute(&app->attributes, ORTE_APP_MIN_NODES, (void**)&i64ptr, OPAL_INT64)) {
asprintf(&tmp, "N=%ld", (long int)i64);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
}
@ -985,7 +993,7 @@ static int dyn_allocate(orte_job_t *jdata)
free(tmp);
}
/* add the mandatory/optional flag */
if (app->mandatory) {
if (orte_get_attribute(&app->attributes, ORTE_APP_MANDATORY, NULL, OPAL_BOOL)) {
opal_argv_append_nosize(&cmd, "flag=mandatory");
} else {
opal_argv_append_nosize(&cmd, "flag=optional");
@ -1071,13 +1079,17 @@ static char* get_node_list(orte_app_context_t *app)
int j;
char **total_host = NULL;
char *nodes;
char **dash_host, *dh;
if (NULL == app->dash_host) {
if (!orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&dh, OPAL_STRING)) {
return NULL;
}
for (j=0; NULL != app->dash_host[j]; j++) {
opal_argv_append_unique_nosize(&total_host, app->dash_host[j], false);
dash_host = opal_argv_split(dh, ',');
free(dh);
for (j=0; NULL != dash_host[j]; j++) {
opal_argv_append_unique_nosize(&total_host, dash_host[j], false);
}
opal_argv_free(dash_host);
if (NULL == total_host) {
return NULL;
}