1
1

Use the ORTE attributes (orte_get_attribute/orte_set_attribute) in the fault-tolerance (FT) code to fix compile errors

This commit was SVN r32093.
Этот коммит содержится в:
Adrian Reber 2014-06-26 03:19:17 +00:00
родитель 10c1a50705
Коммит cabf1d4e68
7 изменённых файлов: 80 добавлений и 67 удалений

Просмотреть файл

@ -431,18 +431,26 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
opal_list_append(&coll->participants, &nm->super);
#if OPAL_ENABLE_FT_CR == 1
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_SNAPC_INIT_BAR, (void**)gidptr, )) {
coll = orte_grpcomm_base_setup_collective(jdata->snapc_init_barrier);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
{
orte_grpcomm_coll_id_t gid, *gidptr;
gidptr = &gid;
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_SNAPC_INIT_BAR,
(void**)&gidptr, ORTE_GRPCOMM_COLL_ID_T)) {
coll = orte_grpcomm_base_setup_collective(*gidptr);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
}
coll = orte_grpcomm_base_setup_collective(jdata->snapc_fini_barrier);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_SNAPC_FINI_BAR,
(void**)&gidptr, ORTE_GRPCOMM_COLL_ID_T)) {
coll = orte_grpcomm_base_setup_collective(*gidptr);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
}
}
#endif

Просмотреть файл

@ -290,7 +290,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
#if OPAL_ENABLE_FT_CR == 1
{
orte_grpcomm_collective_id_t id;
orte_grpcomm_coll_id_t id;
id = orte_grpcomm_base_get_coll_id();
orte_set_attribute(&caddy->jdata->attributes, ORTE_JOB_SNAPC_INIT_BAR, ORTE_ATTR_GLOBAL, &id, ORTE_GRPCOMM_COLL_ID_T);
(void) mca_base_var_env_name("orte_snapc_init_barrier_id", &barcr1_par);

Просмотреть файл

@ -257,7 +257,7 @@ int global_coord_setup_job(orte_jobid_t jobid) {
return ORTE_ERR_NOT_FOUND;
}
if( ORTE_JOB_CONTROL_RESTART == jdata->controls ) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
"Global) Restarting Job %s...",
ORTE_JOBID_PRINT(jobid)));

Просмотреть файл

@ -696,26 +696,27 @@ int orte_sstore_stage_local_fetch_app_deps(orte_app_context_t *app)
orte_proc_t *child = NULL;
int loc_argc = 0;
bool skip_xfer = false;
char *sload = NULL;
if( !app->used_on_node || NULL == app->sstore_load ) {
orte_get_attribute(&app->attributes, ORTE_APP_SSTORE_LOAD, (void **)&sload, OPAL_STRING);
if(!ORTE_FLAG_TEST(app, ORTE_APP_FLAG_USED_ON_NODE) || NULL == sload) {
OPAL_OUTPUT_VERBOSE((30, mca_sstore_stage_component.super.output_handle,
"sstore:stage:(local): fetch_app_deps(%3d): Not for this daemon (%s, %d, %s)",
app->idx,
(app->used_on_node ? "T" : "F"),
(int)app->num_procs,
app->sstore_load));
app->idx, (ORTE_FLAG_TEST(app, ORTE_APP_FLAG_USED_ON_NODE) ? "T" : "F"),
(int)app->num_procs, sload));
/* Nothing to do */
goto cleanup;
}
OPAL_OUTPUT_VERBOSE((10, mca_sstore_stage_component.super.output_handle,
"sstore:stage:(local): fetch_app_deps(%3d): %s",
app->idx, app->sstore_load));
app->idx, sload));
/*
* Extract the 'ref:seq' parameter
*/
sstore_args = opal_argv_split(app->sstore_load, ':');
sstore_args = opal_argv_split(sload, ':');
req_snap_loc = strdup(sstore_args[0]);
req_snap_global_ref = strdup(sstore_args[1]);
req_snap_ref = strdup(sstore_args[2]);

Просмотреть файл

@ -607,19 +607,26 @@ int orte_daemon(int argc, char *argv[])
opal_list_append(&coll->participants, &nm->super);
#if OPAL_ENABLE_FT_CR == 1
jdata->snapc_init_barrier = orte_grpcomm_base_get_coll_id();
coll = orte_grpcomm_base_setup_collective(jdata->snapc_init_barrier);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
{
orte_grpcomm_coll_id_t id;
id = orte_grpcomm_base_get_coll_id();
coll = orte_grpcomm_base_setup_collective(id);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
orte_set_attribute(&jdata->attributes, ORTE_JOB_SNAPC_INIT_BAR,
ORTE_ATTR_GLOBAL, &id, ORTE_GRPCOMM_COLL_ID_T);
jdata->snapc_fini_barrier = orte_grpcomm_base_get_coll_id();
coll = orte_grpcomm_base_setup_collective(jdata->snapc_fini_barrier);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
id = orte_grpcomm_base_get_coll_id();
coll = orte_grpcomm_base_setup_collective(id);
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = jdata->jobid;
nm->name.vpid = ORTE_VPID_WILDCARD;
opal_list_append(&coll->participants, &nm->super);
orte_set_attribute(&jdata->attributes, ORTE_JOB_SNAPC_FINI_BAR,
ORTE_ATTR_GLOBAL, &id, ORTE_GRPCOMM_COLL_ID_T);
}
#endif
/* create a string that contains our uri + sysinfo */

Просмотреть файл

@ -538,6 +538,9 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) {
orte_jobid_t mask=0x0000ffff;
#if OPAL_ENABLE_FT_CR == 1
char * state_str = NULL;
size_t ckpt_state;
char *snap_ref = NULL;
char *snap_loc = NULL;
#endif
for(i=0; i < num_jobs; i++) {
@ -561,18 +564,17 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) {
len_slots = 6;
len_vpid_r = (int) strlen("Num Procs");
#if OPAL_ENABLE_FT_CR == 1
orte_snapc_ckpt_state_str(&state_str, job->ckpt_state);
orte_get_attribute(&job->attributes, ORTE_JOB_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32);
orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING);
orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING);
orte_snapc_ckpt_state_str(&state_str, ckpt_state);
len_ckpt_s = (int) (strlen(state_str) < strlen("Ckpt State") ?
strlen("Ckpt State") :
strlen(state_str) );
len_ckpt_r = (int) (NULL == job->ckpt_snapshot_ref ? strlen("Ckpt Ref") :
(strlen(job->ckpt_snapshot_ref) < strlen("Ckpt Ref") ?
strlen("Ckpt Ref") :
strlen(job->ckpt_snapshot_ref) ) );
len_ckpt_l = (int) (NULL == job->ckpt_snapshot_loc ? strlen("Ckpt Loc") :
(strlen(job->ckpt_snapshot_loc) < strlen("Ckpt Loc") ?
strlen("Ckpt Loc") :
strlen(job->ckpt_snapshot_loc) ) );
len_ckpt_r = (int) (NULL == snap_ref ? strlen("Ckpt Ref") : (strlen(snap_ref) < strlen("Ckpt Ref") ?
strlen("Ckpt Ref") : strlen(snap_ref)));
len_ckpt_l = (int) (NULL == snap_loc ? strlen("Ckpt Loc") : (strlen(snap_loc) < strlen("Ckpt Loc") ?
strlen("Ckpt Loc") : strlen(snap_loc)));
#else
len_ckpt_s = -3;
len_ckpt_r = -3;
@ -614,12 +616,8 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) {
printf("%*d | ", len_vpid_r, job->num_procs);
#if OPAL_ENABLE_FT_CR == 1
printf("%*s | ", len_ckpt_s, state_str);
printf("%*s | ", len_ckpt_r, (NULL == job->ckpt_snapshot_ref ?
"" :
job->ckpt_snapshot_ref) );
printf("%*s |", len_ckpt_l, (NULL == job->ckpt_snapshot_loc ?
"" :
job->ckpt_snapshot_loc) );
printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref));
printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc));
#endif
printf("\n");
@ -648,6 +646,9 @@ static int pretty_print_vpids(orte_job_t *job) {
char *o_proc_name;
#if OPAL_ENABLE_FT_CR == 1
char *state_str = NULL;
size_t ckpt_state;
char *snap_ref = NULL;
char *snap_loc = NULL;
#endif
char *nodename;
@ -715,19 +716,20 @@ static int pretty_print_vpids(orte_job_t *job) {
if( (int)strlen(orte_proc_state_to_str(vpid->state)) > len_state)
len_state = strlen(orte_proc_state_to_str(vpid->state));
#if OPAL_ENABLE_FT_CR == 1
orte_snapc_ckpt_state_str(&state_str, vpid->ckpt_state);
orte_get_attribute(&vpid->attributes, ORTE_PROC_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32);
orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING);
orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING);
orte_snapc_ckpt_state_str(&state_str, ckpt_state);
if( (int)strlen(state_str) > len_ckpt_s)
len_ckpt_s = strlen(state_str);
if( NULL != vpid->ckpt_snapshot_ref &&
(int)strlen(vpid->ckpt_snapshot_ref) > len_ckpt_r)
len_ckpt_r = strlen(vpid->ckpt_snapshot_ref);
if( NULL != vpid->ckpt_snapshot_loc &&
(int)strlen(vpid->ckpt_snapshot_loc) > len_ckpt_l)
len_ckpt_l = strlen(vpid->ckpt_snapshot_loc);
if(NULL != snap_ref && (int)strlen(snap_ref) > len_ckpt_r)
len_ckpt_r = strlen(snap_ref);
if(NULL != snap_loc && (int)strlen(snap_loc) > len_ckpt_l)
len_ckpt_l = strlen(snap_loc);
#endif
}
@ -798,12 +800,8 @@ static int pretty_print_vpids(orte_job_t *job) {
#if OPAL_ENABLE_FT_CR == 1
printf("%*s | ", len_ckpt_s, state_str);
printf("%*s | ", len_ckpt_r, (NULL == vpid->ckpt_snapshot_ref ?
"" :
vpid->ckpt_snapshot_ref));
printf("%*s |", len_ckpt_l, (NULL == vpid->ckpt_snapshot_loc ?
"" :
vpid->ckpt_snapshot_loc));
printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref));
printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc));
#endif
printf("\n");

Просмотреть файл

@ -1965,10 +1965,9 @@ static int create_app(int argc, char* argv[],
}
#if OPAL_ENABLE_FT_CR == 1
if( NULL != orterun_globals.sstore_load ) {
app->sstore_load = strdup(orterun_globals.sstore_load);
} else {
app->sstore_load = NULL;
if(NULL != orterun_globals.sstore_load) {
orte_set_attribute(&app->attributes, ORTE_APP_SSTORE_LOAD, ORTE_ATTR_LOCAL,
orterun_globals.sstore_load, OPAL_STRING);
}
#endif