* Fix an issue in odls_bproc where we were using the vpid instead of the number of
  processes launched locally for the stdio file names. This caused the expected
  files to not exist and bproc_vexecmove_io to fail.
* Clean up a bunch of debugging output in the bproc pls.

This commit was SVN r12102.
Этот коммит содержится в:
родитель
f91a95b3fe
Коммит
29c91cf2f3
@ -524,6 +524,7 @@ orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_env
|
||||
orte_buffer_t *ack;
|
||||
bool connect_stdin;
|
||||
orte_jobid_t jobid;
|
||||
int cycle = 0;
|
||||
|
||||
/* first, retrieve the job number we are to launch from the
|
||||
* returned data - we can extract the jobid directly from the
|
||||
@ -616,13 +617,15 @@ orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_env
|
||||
connect_stdin = false;
|
||||
}
|
||||
|
||||
rc = odls_bproc_setup_stdio(child->name, (int)child->name->vpid,
|
||||
rc = odls_bproc_setup_stdio(child->name, cycle,
|
||||
jobid, child->app_idx,
|
||||
connect_stdin);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cycle++;
|
||||
}
|
||||
|
||||
/* message to indicate that we are ready */
|
||||
|
@ -929,10 +929,18 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
fprintf(stderr, "launching app %s\n", map->apps[app_context]->app);
|
||||
opal_output_verbose(1, orte_pls_base.pls_output,
|
||||
"launching app %s", map->apps[app_context]->app);
|
||||
|
||||
while(0 != num_nodes) {
|
||||
fprintf(stderr, "\tlaunching cycle %d\n", i);
|
||||
for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n", node_list[dbg]);
|
||||
if (0 < mca_pls_bproc_component.debug) {
|
||||
opal_output_verbose(1, orte_pls_base.pls_output,
|
||||
"\tlaunching cycle %d", i);
|
||||
for (dbg=0; dbg<num_nodes; dbg++) {
|
||||
opal_output_verbose(1, orte_pls_base.pls_output,
|
||||
"\t\tlaunching on node %d", node_list[dbg]);
|
||||
}
|
||||
}
|
||||
|
||||
/* setup environment so the procs can figure out their names */
|
||||
rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start,
|
||||
@ -948,7 +956,7 @@ for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n",
|
||||
goto cleanup;
|
||||
}
|
||||
if(0 < mca_pls_bproc_component.debug) {
|
||||
opal_output(0, "pls_bproc: launching %d processes", num_nodes);
|
||||
opal_output(0, "pls_bproc: launching %d processes:", num_nodes);
|
||||
}
|
||||
rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3,
|
||||
map->apps[app_context]->app,
|
||||
@ -1153,6 +1161,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
orte_pls_bproc_setup_env(&map->apps[i]->env);
|
||||
num_processes += rc;
|
||||
}
|
||||
@ -1227,7 +1236,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
|
||||
}
|
||||
|
||||
vpid_launch = vpid_start;
|
||||
opal_output(0, "launching apps");
|
||||
|
||||
/* for each application context launch the app */
|
||||
for(context=0; context < map->num_apps; context++) {
|
||||
rc = orte_rmgr.check_context_cwd(map->apps[context], true);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user