* Have rsh set up opal_progress so that call_yield is only called if the nodes
are oversubscribed (based on information from ras and current data in gpr). This commit was SVN r6941.
Этот коммит содержится в:
родитель
0a07341c40
Коммит
acd652a7ac
@ -215,6 +215,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
int node_name_index2;
|
||||
int proc_name_index;
|
||||
int local_exec_index, local_exec_index_end;
|
||||
int call_yield_index;
|
||||
char *jobid_string;
|
||||
char *uri, *param;
|
||||
char **argv, **tmp;
|
||||
@ -381,6 +382,11 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
free(uri);
|
||||
free(param);
|
||||
|
||||
opal_argv_append(&argc, &argv, "--mpi-call-yield");
|
||||
call_yield_index = argc;
|
||||
opal_argv_append(&argc, &argv, "0");
|
||||
|
||||
|
||||
local_exec_index_end = argc;
|
||||
if (!(remote_csh || remote_bash)) {
|
||||
opal_argv_append(&argc, &argv, ")");
|
||||
@ -472,6 +478,24 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
node->node_name);
|
||||
}
|
||||
|
||||
/* set the progress engine schedule for this node.
|
||||
* if node_slots is set to zero, then we default to
|
||||
* NOT being oversubscribed
|
||||
*/
|
||||
if (node->node_slots > 0 &&
|
||||
node->node_slots_inuse > node->node_slots) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
|
||||
node->node_slots, node->node_slots_inuse);
|
||||
}
|
||||
argv[call_yield_index] = "1";
|
||||
} else {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
|
||||
}
|
||||
argv[call_yield_index] = "0";
|
||||
}
|
||||
|
||||
/* Is this a local launch?
|
||||
*
|
||||
* Not all node names may be resolvable (if we found
|
||||
@ -567,26 +591,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* set the progress engine schedule for this node.
|
||||
* if node_slots is set to zero, then we default to
|
||||
* NOT being oversubscribed
|
||||
*/
|
||||
if (node->node_slots > 0 &&
|
||||
node->node_slots_inuse > node->node_slots) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1");
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "1", true, &env);
|
||||
} else {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "0", true, &env);
|
||||
}
|
||||
free(var);
|
||||
|
||||
/* exec the daemon */
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
|
@ -63,6 +63,8 @@
|
||||
|
||||
#include "tools/orted/orted.h"
|
||||
|
||||
extern char **environ;
|
||||
|
||||
orted_globals_t orted_globals;
|
||||
|
||||
static void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
@ -146,6 +148,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
&orted_globals.uri_pipe, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Report this process' uri on indicated pipe"},
|
||||
|
||||
{ NULL, NULL, NULL, '\0', NULL, "mpi-call-yield", 1,
|
||||
&orted_globals.mpi_call_yield, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Have MPI (or similar) applications call yield when idle" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, NULL, NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -304,6 +310,12 @@ int main(int argc, char *argv[])
|
||||
|
||||
/* check to see if I'm a bootproxy */
|
||||
if (orted_globals.bootproxy) { /* perform bootproxy-specific things */
|
||||
if (orted_globals.mpi_call_yield > 0) {
|
||||
char *var;
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "1", true, &environ);
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_rmgr.launch(orted_globals.bootproxy))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ typedef struct {
|
||||
opal_mutex_t mutex;
|
||||
opal_condition_t condition;
|
||||
bool exit_condition;
|
||||
int mpi_call_yield;
|
||||
} orted_globals_t;
|
||||
|
||||
extern orted_globals_t orted_globals;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user