Make these changes available for Edgar et al to continue working on.
This commit was SVN r5256.
Этот коммит содержится в:
родитель
3a27c53d19
Коммит
c41b275c35
@ -358,156 +358,110 @@ ompi_comm_start_processes(int count, char **array_of_commands,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* get the spawn handle to start spawning stuff */
|
||||
requires = OMPI_RTE_SPAWN_FROM_MPI | OMPI_RTE_SPAWN_HIGH_QOS;
|
||||
if (count > 1) requires |= OMPI_RTE_SPAWN_MPMD;
|
||||
spawn_handle = ompi_rte_get_spawn_handle(requires, true);
|
||||
if (NULL == spawn_handle) {
|
||||
printf("show_help: get_spawn_handle failed\n");
|
||||
return OMPI_ERROR;
|
||||
/* Convert the list of commands to an array of orte_app_context_t
|
||||
pointers */
|
||||
|
||||
apps = (orte_app_context_t**)malloc(count * sizeof(orte_app_context_t *));
|
||||
if (NULL == apps) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* get our allocations and set them up, one by one */
|
||||
OBJ_CONSTRUCT(&schedlist, ompi_list_t);
|
||||
nodelists = malloc(sizeof(ompi_list_t*) * count);
|
||||
if (NULL == nodelists) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* iterate through all the counts, creating an app schema entry
|
||||
for each one */
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
nodelists[i] = ompi_rte_allocate_resources(spawn_handle,
|
||||
new_jobid, 0,
|
||||
array_of_maxprocs[i]);
|
||||
if (NULL == nodelists[i]) {
|
||||
/* BWB - XXX - help - need to unwind what already done */
|
||||
return OMPI_ERROR;
|
||||
for (i = 0; i < count; ++i) {
|
||||
apps[i] = OBJ_NEW(orte_app_context_t);
|
||||
if (NULL == apps[i]) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
/* rollback what was already done */
|
||||
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
total_start_procs += array_of_maxprocs[i];
|
||||
|
||||
|
||||
/*
|
||||
* Process mapping
|
||||
/* copy over the name of the executable */
|
||||
apps[i]->app = strdup(array_of_commands[i]);
|
||||
if (NULL == apps[i]->app) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
/* rollback what was already done */
|
||||
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* record the number of procs to be generated */
|
||||
apps[i]->num_procs = array_of_maxprocs[i];
|
||||
/* copy over the argv array */
|
||||
if (MPI_ARGVS_NULL != array_of_argv &&
|
||||
MPI_ARGV_NULL != array_of_argv[i]) {
|
||||
/* first need to find out how many entries there are */
|
||||
apps[i]->argc = 0;
|
||||
j=0;
|
||||
while (NULL != array_of_argv[i][j]) {
|
||||
j++;
|
||||
}
|
||||
apps[i]->argc = j;
|
||||
/* now copy them over, ensuring to NULL terminate the array */
|
||||
if (0 < j) {
|
||||
apps[i]->argv = (char**)malloc((1 + apps[i]->argc) * sizeof(char*));
|
||||
if (NULL == apps[i]->argv) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
/* rollback what was already done */
|
||||
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for (j=0; j < apps[i]->argc; j++) {
|
||||
apps[i]->argv[j] = strdup(array_of_argv[i][j]);
|
||||
}
|
||||
apps[i]->argv[apps[i]->argc] = NULL;
|
||||
}
|
||||
}
|
||||
/* the environment gets set by the launcher
|
||||
* all we need to do is add the specific values
|
||||
* needed for comm_spawn
|
||||
*/
|
||||
sched = OBJ_NEW(ompi_rte_node_schedule_t);
|
||||
ompi_argv_append (&(sched->argc), &(sched->argv),
|
||||
array_of_commands[i]);
|
||||
|
||||
if (array_of_argv != MPI_ARGVS_NULL &&
|
||||
array_of_argv[i] != MPI_ARGV_NULL ) {
|
||||
int j = 0;
|
||||
char *arg = array_of_argv[i][j];
|
||||
|
||||
while (arg != NULL) {
|
||||
ompi_argv_append(&(sched->argc), &(sched->argv), arg);
|
||||
arg = array_of_argv[i][++j];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* build environment to be passed
|
||||
*/
|
||||
mca_pcm_base_build_base_env(environ, &(sched->envc), &(sched->env));
|
||||
|
||||
/* set initial contact info */
|
||||
if (ompi_process_info.seed) {
|
||||
my_contact_info = mca_oob_get_contact_info();
|
||||
} else {
|
||||
my_contact_info = strdup(ompi_universe_info.ns_replica);
|
||||
}
|
||||
|
||||
asprintf(&tmp, "OMPI_MCA_ns_base_replica=%s", my_contact_info);
|
||||
ompi_argv_append(&(sched->envc), &(sched->env), tmp);
|
||||
free(tmp);
|
||||
|
||||
asprintf(&tmp, "OMPI_MCA_gpr_base_replica=%s", my_contact_info);
|
||||
ompi_argv_append(&(sched->envc), &(sched->env), tmp);
|
||||
free(tmp);
|
||||
|
||||
if (NULL != ompi_universe_info.name) {
|
||||
asprintf(&tmp, "OMPI_universe_name=%s", ompi_universe_info.name);
|
||||
ompi_argv_append(&(sched->envc), &(sched->env), tmp);
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
/* Add environment variable with the contact information for the
|
||||
child processes.
|
||||
12.23.2004 EG: the content of the environment variable
|
||||
does know hold additionally to the oob contact information
|
||||
also the information, which application in spawn/spawn_multiple
|
||||
it has been. This information is needed to construct the
|
||||
attribute MPI_APPNUM on the children side (an optional
|
||||
MPI-2 attribute. */
|
||||
asprintf(&envvarname, "OMPI_PARENT_PORT_%u", new_jobid);
|
||||
asprintf(&tmp, "%s=%s:%d", envvarname, port_name, i);
|
||||
ompi_argv_append(&(sched->envc), &(sched->env), tmp);
|
||||
free(tmp);
|
||||
free(envvarname);
|
||||
|
||||
/* Check for the 'wdir' and later potentially for the
|
||||
'path' Info object */
|
||||
have_wdir = 0;
|
||||
if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) {
|
||||
ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag);
|
||||
if ( flag ) {
|
||||
sched->cwd = cwd;
|
||||
have_wdir = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* default value: If the user did not tell us where to look for the
|
||||
executable, we assume the current working directory */
|
||||
if ( !have_wdir ) {
|
||||
getcwd(cwd, OMPI_PATH_MAX);
|
||||
sched->cwd = strdup(cwd);
|
||||
}
|
||||
sched->nodelist = nodelists[i];
|
||||
|
||||
if (sched->argc == 0) {
|
||||
printf("no app to start\n");
|
||||
return MPI_ERR_ARG;
|
||||
12.23.2004 EG: the content of the environment variable
|
||||
does know hold additionally to the oob contact information
|
||||
also the information, which application in spawn/spawn_multiple
|
||||
it has been. This information is needed to construct the
|
||||
attribute MPI_APPNUM on the children side (an optional
|
||||
MPI-2 attribute. */
|
||||
apps[i]->num_env = 1;
|
||||
apps[i]->env = (char**)malloc((1+apps[i]->num_env) * sizeof(char*));
|
||||
if (NULL == apps[i]->env) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
/* rollback what was already done */
|
||||
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
ompi_list_append(&schedlist, (ompi_list_item_t*) sched);
|
||||
asprintf(&envvarname, "OMPI_PARENT_PORT_%u", new_jobid);
|
||||
asprintf(&(apps[i]->env[0]), "%s=%s:%d", envvarname, port_name, i);
|
||||
free(envvarname);
|
||||
apps[i]->env[1] = NULL;
|
||||
/* Check for the 'wdir' and later potentially for the
|
||||
'path' Info object */
|
||||
have_wdir = 0;
|
||||
if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) {
|
||||
ompi_info_get (array_of_info[i], "wdir", valuelen, cwd, &flag);
|
||||
if ( flag ) {
|
||||
apps[i]->cwd = cwd;
|
||||
have_wdir = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* default value: If the user did not tell us where to look for the
|
||||
executable, we assume the current working directory */
|
||||
if ( !have_wdir ) {
|
||||
getcwd(cwd, OMPI_PATH_MAX);
|
||||
apps[i]->cwd = strdup(cwd);
|
||||
}
|
||||
/* leave the map info alone - the launcher will
|
||||
* decide where to put things
|
||||
*/
|
||||
} /* for (i = 0 ; i < count ; ++i) */
|
||||
|
||||
|
||||
/*
|
||||
* register to monitor the startup
|
||||
*/
|
||||
/* setup segment for this job */
|
||||
asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT,
|
||||
ompi_name_server.convert_jobid_to_string(new_jobid));
|
||||
|
||||
/* register a synchro on the segment so we get notified when everyone registers */
|
||||
rc_tag = ompi_registry.synchro(
|
||||
OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT|
|
||||
OMPI_REGISTRY_SYNCHRO_MODE_STARTUP,
|
||||
OMPI_REGISTRY_OR,
|
||||
segment,
|
||||
NULL,
|
||||
total_start_procs,
|
||||
ompi_rte_all_procs_registered, NULL);
|
||||
|
||||
free(segment);
|
||||
/*
|
||||
* spawn procs
|
||||
*/
|
||||
if (OMPI_SUCCESS != ompi_rte_spawn_procs(spawn_handle, new_jobid, &schedlist)) {
|
||||
printf("show_help: woops! we didn't spawn :( \n");
|
||||
return MPI_ERR_SPAWN;
|
||||
/* spawn procs */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn(apps, count, new_jobid,
|
||||
ompi_comm_spawn_monitor))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return MPI_ERR_SPAWN;
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ompi_rte_monitor_procs_registered()) {
|
||||
ompi_output(0, "[%d,%d,%d] procs didn't all register - returning an error",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()));
|
||||
return MPI_ERR_SPAWN;
|
||||
}
|
||||
|
||||
/*
|
||||
* tell processes okay to start by sending startup msg
|
||||
*/
|
||||
ompi_rte_job_startup(new_jobid);
|
||||
|
||||
/*
|
||||
* Clean up
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user