5b9fa7e998
the RML/OOB merge in slightly easier This commit was SVN r15530. The following SVN revision numbers were found above: r15517 --> open-mpi/ompi@41977fcc95 r15520 --> open-mpi/ompi@9cbc9df1b8 r15527 --> open-mpi/ompi@2d17dd9516
165 строки
5.3 KiB
C
165 строки
5.3 KiB
C
/* -*- C -*-
|
|
*
|
|
* $HEADER$
|
|
*
|
|
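* ORTE spawn test: launches a "spawn_child" job as a child of this process,
* sends each child one RML message, and waits for the job to terminate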
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "opal/threads/condition.h"
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
|
#include "orte/mca/rmgr/rmgr.h"
|
|
#include "orte/mca/ns/ns.h"
|
|
#include "orte/mca/rml/rml.h"
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/mca/ras/ras_types.h"
|
|
#include "orte/mca/rmaps/rmaps_types.h"
|
|
#include "orte/runtime/runtime.h"
|
|
|
|
#define MY_TAG 12345
|
|
|
|
bool waitexit;
|
|
opal_mutex_t lock;
|
|
opal_condition_t cond;
|
|
|
|
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state);
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
int rc;
|
|
orte_proc_state_t cb_states;
|
|
orte_app_context_t *app;
|
|
orte_jobid_t job;
|
|
opal_list_t attributes;
|
|
opal_list_item_t *item;
|
|
char cwd[1024];
|
|
bool spawned;
|
|
orte_proc_state_t state;
|
|
orte_vpid_t i, range;
|
|
orte_process_name_t name;
|
|
struct iovec msg;
|
|
|
|
OBJ_CONSTRUCT(&lock, opal_mutex_t);
|
|
OBJ_CONSTRUCT(&cond, opal_condition_t);
|
|
waitexit = false;
|
|
|
|
if (0 > (rc = orte_init(ORTE_NON_INFRASTRUCTURE, ORTE_NON_BARRIER))) {
|
|
fprintf(stderr, "couldn't init orte - error code %d\n", rc);
|
|
return rc;
|
|
}
|
|
|
|
/* create an app_context that defines the app to be run */
|
|
app = OBJ_NEW(orte_app_context_t);
|
|
app->app = strdup("spawn_child");
|
|
opal_argv_append_nosize(&app->argv, "spawn_child");
|
|
app->num_procs = 3;
|
|
|
|
getcwd(cwd, sizeof(cwd));
|
|
app->cwd = strdup(cwd);
|
|
app->user_specified_cwd = false;
|
|
|
|
/* construct an empty attributes list - we don't need this, but it will
|
|
* allow the various steps in the launch procedure add things if they
|
|
* need to do so
|
|
*/
|
|
OBJ_CONSTRUCT(&attributes, opal_list_t);
|
|
|
|
/* tell the RTE that we want to be a child of this process' job */
|
|
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_NS_USE_PARENT,
|
|
ORTE_JOBID, &(orte_process_info.my_name->jobid),
|
|
ORTE_RMGR_ATTR_OVERRIDE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* tell the RTE that we want to the children to run inside of our allocation -
|
|
* don't go get one just for them
|
|
*/
|
|
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RAS_USE_PARENT_ALLOCATION,
|
|
ORTE_JOBID, &(orte_process_info.my_name->jobid),
|
|
ORTE_RMGR_ATTR_OVERRIDE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* tell the RTE that we want the children mapped the same way as their parent */
|
|
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_USE_PARENT_PLAN,
|
|
ORTE_JOBID, &(orte_process_info.my_name->jobid),
|
|
ORTE_RMGR_ATTR_OVERRIDE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* tell the RTE that we want to be cross-connected to the children so we receive
|
|
* their ORTE-level information - e.g., OOB contact info - when they
|
|
* reach the STG1 stage gate
|
|
*/
|
|
state = ORTE_PROC_ORTE_STARTUP_COMPLETE;
|
|
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMGR_XCONNECT_AT_SPAWN,
|
|
ORTE_PROC_STATE, &state,
|
|
ORTE_RMGR_ATTR_OVERRIDE))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* launch the job, specifing a callback function so we get notified
|
|
* when it completes
|
|
*/
|
|
|
|
fprintf(stderr, "Parent: My local rank is %ld with %ld num_local_procs - spawning children!\n",
|
|
(long)orte_process_info.local_rank, (long)orte_process_info.num_local_procs);
|
|
cb_states = ORTE_PROC_STATE_TERMINATED;
|
|
spawned = true;
|
|
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(&app, 1, &job, 0, NULL, job_state_callback, cb_states, &attributes))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
spawned = false;
|
|
}
|
|
if (spawned) fprintf(stderr, "Parent: children spawned!\n");
|
|
|
|
/* cleanup the attribute list, just in case someone added something to it */
|
|
while (NULL != (item = opal_list_remove_first(&attributes))) OBJ_RELEASE(item);
|
|
OBJ_DESTRUCT(&attributes);
|
|
|
|
/* done with the application */
|
|
OBJ_RELEASE(app);
|
|
|
|
/* send messages to all children - this will verify that we know their contact info */
|
|
orte_ns.get_vpid_range(job, &range);
|
|
name.jobid = job;
|
|
i = 1;
|
|
msg.iov_base = (void *) &i;
|
|
msg.iov_len = sizeof(i);
|
|
for (i=0; i < range; i++) {
|
|
name.vpid = i;
|
|
fprintf(stderr, "Parent: sending message to child %s\n", ORTE_NAME_PRINT(&name));
|
|
if (0 > (rc = orte_rml.send(&name, &msg, 1, MY_TAG, 0))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
}
|
|
}
|
|
|
|
/* Wait for the app to complete */
|
|
if (spawned) {
|
|
OPAL_THREAD_LOCK(&lock);
|
|
while (!waitexit) {
|
|
opal_condition_wait(&cond, &lock);
|
|
}
|
|
}
|
|
|
|
/* All done */
|
|
orte_finalize();
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Job-state callback handed to orte_rmgr.spawn_job() by main().
 *
 * Sets the shared `waitexit` flag and signals `cond`, both while holding
 * `lock`, so that the wait loop in main() can leave.  Neither `jobid` nor
 * `state` is inspected.
 */
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
{
    OPAL_THREAD_LOCK(&lock);
    waitexit = true;                /* condition main() is waiting for */
    opal_condition_signal(&cond);   /* wake the waiter on `cond` */
    OPAL_THREAD_UNLOCK(&lock);
}
|