1
1

Bring comm_spawn back online. Shift the trigger hosting responsibilities to the HNP.

We still have an issue with the I/O forwarding going through the spawning process, but that will be dealt with at a future time.

This commit was SVN r11943.
Этот коммит содержится в:
Ralph Castain 2006-10-03 02:07:58 +00:00
родитель b269e4da9b
Коммит 99f2986db7
5 изменённых файлов: 121 добавлений и 3 удалений

Просмотреть файл

@ -29,6 +29,7 @@
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
@ -97,6 +98,8 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
orte_app_context_t **context;
int rc;
OPAL_TRACE(2);
/* get the command */
count = 1;
if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_RMGR_CMD))) {
@ -193,6 +196,22 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
}
break;
case ORTE_RMGR_SETUP_GATES_CMD:
/* get the jobid */
count = 1;
if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &job, &count, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
goto SEND_ANSWER;
}
/* setup the stage gates */
if (ORTE_SUCCESS != (rc = orte_rmgr_base_proc_stage_gate_init(job))) {
ORTE_ERROR_LOG(rc);
goto SEND_ANSWER;
}
break;
default:
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
}

Просмотреть файл

@ -44,6 +44,8 @@ static int orte_rmgr_proxy_setup_job(
orte_std_cntr_t num_context,
orte_jobid_t* jobid);
static int orte_rmgr_proxy_setup_stage_gates(orte_jobid_t jobid);
static int orte_rmgr_proxy_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
@ -153,6 +155,68 @@ static int orte_rmgr_proxy_setup_job(
return rc;
}
/**
 * Ask the seed process to set up the stage gates for a job.
 *
 * Packs ORTE_RMGR_SETUP_GATES_CMD followed by the jobid into a buffer,
 * sends it to ORTE_RML_NAME_SEED on ORTE_RML_TAG_RMGR, then waits for a
 * reply on the same tag and checks that the reply starts with the same
 * command code.
 *
 * @param jobid  Job whose stage gates are to be set up.
 *
 * @return The result of the final unpack (ORTE_SUCCESS) when the exchange
 *         completes, the failing call's error code if packing, sending,
 *         receiving or unpacking fails, or ORTE_ERR_COMM_FAILURE if the
 *         reply carries a different command code.
 */
static int orte_rmgr_proxy_setup_stage_gates(orte_jobid_t jobid)
{
    orte_buffer_t request;
    orte_buffer_t reply;
    orte_rmgr_cmd_t cmd_code = ORTE_RMGR_SETUP_GATES_CMD;
    orte_std_cntr_t num_vals;
    int rc;

    OPAL_TRACE(1);

    /* build the request: command code first, then the jobid */
    OBJ_CONSTRUCT(&request, orte_buffer_t);

    rc = orte_dss.pack(&request, &cmd_code, 1, ORTE_RMGR_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release_request;
    }

    rc = orte_dss.pack(&request, &jobid, 1, ORTE_JOBID);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release_request;
    }

    /* ship it off; only a negative result from the RML is a failure */
    rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &request, ORTE_RML_TAG_RMGR, 0);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        goto release_request;
    }
    OBJ_DESTRUCT(&request);

    /* now wait for the answer to come back */
    OBJ_CONSTRUCT(&reply, orte_buffer_t);

    rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &reply, ORTE_RML_TAG_RMGR);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        goto release_reply;
    }

    /* the reply leads with a command code; pull it out */
    num_vals = 1;
    rc = orte_dss.unpack(&reply, &cmd_code, &num_vals, ORTE_RMGR_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release_reply;
    }

    /* a reply carrying some other command means the exchange went wrong */
    if (ORTE_RMGR_SETUP_GATES_CMD != cmd_code) {
        OBJ_DESTRUCT(&reply);
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        return ORTE_ERR_COMM_FAILURE;
    }

release_reply:
    OBJ_DESTRUCT(&reply);
    return rc;

release_request:
    OBJ_DESTRUCT(&request);
    return rc;
}
static void orte_rmgr_proxy_wireup_stdin(orte_jobid_t jobid)
{
int rc;
@ -332,7 +396,7 @@ static int orte_rmgr_proxy_spawn_job(
}
/* setup the launch system's stage gate counters and subscriptions */
if (ORTE_SUCCESS != (rc = orte_rmgr_base_proc_stage_gate_init(*jobid))) {
if (ORTE_SUCCESS != (rc = orte_rmgr_proxy_setup_stage_gates(*jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -37,6 +37,7 @@ extern "C" {
*/
/* RMGR command codes. Each value travels as a single ORTE_RMGR_CMD item. */
/* NOTE(review): presumably requests job setup - meaning inferred from the name, confirm against the receiver */
#define ORTE_RMGR_SETUP_JOB_CMD 1
/* NOTE(review): presumably requests a job spawn - meaning inferred from the name, confirm against the receiver */
#define ORTE_RMGR_SPAWN_JOB_CMD 2
/* request that the stage gates be set up for a given jobid */
#define ORTE_RMGR_SETUP_GATES_CMD 3
/* data type used when packing/unpacking a command code; matches the uint8_t typedef below */
#define ORTE_RMGR_CMD ORTE_UINT8
/* in-memory representation of a command code */
typedef uint8_t orte_rmgr_cmd_t;

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = mpi_no_op hello hello_nodename abort multi_abort
PROGS = mpi_no_op hello hello_nodename abort multi_abort simple_spawn
all: $(PROGS)

34
orte/test/mpi/simple_spawn.c Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
#include <stdio.h>
#include <mpi.h>
/*
 * Minimal MPI_Comm_spawn exercise.
 *
 * A process with no parent communicator spawns one copy of its own
 * executable (argv[0]), sends it a single integer and disconnects.
 * The spawned copy receives that integer from its parent, prints it
 * and disconnects.
 */
int main(int argc, char* argv[])
{
    MPI_Comm parent_comm;
    MPI_Comm child_comm;
    int payload;

    MPI_Init(NULL, NULL);
    MPI_Comm_get_parent(&parent_comm);

    if (MPI_COMM_NULL != parent_comm) {
        /* A real parent communicator means we are the spawned child */
        printf("Hello from the child!\n");
        MPI_Recv(&payload, 1, MPI_INT, 0, 1, parent_comm, MPI_STATUS_IGNORE);
        printf("Child received msg: %d\n", payload);
        MPI_Comm_disconnect(&parent_comm);
        printf("Child disconnected\n");
    } else {
        /* COMM_NULL came back, so we are the parent: spawn one child */
        printf("Parent about to spawn!\n");
        MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 1, MPI_INFO_NULL,
                       0, MPI_COMM_WORLD, &child_comm, MPI_ERRCODES_IGNORE);
        printf("Parent done with spawn\n");

        payload = 38;
        printf("Parent sending message to child\n");
        MPI_Send(&payload, 1, MPI_INT, 0, 1, child_comm);

        MPI_Comm_disconnect(&child_comm);
        printf("Parent disconnected\n");
    }

    MPI_Finalize();
    return 0;
}