1
1

Add two new API functions to the RMGR, and modify the "spawn" API to support the enhanced MPI-2 functionality.

No implementation backs these new APIs - just placeholders for now.

This commit was SVN r11699.
Этот коммит содержится в:
Ralph Castain 2006-09-19 01:45:05 +00:00
родитель f8de894efe
Коммит 0ad0d84afd
15 изменённых файлов: 145 добавлений и 21 удалений

Просмотреть файл

@ -533,7 +533,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
if (NULL != base_prefix) free(base_prefix);
/* spawn procs */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, NULL, ORTE_PROC_STATE_NONE))) {
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
ORTE_ERROR_LOG(rc);
opal_progress_event_decrement();
return MPI_ERR_SPAWN;

Просмотреть файл

@ -23,6 +23,7 @@ headers += \
libmca_rmgr_la_SOURCES += \
base/rmgr_base_check_context.c \
base/rmgr_base_context.c \
base/rmgr_base_con_discon.c \
base/rmgr_base_close.c \
base/rmgr_base_open.c \
base/rmgr_base_receive.c \

49
orte/mca/rmgr/base/rmgr_base_con_discon.c Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* Support functions for the RMGR subsystem
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <string.h>
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "orte/dss/dss.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
/**
 * Base placeholder for the RMGR "connect" entry point.
 *
 * No connection logic is implemented here: both arguments are ignored
 * and the call unconditionally reports that the operation is not
 * supported.
 *
 * @param num_connect  Ignored by this placeholder.
 * @param connect      Ignored by this placeholder.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
                           orte_process_name_t *connect)
{
    /* Arguments are intentionally unused until a real implementation exists. */
    (void)num_connect;
    (void)connect;

    return ORTE_ERR_NOT_SUPPORTED;
}
/**
 * Base placeholder for the RMGR "disconnect" entry point.
 *
 * No disconnection logic is implemented here: both arguments are
 * ignored and the call unconditionally reports that the operation is
 * not supported.
 *
 * @param num_disconnect  Ignored by this placeholder.
 * @param disconnect      Ignored by this placeholder.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
                              orte_process_name_t *disconnect)
{
    /* Arguments are intentionally unused until a real implementation exists. */
    (void)num_disconnect;
    (void)disconnect;

    return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -53,6 +53,8 @@ orte_rmgr_base_module_t orte_rmgr = {
NULL,
orte_rmgr_base_create_not_available,
orte_rmgr_base_spawn_not_available,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
orte_rmgr_base_finalize_not_available,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context,

Просмотреть файл

@ -178,7 +178,8 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
}
/* process the request */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job, NULL, ORTE_PROC_STATE_NONE))) {
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job,
0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
ORTE_ERROR_LOG(rc);
goto SEND_ANSWER;
}

Просмотреть файл

@ -43,6 +43,8 @@ orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions)
{

Просмотреть файл

@ -83,12 +83,22 @@ int orte_rmgr_base_create_not_available(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid);
int orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
int orte_rmgr_base_finalize_not_available(void);
/*

Просмотреть файл

@ -46,9 +46,17 @@ static int orte_rmgr_cnos_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
static int orte_rmgr_cnos_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
static int orte_rmgr_cnos_disconnect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
static int orte_rmgr_cnos_finalize(void);
static int orte_rmgr_cnos_get_app_context(orte_jobid_t jobid,
@ -76,6 +84,8 @@ orte_rmgr_base_module_t orte_rmgr_cnos_module = {
NULL, /* don't need special init */
orte_rmgr_cnos_setup_job,
orte_rmgr_cnos_spawn_job,
orte_rmgr_cnos_connect,
orte_rmgr_cnos_disconnect,
orte_rmgr_cnos_finalize,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_cnos_get_app_context,
@ -109,6 +119,18 @@ static int orte_rmgr_cnos_spawn_job(
return ORTE_ERR_NOT_SUPPORTED;
}
/*
 * CNOS "connect" entry point: a stub that ignores its arguments and
 * always returns ORTE_ERR_NOT_SUPPORTED.
 */
static int orte_rmgr_cnos_connect(orte_std_cntr_t num_connect,
                                  orte_process_name_t *connect)
{
    (void)num_connect;  /* unused */
    (void)connect;      /* unused */

    return ORTE_ERR_NOT_SUPPORTED;
}
/*
 * CNOS "disconnect" entry point: a stub that ignores its arguments and
 * always returns ORTE_ERR_NOT_SUPPORTED.
 */
static int orte_rmgr_cnos_disconnect(orte_std_cntr_t num_connect,
                                     orte_process_name_t *connect)
{
    (void)num_connect;  /* unused */
    (void)connect;      /* unused */

    return ORTE_ERR_NOT_SUPPORTED;
}
static int orte_rmgr_cnos_finalize(void)
{

Просмотреть файл

@ -45,10 +45,10 @@ orte_rmgr_base_component_t mca_rmgr_cnos_component = {
information about the component itself */
{
/* Indicate that we are a rmgr v1.3.0 component (which also
/* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0,
ORTE_RMGR_BASE_VERSION_2_0_0,
"cnos", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -48,6 +48,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
@ -55,6 +57,8 @@ orte_rmgr_base_module_t orte_rmgr_proxy_module = {
NULL, /* don't need special init */
orte_rmgr_proxy_setup_job,
orte_rmgr_proxy_spawn_job,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
NULL, /* finalize */
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context,
@ -269,6 +273,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions)
{

Просмотреть файл

@ -41,10 +41,10 @@ orte_rmgr_proxy_component_t mca_rmgr_proxy_component = {
information about the component itself */
{
/* Indicate that we are a iof v1.0.0 component (which also
/* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0,
ORTE_RMGR_BASE_VERSION_2_0_0,
"proxy", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -93,10 +93,33 @@ typedef int (*orte_rmgr_base_module_spawn_job_fn_t)(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t *jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
/**
* Connect a process to other processes, possibly in other jobs. Note that this
* function supports WILDCARD process name fields. Thus, a process can request
* connection to all other processes in another job by providing a single
* entry in the connect array that has a cellid of ORTE_CELLID_WILDCARD, the
* desired jobid, and a vpid of ORTE_VPID_WILDCARD.
*
* @param num_connect Presumably the number of entries in the connect
*                    array (TODO confirm once an implementation exists).
* @param connect     Array of process names to connect to; entries may
*                    use the WILDCARD fields described above.
*
* @return An int status code (presumably an ORTE_* constant - confirm
*         against the implementing module).
*/
typedef int (*orte_rmgr_base_module_connect_fn_t)(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
/**
* Disconnect a process from one or more other processes. Note that this
* function supports WILDCARD process name fields. Thus, a process can request
* to disconnect from all other processes in another job by providing a single
* entry in the disconnect array that has a cellid of ORTE_CELLID_WILDCARD, the
* desired jobid, and a vpid of ORTE_VPID_WILDCARD.
*
* @param num_disconnect Presumably the number of entries in the disconnect
*                       array (TODO confirm once an implementation exists).
* @param disconnect     Array of process names to disconnect from; entries
*                       may use the WILDCARD fields described above.
*
* @return An int status code (presumably an ORTE_* constant - confirm
*         against the implementing module).
*/
typedef int (*orte_rmgr_base_module_disconnect_fn_t)(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
/**
* Allow module-specific init.
*/
@ -156,12 +179,14 @@ typedef int (*orte_rmgr_base_module_get_vpid_range_fn_t)(orte_jobid_t jobid,
/*
* Ver 1.3.0
* Ver 2.0
*/
struct orte_rmgr_base_module_1_3_0_t {
struct orte_rmgr_base_module_2_0_0_t {
orte_rmgr_base_module_init_fn_t module_init;
orte_rmgr_base_module_setup_job_fn_t setup_job;
orte_rmgr_base_module_spawn_job_fn_t spawn_job;
orte_rmgr_base_module_connect_fn_t connect;
orte_rmgr_base_module_disconnect_fn_t disconnect;
orte_rmgr_base_module_finalize_fn_t finalize;
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_module_get_app_context_fn_t get_app_context;
@ -172,8 +197,8 @@ struct orte_rmgr_base_module_1_3_0_t {
orte_rmgr_base_module_get_vpid_range_fn_t get_vpid_range;
};
typedef struct orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_1_3_0_t;
typedef orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_t;
typedef struct orte_rmgr_base_module_2_0_0_t orte_rmgr_base_module_2_0_0_t;
typedef orte_rmgr_base_module_2_0_0_t orte_rmgr_base_module_t;
/*
* RMGR Component
@ -187,24 +212,24 @@ typedef orte_rmgr_base_module_t* (*orte_rmgr_base_component_init_fn_t)(
* the standard component data structure
*/
struct orte_rmgr_base_component_1_3_0_t {
struct orte_rmgr_base_component_2_0_0_t {
mca_base_component_t rmgr_version;
mca_base_component_data_1_0_0_t rmgr_data;
orte_rmgr_base_component_init_fn_t rmgr_init;
};
typedef struct orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_1_3_0_t;
typedef orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_t;
typedef struct orte_rmgr_base_component_2_0_0_t orte_rmgr_base_component_2_0_0_t;
typedef orte_rmgr_base_component_2_0_0_t orte_rmgr_base_component_t;
/**
* Macro for use in components that are of type rmgr v1.0.0
* Macro for use in components that are of type rmgr v2.0.0
*/
#define ORTE_RMGR_BASE_VERSION_1_3_0 \
/* rmgr v1.0 is chained to MCA v1.0 */ \
#define ORTE_RMGR_BASE_VERSION_2_0_0 \
/* rmgr v2.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* rmgr v1.3 */ \
"rmgr", 1, 3, 0
/* rmgr v2.0 */ \
"rmgr", 2, 0, 0
/**
* Global structure for accessing RMGR functions

Просмотреть файл

@ -54,6 +54,8 @@ static int orte_rmgr_urm_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
@ -66,6 +68,8 @@ orte_rmgr_base_module_t orte_rmgr_urm_module = {
orte_rmgr_urm_module_init,
orte_rmgr_urm_setup_job,
orte_rmgr_urm_spawn_job,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
orte_rmgr_urm_module_finalize,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context,
@ -264,6 +268,8 @@ static int orte_rmgr_urm_spawn_job(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions)
{

Просмотреть файл

@ -49,10 +49,10 @@ orte_rmgr_urm_component_t mca_rmgr_urm_component = {
information about the component itself */
{
/* Indicate that we are a rmgr v1.3.0 component (which also
/* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0,
ORTE_RMGR_BASE_VERSION_2_0_0,
"urm", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -450,7 +450,7 @@ int orterun(int argc, char *argv[])
/* Spawn the job */
cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1;
rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, job_state_callback, cb_states);
rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, 0, NULL, job_state_callback, cb_states);
if (ORTE_SUCCESS != rc) {
/* JMS show_help */
opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc);