1
1

Add two new API functions to the RMGR, and modify the "spawn" API to support the enhanced MPI-2 functionality.

No implementation backs these new APIs - just placeholders for now.

This commit was SVN r11699.
Этот коммит содержится в:
Ralph Castain 2006-09-19 01:45:05 +00:00
родитель f8de894efe
Коммит 0ad0d84afd
15 изменённых файлов: 145 добавлений и 21 удалений

Просмотреть файл

@ -533,7 +533,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
if (NULL != base_prefix) free(base_prefix); if (NULL != base_prefix) free(base_prefix);
/* spawn procs */ /* spawn procs */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, NULL, ORTE_PROC_STATE_NONE))) { if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
opal_progress_event_decrement(); opal_progress_event_decrement();
return MPI_ERR_SPAWN; return MPI_ERR_SPAWN;

Просмотреть файл

@ -23,6 +23,7 @@ headers += \
libmca_rmgr_la_SOURCES += \ libmca_rmgr_la_SOURCES += \
base/rmgr_base_check_context.c \ base/rmgr_base_check_context.c \
base/rmgr_base_context.c \ base/rmgr_base_context.c \
base/rmgr_base_con_discon.c \
base/rmgr_base_close.c \ base/rmgr_base_close.c \
base/rmgr_base_open.c \ base/rmgr_base_open.c \
base/rmgr_base_receive.c \ base/rmgr_base_receive.c \

49
orte/mca/rmgr/base/rmgr_base_con_discon.c Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* Support functions for the RMGR subsystem
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <string.h>
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "orte/dss/dss.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
/**
 * Base-level stub for the RMGR "connect" entry point.
 *
 * No connection logic is implemented here: both arguments are ignored
 * and the call unconditionally reports that the operation is not
 * supported.
 *
 * @param num_connect  Unused by this stub.
 * @param connect      Unused by this stub.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
                           orte_process_name_t *connect)
{
    const int rc = ORTE_ERR_NOT_SUPPORTED;

    /* The arguments are intentionally left unreferenced. */
    (void) num_connect;
    (void) connect;

    return rc;
}
/**
 * Base-level stub for the RMGR "disconnect" entry point.
 *
 * No disconnection logic is implemented here: both arguments are
 * ignored and the call unconditionally reports that the operation is
 * not supported.
 *
 * @param num_disconnect  Unused by this stub.
 * @param disconnect      Unused by this stub.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
                              orte_process_name_t *disconnect)
{
    const int rc = ORTE_ERR_NOT_SUPPORTED;

    /* The arguments are intentionally left unreferenced. */
    (void) num_disconnect;
    (void) disconnect;

    return rc;
}

Просмотреть файл

@ -53,6 +53,8 @@ orte_rmgr_base_module_t orte_rmgr = {
NULL, NULL,
orte_rmgr_base_create_not_available, orte_rmgr_base_create_not_available,
orte_rmgr_base_spawn_not_available, orte_rmgr_base_spawn_not_available,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
orte_rmgr_base_finalize_not_available, orte_rmgr_base_finalize_not_available,
/** SUPPORT FUNCTIONS ***/ /** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context, orte_rmgr_base_get_app_context,

Просмотреть файл

@ -178,7 +178,8 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
} }
/* process the request */ /* process the request */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job, NULL, ORTE_PROC_STATE_NONE))) { if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job,
0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto SEND_ANSWER; goto SEND_ANSWER;
} }

Просмотреть файл

@ -43,6 +43,8 @@ orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions) orte_proc_state_t cb_conditions)
{ {

Просмотреть файл

@ -83,12 +83,22 @@ int orte_rmgr_base_create_not_available(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid); orte_jobid_t* jobid);
int orte_rmgr_base_spawn_not_available( int orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions); orte_proc_state_t cb_conditions);
/*
 * Base "connect" function. The definition shown for this commit in
 * base/rmgr_base_con_discon.c ignores its arguments and returns
 * ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
/*
 * Base "disconnect" function. The definition shown for this commit in
 * base/rmgr_base_con_discon.c ignores its arguments and returns
 * ORTE_ERR_NOT_SUPPORTED.
 */
int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
int orte_rmgr_base_finalize_not_available(void); int orte_rmgr_base_finalize_not_available(void);
/* /*

Просмотреть файл

@ -46,9 +46,17 @@ static int orte_rmgr_cnos_spawn_job(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions); orte_proc_state_t cb_conditions);
/* Forward declaration of the cnos "connect" function (file-local). */
static int orte_rmgr_cnos_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
/*
 * Forward declaration of the cnos "disconnect" function (file-local).
 * NOTE(review): the parameters are named num_connect/connect here, while the
 * disconnect function-pointer typedef uses num_disconnect/disconnect.
 */
static int orte_rmgr_cnos_disconnect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
static int orte_rmgr_cnos_finalize(void); static int orte_rmgr_cnos_finalize(void);
static int orte_rmgr_cnos_get_app_context(orte_jobid_t jobid, static int orte_rmgr_cnos_get_app_context(orte_jobid_t jobid,
@ -76,6 +84,8 @@ orte_rmgr_base_module_t orte_rmgr_cnos_module = {
NULL, /* don't need special init */ NULL, /* don't need special init */
orte_rmgr_cnos_setup_job, orte_rmgr_cnos_setup_job,
orte_rmgr_cnos_spawn_job, orte_rmgr_cnos_spawn_job,
orte_rmgr_cnos_connect,
orte_rmgr_cnos_disconnect,
orte_rmgr_cnos_finalize, orte_rmgr_cnos_finalize,
/** SUPPORT FUNCTIONS ***/ /** SUPPORT FUNCTIONS ***/
orte_rmgr_cnos_get_app_context, orte_rmgr_cnos_get_app_context,
@ -109,6 +119,18 @@ static int orte_rmgr_cnos_spawn_job(
return ORTE_ERR_NOT_SUPPORTED; return ORTE_ERR_NOT_SUPPORTED;
} }
/**
 * cnos stub for the RMGR "connect" entry point.
 *
 * Both arguments are ignored; the call always reports that the
 * operation is not supported.
 *
 * @param num_connect  Unused by this stub.
 * @param connect      Unused by this stub.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
static int orte_rmgr_cnos_connect(orte_std_cntr_t num_connect,
                                  orte_process_name_t *connect)
{
    const int rc = ORTE_ERR_NOT_SUPPORTED;

    /* The arguments are intentionally left unreferenced. */
    (void) num_connect;
    (void) connect;

    return rc;
}
/**
 * cnos stub for the RMGR "disconnect" entry point.
 *
 * Both arguments are ignored; the call always reports that the
 * operation is not supported.
 *
 * @param num_connect  Unused by this stub.
 * @param connect      Unused by this stub.
 *
 * @return Always ORTE_ERR_NOT_SUPPORTED.
 */
static int orte_rmgr_cnos_disconnect(orte_std_cntr_t num_connect,
                                     orte_process_name_t *connect)
{
    const int rc = ORTE_ERR_NOT_SUPPORTED;

    /* The arguments are intentionally left unreferenced. */
    (void) num_connect;
    (void) connect;

    return rc;
}
static int orte_rmgr_cnos_finalize(void) static int orte_rmgr_cnos_finalize(void)
{ {

Просмотреть файл

@ -45,10 +45,10 @@ orte_rmgr_base_component_t mca_rmgr_cnos_component = {
information about the component itself */ information about the component itself */
{ {
/* Indicate that we are a rmgr v1.3.0 component (which also /* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */ implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0, ORTE_RMGR_BASE_VERSION_2_0_0,
"cnos", /* MCA component name */ "cnos", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -48,6 +48,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions); orte_proc_state_t cb_conditions);
@ -55,6 +57,8 @@ orte_rmgr_base_module_t orte_rmgr_proxy_module = {
NULL, /* don't need special init */ NULL, /* don't need special init */
orte_rmgr_proxy_setup_job, orte_rmgr_proxy_setup_job,
orte_rmgr_proxy_spawn_job, orte_rmgr_proxy_spawn_job,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
NULL, /* finalize */ NULL, /* finalize */
/** SUPPORT FUNCTIONS ***/ /** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context, orte_rmgr_base_get_app_context,
@ -269,6 +273,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc, orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions) orte_proc_state_t cb_conditions)
{ {

Просмотреть файл

@ -41,10 +41,10 @@ orte_rmgr_proxy_component_t mca_rmgr_proxy_component = {
information about the component itself */ information about the component itself */
{ {
/* Indicate that we are a iof v1.0.0 component (which also /* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */ implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0, ORTE_RMGR_BASE_VERSION_2_0_0,
"proxy", /* MCA component name */ "proxy", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -93,10 +93,33 @@ typedef int (*orte_rmgr_base_module_spawn_job_fn_t)(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t *jobid, orte_jobid_t *jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions); orte_proc_state_t cb_conditions);
/**
* Connect a process to other processes, possibly in other jobs. Note that this
* function supports WILDCARD process name fields. Thus, a process can request
* connection to all other processes in another job by providing a single
* entry in the connect array that has a cellid of ORTE_CELLID_WILDCARD, the
* desired jobid, and a vpid of ORTE_VPID_WILDCARD.
*
* @param num_connect Presumably the number of entries in the connect array
*                    (TODO: confirm once an implementation exists).
* @param connect     Array of process names to connect to; individual fields
*                    may hold WILDCARD values as described above.
*
* @return An int status code.
*/
typedef int (*orte_rmgr_base_module_connect_fn_t)(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
/**
* Disconnect a process from one or more other processes. Note that this
* function supports WILDCARD process name fields. Thus, a process can request
* to disconnect from all other processes in another job by providing a single
* entry in the disconnect array that has a cellid of ORTE_CELLID_WILDCARD, the
* desired jobid, and a vpid of ORTE_VPID_WILDCARD.
*
* @param num_disconnect Presumably the number of entries in the disconnect
*                       array (TODO: confirm once an implementation exists).
* @param disconnect     Array of process names to disconnect from; individual
*                       fields may hold WILDCARD values as described above.
*
* @return An int status code.
*/
typedef int (*orte_rmgr_base_module_disconnect_fn_t)(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
/** /**
* Allow module-specific init. * Allow module-specific init.
*/ */
@ -156,12 +179,14 @@ typedef int (*orte_rmgr_base_module_get_vpid_range_fn_t)(orte_jobid_t jobid,
/* /*
* Ver 1.3.0 * Ver 2.0
*/ */
struct orte_rmgr_base_module_1_3_0_t { struct orte_rmgr_base_module_2_0_0_t {
orte_rmgr_base_module_init_fn_t module_init; orte_rmgr_base_module_init_fn_t module_init;
orte_rmgr_base_module_setup_job_fn_t setup_job; orte_rmgr_base_module_setup_job_fn_t setup_job;
orte_rmgr_base_module_spawn_job_fn_t spawn_job; orte_rmgr_base_module_spawn_job_fn_t spawn_job;
orte_rmgr_base_module_connect_fn_t connect;
orte_rmgr_base_module_disconnect_fn_t disconnect;
orte_rmgr_base_module_finalize_fn_t finalize; orte_rmgr_base_module_finalize_fn_t finalize;
/** SUPPORT FUNCTIONS ***/ /** SUPPORT FUNCTIONS ***/
orte_rmgr_base_module_get_app_context_fn_t get_app_context; orte_rmgr_base_module_get_app_context_fn_t get_app_context;
@ -172,8 +197,8 @@ struct orte_rmgr_base_module_1_3_0_t {
orte_rmgr_base_module_get_vpid_range_fn_t get_vpid_range; orte_rmgr_base_module_get_vpid_range_fn_t get_vpid_range;
}; };
typedef struct orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_1_3_0_t; typedef struct orte_rmgr_base_module_2_0_0_t orte_rmgr_base_module_2_0_0_t;
typedef orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_t; typedef orte_rmgr_base_module_2_0_0_t orte_rmgr_base_module_t;
/* /*
* RMGR Component * RMGR Component
@ -187,24 +212,24 @@ typedef orte_rmgr_base_module_t* (*orte_rmgr_base_component_init_fn_t)(
* the standard component data structure * the standard component data structure
*/ */
struct orte_rmgr_base_component_1_3_0_t { struct orte_rmgr_base_component_2_0_0_t {
mca_base_component_t rmgr_version; mca_base_component_t rmgr_version;
mca_base_component_data_1_0_0_t rmgr_data; mca_base_component_data_1_0_0_t rmgr_data;
orte_rmgr_base_component_init_fn_t rmgr_init; orte_rmgr_base_component_init_fn_t rmgr_init;
}; };
typedef struct orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_1_3_0_t; typedef struct orte_rmgr_base_component_2_0_0_t orte_rmgr_base_component_2_0_0_t;
typedef orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_t; typedef orte_rmgr_base_component_2_0_0_t orte_rmgr_base_component_t;
/** /**
* Macro for use in components that are of type rmgr v1.0.0 * Macro for use in components that are of type rmgr v2.0.0
*/ */
#define ORTE_RMGR_BASE_VERSION_1_3_0 \ #define ORTE_RMGR_BASE_VERSION_2_0_0 \
/* rmgr v1.0 is chained to MCA v1.0 */ \ /* rmgr v2.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \ MCA_BASE_VERSION_1_0_0, \
/* rmgr v1.3 */ \ /* rmgr v2.0 */ \
"rmgr", 1, 3, 0 "rmgr", 2, 0, 0
/** /**
* Global structure for accessing RAS functions * Global structure for accessing RAS functions

Просмотреть файл

@ -54,6 +54,8 @@ static int orte_rmgr_urm_spawn_job(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn, orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions); orte_proc_state_t cb_conditions);
@ -66,6 +68,8 @@ orte_rmgr_base_module_t orte_rmgr_urm_module = {
orte_rmgr_urm_module_init, orte_rmgr_urm_module_init,
orte_rmgr_urm_setup_job, orte_rmgr_urm_setup_job,
orte_rmgr_urm_spawn_job, orte_rmgr_urm_spawn_job,
orte_rmgr_base_connect,
orte_rmgr_base_disconnect,
orte_rmgr_urm_module_finalize, orte_rmgr_urm_module_finalize,
/** SUPPORT FUNCTIONS ***/ /** SUPPORT FUNCTIONS ***/
orte_rmgr_base_get_app_context, orte_rmgr_base_get_app_context,
@ -264,6 +268,8 @@ static int orte_rmgr_urm_spawn_job(
orte_app_context_t** app_context, orte_app_context_t** app_context,
orte_std_cntr_t num_context, orte_std_cntr_t num_context,
orte_jobid_t* jobid, orte_jobid_t* jobid,
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc, orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions) orte_proc_state_t cb_conditions)
{ {

Просмотреть файл

@ -49,10 +49,10 @@ orte_rmgr_urm_component_t mca_rmgr_urm_component = {
information about the component itself */ information about the component itself */
{ {
/* Indicate that we are a rmgr v1.3.0 component (which also /* Indicate that we are a rmgr v2.0.0 component (which also
implies a specific MCA version) */ implies a specific MCA version) */
ORTE_RMGR_BASE_VERSION_1_3_0, ORTE_RMGR_BASE_VERSION_2_0_0,
"urm", /* MCA component name */ "urm", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MAJOR_VERSION, /* MCA component major version */

Просмотреть файл

@ -450,7 +450,7 @@ int orterun(int argc, char *argv[])
/* Spawn the job */ /* Spawn the job */
cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1; cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1;
rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, job_state_callback, cb_states); rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, 0, NULL, job_state_callback, cb_states);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
/* JMS show_help */ /* JMS show_help */
opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc); opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc);