1
1

* fix some bit rot in tm pls/ras

* remove src/ directory for tm pls/ras

This commit was SVN r6937.
Этот коммит содержится в:
Brian Barrett 2005-08-19 14:46:11 +00:00
родитель 387dacef66
Коммит 80f27b5d87
19 изменённых файлов: 252 добавлений и 262 удалений

Просмотреть файл

@ -32,7 +32,8 @@ libmca_pls_base_la_SOURCES = \
pls_base_close.c \ pls_base_close.c \
pls_base_open.c \ pls_base_open.c \
pls_base_select.c \ pls_base_select.c \
pls_base_state.c pls_base_state.c \
pls_base_proxy.c
# Conditionally install the header files # Conditionally install the header files

Просмотреть файл

@ -25,6 +25,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mca/mca.h" #include "mca/mca.h"
#include "mca/pls/pls.h" #include "mca/pls/pls.h"
#include "orte/mca/ras/base/ras_base_node.h"
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
@ -107,6 +108,16 @@ extern "C" {
*/ */
OMPI_DECLSPEC int orte_pls_base_set_progress_sched(int sched); OMPI_DECLSPEC int orte_pls_base_set_progress_sched(int sched);
/**
* Utilities for pls components that use proxy daemons
*/
/*
* Store the daemon's process name in the registry for the given node,
* under a key built from ORTE_NODE_BOOTPROXY_KEY and the job id.
* Returns ORTE_SUCCESS or the error code of the failing call.
*/
int orte_pls_base_proxy_set_node_name(orte_ras_node_t* node,
orte_jobid_t jobid,
orte_process_name_t* name);
/*
* Look up the proxy daemons recorded for the job in the registry and
* send each of them a non-blocking ORTE_RMGR_CMD_TERM_JOB command.
* Returns ORTE_ERR_NOT_FOUND when the registry holds no entry for the job.
*/
int orte_pls_base_proxy_terminate_job(orte_jobid_t jobid);
/*
* Per-process termination; currently always returns
* ORTE_ERR_NOT_IMPLEMENTED.
*/
int orte_pls_base_proxy_terminate_proc(const orte_process_name_t *proc);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }
#endif #endif

216
orte/mca/pls/base/pls_base_proxy.c Обычный файл
Просмотреть файл

@ -0,0 +1,216 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/mca/pls/base/base.h"
#include "orte/include/orte_constants.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rmgr/base/base.h"
#include "opal/util/output.h"
/**
 * Record a proxy daemon's process name in the registry for a node.
 *
 * The name is stored on ORTE_NODE_SEGMENT under the key
 * "<ORTE_NODE_BOOTPROXY_KEY>-<jobid>", addressed by the tokens that
 * orte_schema.get_node_tokens() produces for the node's cell id and
 * node name.  The entry is written with ORTE_GPR_OVERWRITE.
 *
 * @param node  Node whose node_cellid / node_name select the registry entry.
 * @param jobid Job the daemon belongs to; its string form is the key suffix.
 * @param name  Process name of the daemon; copied by value into the keyval.
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_OUT_OF_RESOURCE if the key
 *         string cannot be allocated, otherwise the error code returned
 *         by the failing name-service, schema or registry call.
 */
int
orte_pls_base_proxy_set_node_name(orte_ras_node_t* node,
                                  orte_jobid_t jobid,
                                  orte_process_name_t* name)
{
    orte_gpr_value_t* values[1];
    orte_gpr_value_t value;
    orte_gpr_keyval_t kv_name = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_NODE_BOOTPROXY_KEY,ORTE_NAME};
    orte_gpr_keyval_t* keyvals[1];
    char* jobid_string;
    size_t i;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&value.tokens, &value.num_tokens,
                                                          node->node_cellid, node->node_name))) {
        ORTE_ERROR_LOG(rc);
        free(jobid_string);
        return rc;
    }

    /* asprintf() leaves the output pointer indeterminate on failure, so
     * reset it before it can reach orte_gpr.put() or free(). */
    if (0 > asprintf(&kv_name.key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string)) {
        kv_name.key = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    kv_name.value.proc = *name;
    keyvals[0] = &kv_name;

    value.keyvals = keyvals;
    value.cnt = 1;
    value.addr_mode = ORTE_GPR_OVERWRITE;
    value.segment = ORTE_NODE_SEGMENT;
    values[0] = &value;

    rc = orte_gpr.put(1, values);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* everything below was heap-allocated by this function's callees */
    free(kv_name.key);
    free(jobid_string);
    for (i = 0; i < value.num_tokens; i++) {
        free(value.tokens[i]);
    }
    free(value.tokens);
    return rc;
}
/**
 * Callback invoked with the response buffer for a terminate-job command.
 *
 * Unpacks the response with orte_rmgr_base_unpack_rsp() and logs the
 * error code if unpacking does not return ORTE_SUCCESS.  The status,
 * peer, tag and cbdata arguments are not used.
 */
static void orte_pls_rsh_terminate_job_rsp(
    int status,
    orte_process_name_t* peer,
    orte_buffer_t* rsp,
    orte_rml_tag_t tag,
    void* cbdata)
{
    int unpack_rc = orte_rmgr_base_unpack_rsp(rsp);

    if (unpack_rc != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(unpack_rc);
    }
}
/**
 * Completion callback for the non-blocking send of a terminate-job
 * command.
 *
 * A negative status is logged.  Otherwise a non-blocking receive on
 * ORTE_RML_TAG_RMGR_CLNT is posted for the peer, with
 * orte_pls_rsh_terminate_job_rsp as its handler; a negative return from
 * that post is logged.  The request buffer is released in every case.
 */
static void orte_pls_rsh_terminate_job_cb(
    int status,
    orte_process_name_t* peer,
    orte_buffer_t* req,
    orte_rml_tag_t tag,
    void* cbdata)
{
    if (status >= 0) {
        /* the command went out: wait for the peer's response */
        int recv_rc = orte_rml.recv_buffer_nb(peer, ORTE_RML_TAG_RMGR_CLNT, 0,
                                              orte_pls_rsh_terminate_job_rsp, NULL);
        if (recv_rc < 0) {
            ORTE_ERROR_LOG(recv_rc);
        }
    } else {
        ORTE_ERROR_LOG(status);
    }

    /* single release point for the request buffer */
    OBJ_RELEASE(req);
}
/**
 * Ask every proxy daemon recorded for a job to terminate that job.
 *
 * Queries ORTE_NODE_SEGMENT for the key
 * "<ORTE_NODE_BOOTPROXY_KEY>-<jobid>" and, for each matching keyval,
 * packs an ORTE_RMGR_CMD_TERM_JOB command and sends it to the stored
 * process name with a non-blocking send on ORTE_RML_TAG_RMGR_SVC.
 * A failure to pack or send for one daemon is logged and remembered in
 * the return code, but the remaining daemons are still contacted.
 *
 * @param jobid Job to terminate.
 *
 * @return ORTE_SUCCESS if every command was handed to the RML,
 *         ORTE_ERR_NOT_FOUND if the registry returned no values,
 *         ORTE_ERR_OUT_OF_RESOURCE on allocation failure, otherwise the
 *         error code of the last failing call.
 */
int
orte_pls_base_proxy_terminate_job(orte_jobid_t jobid)
{
    char *keys[2] = { NULL, NULL };
    char *jobid_string = NULL;
    orte_gpr_value_t** values = NULL;
    size_t i, j, num_values = 0;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* asprintf() leaves the output pointer indeterminate on failure */
    if (0 > asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string)) {
        keys[0] = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    keys[1] = NULL;

    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        ORTE_NODE_SEGMENT,
        NULL,
        keys,
        &num_values,
        &values
        );
    if (rc != ORTE_SUCCESS) {
        /* Release only what this function allocated; the state of
         * 'values' after a failed get is not relied upon. */
        ORTE_ERROR_LOG(rc);
        free(keys[0]);
        free(jobid_string);
        return rc;
    }
    if (0 == num_values) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    for (i = 0; i < num_values; i++) {
        orte_gpr_value_t* value = values[i];
        for (j = 0; j < value->cnt; j++) {
            orte_gpr_keyval_t* keyval = value->keyvals[j];
            orte_buffer_t *cmd;
            int ret;

            /* Filter before allocating so that skipped keyvals do not
             * leak a command buffer. */
            if (strcmp(keyval->key, keys[0]) != 0) {
                continue;
            }

            cmd = OBJ_NEW(orte_buffer_t);
            if (NULL == cmd) {
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }

            /* construct command */
            ret = orte_rmgr_base_pack_cmd(cmd, ORTE_RMGR_CMD_TERM_JOB, jobid);
            if (ORTE_SUCCESS != ret) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(cmd);
                rc = ret;
                continue;
            }

            /* send a terminate message to the bootproxy on each node;
             * on success the send callback releases cmd */
            if (0 > (ret = orte_rml.send_buffer_nb(
                         &keyval->value.proc,
                         cmd,
                         ORTE_RML_TAG_RMGR_SVC,
                         0,
                         orte_pls_rsh_terminate_job_cb,
                         NULL))) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(cmd);
                rc = ret;
                continue;
            }
        }
    }

cleanup:
    free(jobid_string);
    free(keys[0]);
    if (NULL != values) {
        for (i = 0; i < num_values; i++) {
            if (NULL != values[i]) {
                OBJ_RELEASE(values[i]);
            }
        }
        free(values);
    }
    return rc;
}
/**
 * Per-process termination entry point for proxy-based pls components.
 *
 * This is a stub: the argument is ignored and the function always
 * reports that the operation is not implemented.
 *
 * @param proc Name of the process to terminate (unused).
 *
 * @return ORTE_ERR_NOT_IMPLEMENTED, unconditionally.
 */
int
orte_pls_base_proxy_terminate_proc(const orte_process_name_t *proc)
{
    (void) proc;    /* intentionally unused */

    return ORTE_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -44,6 +44,7 @@
#include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_wait.h"
#include "orte/mca/ns/ns.h" #include "orte/mca/ns/ns.h"
#include "orte/mca/pls/pls.h" #include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/base.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
@ -198,54 +199,6 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
OBJ_RELEASE(info); OBJ_RELEASE(info);
} }
/**
* Set the daemons name in the registry.
*/
static int orte_pls_rsh_set_node_name(orte_ras_node_t* node, orte_jobid_t jobid, orte_process_name_t* name)
{
orte_gpr_value_t* values[1];
orte_gpr_value_t value;
orte_gpr_keyval_t kv_name = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_NODE_BOOTPROXY_KEY,ORTE_NAME};
orte_gpr_keyval_t* keyvals[1];
char* jobid_string;
size_t i;
int rc;
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&value.tokens, &value.num_tokens,
node->node_cellid, node->node_name))) {
ORTE_ERROR_LOG(rc);
free(jobid_string);
return rc;
}
asprintf(&kv_name.key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
kv_name.value.proc = *name;
keyvals[0] = &kv_name;
value.keyvals = keyvals;
value.cnt = 1;
value.addr_mode = ORTE_GPR_OVERWRITE;
value.segment = ORTE_NODE_SEGMENT;
values[0] = &value;
rc = orte_gpr.put(1, values);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
free(kv_name.key);
free(jobid_string);
for(i=0; i<value.num_tokens; i++)
free(value.tokens[i]);
free(value.tokens);
return rc;
}
/** /**
* Launch a daemon (bootproxy) on each node. The daemon will be responsible * Launch a daemon (bootproxy) on each node. The daemon will be responsible
@ -658,7 +611,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock); OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* save the daemons name on the node */ /* save the daemons name on the node */
if (ORTE_SUCCESS != (rc = orte_pls_rsh_set_node_name(node,jobid,name))) { if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
@ -691,142 +644,17 @@ cleanup:
} }
/**
* Wait for a pending job to complete.
*/
static void orte_pls_rsh_terminate_job_rsp(
int status,
orte_process_name_t* peer,
orte_buffer_t* rsp,
orte_rml_tag_t tag,
void* cbdata)
{
int rc;
if (ORTE_SUCCESS != (rc = orte_rmgr_base_unpack_rsp(rsp))) {
ORTE_ERROR_LOG(rc);
}
}
static void orte_pls_rsh_terminate_job_cb(
int status,
orte_process_name_t* peer,
orte_buffer_t* req,
orte_rml_tag_t tag,
void* cbdata)
{
/* wait for response */
int rc;
if (status < 0) {
ORTE_ERROR_LOG(status);
OBJ_RELEASE(req);
return;
}
if (0 > (rc = orte_rml.recv_buffer_nb(peer, ORTE_RML_TAG_RMGR_CLNT, 0, orte_pls_rsh_terminate_job_rsp, NULL))) {
ORTE_ERROR_LOG(rc);
}
OBJ_RELEASE(req);
}
/** /**
* Query the registry for all nodes participating in the job * Query the registry for all nodes participating in the job
*/ */
int orte_pls_rsh_terminate_job(orte_jobid_t jobid) int orte_pls_rsh_terminate_job(orte_jobid_t jobid)
{ {
char *keys[2]; return orte_pls_base_proxy_terminate_job(jobid);
char *jobid_string;
orte_gpr_value_t** values = NULL;
size_t i, j, num_values = 0;
int rc;
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
keys[1] = NULL;
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
NULL,
keys,
&num_values,
&values
);
if (rc != ORTE_SUCCESS) {
free(jobid_string);
return rc;
}
if (0 == num_values) {
rc = ORTE_ERR_NOT_FOUND;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(i=0; i<num_values; i++) {
orte_gpr_value_t* value = values[i];
for(j=0; j<value->cnt; j++) {
orte_gpr_keyval_t* keyval = value->keyvals[j];
orte_buffer_t *cmd = OBJ_NEW(orte_buffer_t);
int ret;
if (cmd == NULL) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (strcmp(keyval->key, keys[0]) != 0)
continue;
/* construct command */
ret = orte_rmgr_base_pack_cmd(cmd, ORTE_RMGR_CMD_TERM_JOB, jobid);
if (ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(cmd);
rc = ret;
continue;
}
/* send a terminate message to the bootproxy on each node */
if (0 > (ret = orte_rml.send_buffer_nb(
&keyval->value.proc,
cmd,
ORTE_RML_TAG_RMGR_SVC,
0,
orte_pls_rsh_terminate_job_cb,
NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(cmd);
rc = ret;
continue;
}
}
}
cleanup:
free(jobid_string);
free(keys[0]);
if (NULL != values) {
for(i=0; i<num_values; i++) {
if (NULL != values[i]) {
OBJ_RELEASE(values[i]);
}
}
free(values);
}
return rc;
} }
int orte_pls_rsh_terminate_proc(const orte_process_name_t* proc) int orte_pls_rsh_terminate_proc(const orte_process_name_t* proc)
{ {
return ORTE_ERR_NOT_IMPLEMENTED; return orte_pls_base_proxy_terminate_proc(proc);
} }
int orte_pls_rsh_finalize(void) int orte_pls_rsh_finalize(void)

Просмотреть файл

@ -1 +1,2 @@
jsquyres jsquyres
brbarret

Просмотреть файл

@ -18,8 +18,14 @@
include $(top_ompi_srcdir)/config/Makefile.options include $(top_ompi_srcdir)/config/Makefile.options
sources = AM_CPPFLAGS = $(pls_tm_CPPFLAGS)
include src/Makefile.extra
sources = \
pls_tm.h \
pls_tm_child.c \
pls_tm_component.c \
pls_tm_module.c \
pls_tm_registry.c
# Make the output library in this directory, and name it either # Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

Просмотреть файл

@ -42,7 +42,7 @@
#include "mca/errmgr/errmgr.h" #include "mca/errmgr/errmgr.h"
#include "mca/soh/soh_types.h" #include "mca/soh/soh_types.h"
#include "mca/gpr/gpr.h" #include "mca/gpr/gpr.h"
#include "mca/ns/base/ns_base_nds.h" #include "orte/mca/sds/base/base.h"
#include "mca/soh/soh.h" #include "mca/soh/soh.h"
#include "mca/rml/rml.h" #include "mca/rml/rml.h"
#include "mca/ns/ns.h" #include "mca/ns/ns.h"

Просмотреть файл

@ -44,7 +44,7 @@
#include "mca/errmgr/errmgr.h" #include "mca/errmgr/errmgr.h"
#include "mca/soh/soh_types.h" #include "mca/soh/soh_types.h"
#include "mca/gpr/gpr.h" #include "mca/gpr/gpr.h"
#include "mca/ns/base/ns_base_nds.h" #include "orte/mca/sds/base/base.h"
#include "mca/soh/soh.h" #include "mca/soh/soh.h"
#include "mca/rml/rml.h" #include "mca/rml/rml.h"
#include "mca/ns/ns.h" #include "mca/ns/ns.h"

Просмотреть файл

@ -1,27 +0,0 @@
# -*- makefile -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \
-I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include \
$(pls_tm_CPPFLAGS)
sources += \
src/pls_tm.h \
src/pls_tm_child.c \
src/pls_tm_component.c \
src/pls_tm_module.c \
src/pls_tm_registry.c

Просмотреть файл

@ -1 +1,2 @@
jsquyres jsquyres
brbarret

Просмотреть файл

@ -18,8 +18,13 @@
include $(top_ompi_srcdir)/config/Makefile.options include $(top_ompi_srcdir)/config/Makefile.options
sources = AM_CPPFLAGS = $(ras_tm_CPPFLAGS)
include src/Makefile.extra
sources = \
ras_tm.h \
ras_tm_component.c \
ras_tm_module.c
# Make the output library in this directory, and name it either # Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -1,26 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_ompi_srcdir)/config/Makefile.options
AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \
-I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include
noinst_LTLIBRARIES = libmca_ras_tm.la
libmca_ras_tm_la_SOURCES = \
ras_tm.h \
ras_tm_component.c \
ras_tm_module.c

Просмотреть файл

@ -1,26 +0,0 @@
# -*- makefile -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \
-I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include \
$(pls_tm_CPPFLAGS)
sources += \
src/ras_tm.h \
src/ras_tm_component.c \
src/ras_tm_module.c