* checkpoint bproc changes
This commit was SVN r3697.
Этот коммит содержится в:
родитель
dd49abce62
Коммит
76d48e20f8
@ -16,6 +16,8 @@
|
|||||||
|
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
#include "pcm_bproc.h"
|
#include "pcm_bproc.h"
|
||||||
#include "mca/pcm/pcm.h"
|
#include "mca/pcm/pcm.h"
|
||||||
#include "mca/pcm/base/base.h"
|
#include "mca/pcm/base/base.h"
|
||||||
@ -23,42 +25,67 @@
|
|||||||
|
|
||||||
|
|
||||||
ompi_list_t *
|
ompi_list_t *
|
||||||
mca_pcm_bproc_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
mca_pcm_bproc_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||||
mca_ns_base_jobid_t jobid,
|
mca_ns_base_jobid_t jobid,
|
||||||
int nodes, int procs)
|
int nodes, int procs)
|
||||||
{
|
{
|
||||||
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
if (NULL == me) {
|
||||||
|
errno = OMPI_ERR_BAD_PARAM;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
/* since we are using llm, don't worry about the other params */
|
||||||
|
|
||||||
|
return me->llm->llm_allocate_resources(me->llm, jobid, nodes, procs);;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
|
mca_pcm_bproc_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||||
mca_ns_base_jobid_t jobid, ompi_list_t *schedlist)
|
mca_ns_base_jobid_t jobid, ompi_list_t *schedlist)
|
||||||
{
|
{
|
||||||
return OMPI_SUCCESS;
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_kill_proc(struct mca_pcm_base_module_1_0_0_t* me,
|
mca_pcm_bproc_kill_proc(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||||
ompi_process_name_t *name, int flags)
|
ompi_process_name_t *name, int flags)
|
||||||
{
|
{
|
||||||
return OMPI_SUCCESS;
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||||
|
if (NULL == name) return OMPI_ERR_BAD_PARAM;
|
||||||
|
|
||||||
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_kill_job(struct mca_pcm_base_module_1_0_0_t* me,
|
mca_pcm_bproc_kill_job(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||||
mca_ns_base_jobid_t jobid, int flags)
|
mca_ns_base_jobid_t jobid, int flags)
|
||||||
{
|
{
|
||||||
return OMPI_SUCCESS;
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||||
|
/* check for invalid jobid */
|
||||||
|
|
||||||
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
mca_pcm_bproc_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||||
mca_ns_base_jobid_t jobid,
|
mca_ns_base_jobid_t jobid,
|
||||||
ompi_list_t *nodelist)
|
ompi_list_t *nodelist)
|
||||||
{
|
{
|
||||||
return OMPI_SUCCESS;
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||||
|
/* since we are using llm, don't worry about the other params */
|
||||||
|
|
||||||
|
return me->llm->llm_deallocate_resources(me->llm, jobid, nodelist);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
|
|
||||||
#include "mca/pcm/pcm.h"
|
#include "mca/pcm/pcm.h"
|
||||||
|
#include "mca/llm/llm.h"
|
||||||
#include "include/types.h"
|
#include "include/types.h"
|
||||||
#include "class/ompi_list.h"
|
#include "class/ompi_list.h"
|
||||||
|
|
||||||
@ -44,8 +45,7 @@ extern "C" {
|
|||||||
* Startup / Shutdown
|
* Startup / Shutdown
|
||||||
*/
|
*/
|
||||||
struct mca_pcm_base_module_1_0_0_t* mca_pcm_bproc_init(int *priority,
|
struct mca_pcm_base_module_1_0_0_t* mca_pcm_bproc_init(int *priority,
|
||||||
bool *allow_multi_user_threads,
|
bool have_threasds,
|
||||||
bool *have_hidden_threads,
|
|
||||||
int constraints);
|
int constraints);
|
||||||
int mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me);
|
int mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me);
|
||||||
|
|
||||||
@ -65,42 +65,15 @@ extern "C" {
|
|||||||
mca_ns_base_jobid_t jobid,
|
mca_ns_base_jobid_t jobid,
|
||||||
ompi_list_t *nodelist);
|
ompi_list_t *nodelist);
|
||||||
|
|
||||||
/*
|
struct mca_pcm_bproc_module_t {
|
||||||
* Job management code
|
mca_pcm_base_module_t super;
|
||||||
*/
|
|
||||||
void mca_pcm_bproc_job_list_init(void);
|
|
||||||
void mca_pcm_bproc_job_list_fini(void);
|
|
||||||
|
|
||||||
int mca_pcm_bproc_add_started_pids(mca_ns_base_jobid_t jobid, pid_t child_pid,
|
mca_llm_base_module_t *llm;
|
||||||
mca_ns_base_vpid_t lower, mca_ns_base_vpid_t upper);
|
|
||||||
pid_t mca_pcm_bproc_get_started_pid(mca_ns_base_jobid_t jobid, mca_ns_base_vpid_t vpid,
|
|
||||||
bool remove_started_pid);
|
|
||||||
int mca_pcm_bproc_get_started_pid_list(mca_ns_base_jobid_t jobid, pid_t **pids, size_t *len,
|
|
||||||
bool remove_started_pids);
|
|
||||||
int mca_pcm_bproc_remove_job(mca_ns_base_jobid_t jobid);
|
|
||||||
|
|
||||||
struct mca_pcm_bproc_pids_t {
|
int constraints;
|
||||||
ompi_list_item_t super;
|
|
||||||
mca_ns_base_vpid_t lower;
|
|
||||||
mca_ns_base_vpid_t upper;
|
|
||||||
pid_t child;
|
|
||||||
};
|
};
|
||||||
typedef struct mca_pcm_bproc_pids_t mca_pcm_bproc_pids_t;
|
typedef struct mca_pcm_bproc_module_t mca_pcm_bproc_module_t;
|
||||||
OBJ_CLASS_DECLARATION(mca_pcm_bproc_pids_t);
|
|
||||||
|
|
||||||
struct mca_pcm_bproc_job_item_t {
|
|
||||||
ompi_list_item_t super;
|
|
||||||
mca_ns_base_jobid_t jobid;
|
|
||||||
ompi_list_t *pids;
|
|
||||||
};
|
|
||||||
typedef struct mca_pcm_bproc_job_item_t mca_pcm_bproc_job_item_t;
|
|
||||||
OBJ_CLASS_DECLARATION(mca_pcm_bproc_job_item_t);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Module variables
|
|
||||||
*/
|
|
||||||
extern int mca_pcm_bproc_output;
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
}
|
}
|
||||||
|
@ -25,11 +25,14 @@
|
|||||||
#include "mca/pcm/pcm.h"
|
#include "mca/pcm/pcm.h"
|
||||||
#include "mca/pcm/base/base.h"
|
#include "mca/pcm/base/base.h"
|
||||||
#include "mca/llm/base/base.h"
|
#include "mca/llm/base/base.h"
|
||||||
|
#include "runtime/runtime.h"
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <sys/bproc.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct of function pointers and all that to let us be initialized
|
* Struct of function pointers and all that to let us be initialized
|
||||||
@ -53,16 +56,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_bproc_component = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct mca_pcm_base_module_1_0_0_t mca_pcm_bproc_1_0_0 = {
|
|
||||||
mca_pcm_bproc_allocate_resources,
|
|
||||||
mca_pcm_bproc_spawn_procs,
|
|
||||||
mca_pcm_bproc_kill_proc,
|
|
||||||
mca_pcm_bproc_kill_job,
|
|
||||||
mca_pcm_bproc_deallocate_resources,
|
|
||||||
mca_pcm_bproc_finalize
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* need to create output stream to dump in file */
|
/* need to create output stream to dump in file */
|
||||||
ompi_output_stream_t mca_pcm_bproc_output_stream = {
|
ompi_output_stream_t mca_pcm_bproc_output_stream = {
|
||||||
false, /* lds_is_debugging BWB - change me for release */
|
false, /* lds_is_debugging BWB - change me for release */
|
||||||
@ -83,21 +76,10 @@ ompi_output_stream_t mca_pcm_bproc_output_stream = {
|
|||||||
* Module variables handles
|
* Module variables handles
|
||||||
*/
|
*/
|
||||||
static int mca_pcm_bproc_param_priority;
|
static int mca_pcm_bproc_param_priority;
|
||||||
static int mca_pcm_bproc_param_debug;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Component variables. All of these are shared among the module
|
|
||||||
* instances, so they don't need to go in a special structure or
|
|
||||||
* anything.
|
|
||||||
*/
|
|
||||||
int mca_pcm_bproc_output = 0;
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_component_open(void)
|
mca_pcm_bproc_component_open(void)
|
||||||
{
|
{
|
||||||
mca_pcm_bproc_param_debug =
|
|
||||||
mca_base_param_register_int("pcm", "bproc", "debug", NULL, 100);
|
|
||||||
|
|
||||||
mca_pcm_bproc_param_priority =
|
mca_pcm_bproc_param_priority =
|
||||||
mca_base_param_register_int("pcm", "bproc", "priority", NULL, 5);
|
mca_base_param_register_int("pcm", "bproc", "priority", NULL, 5);
|
||||||
|
|
||||||
@ -114,28 +96,73 @@ mca_pcm_bproc_component_close(void)
|
|||||||
|
|
||||||
mca_pcm_base_module_t*
|
mca_pcm_base_module_t*
|
||||||
mca_pcm_bproc_init(int *priority,
|
mca_pcm_bproc_init(int *priority,
|
||||||
bool *allow_multi_user_threads,
|
bool have_threads,
|
||||||
bool *have_hidden_threads,
|
|
||||||
int constraints)
|
int constraints)
|
||||||
{
|
{
|
||||||
int debug;
|
int ret;
|
||||||
|
mca_pcm_bproc_module_t *me;
|
||||||
mca_base_param_lookup_int(mca_pcm_bproc_param_debug, &debug);
|
struct bproc_version_t vers;
|
||||||
mca_pcm_bproc_output = ompi_output_open(&mca_pcm_bproc_output_stream);
|
|
||||||
ompi_output_set_verbosity(mca_pcm_bproc_output, debug);
|
|
||||||
|
|
||||||
mca_base_param_lookup_int(mca_pcm_bproc_param_priority, priority);
|
mca_base_param_lookup_int(mca_pcm_bproc_param_priority, priority);
|
||||||
|
|
||||||
*allow_multi_user_threads = true;
|
/* we can start daemons, we can do qos, and it looks like we can spawn,
|
||||||
*have_hidden_threads = false;
|
so no constraints searching */
|
||||||
|
if (0 != (constraints & OMPI_RTE_SPAWN_MULTI_CELL)) {
|
||||||
|
errno = OMPI_ERR_BAD_PARAM;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = mca_llm_base_select("rsh", &(me->llm), have_threads);
|
||||||
|
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
/* well, that can't be good. guess we can't run */
|
||||||
|
ompi_output_verbose(5, mca_pcm_base_output, "init: no llm found");
|
||||||
|
free(me);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* see if bproc is running */
|
||||||
|
ret = bproc_version(&vers);
|
||||||
|
if (ret != 0) {
|
||||||
|
ompi_output_verbose(5, mca_pcm_base_output,
|
||||||
|
"bproc: bproc_version() failed");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we're not on the master, forget it */
|
||||||
|
if (bproc_currnode() != BPROC_NODE_MASTER) {
|
||||||
|
ompi_output_verbose(5, mca_pcm_base_output,
|
||||||
|
"bproc: not on BPROC_NODE_MASTER");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
me->constraints = constraints;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fill in the function pointers
|
||||||
|
*/
|
||||||
|
me->super.pcm_allocate_resources = mca_pcm_bproc_allocate_resources;
|
||||||
|
me->super.pcm_spawn_procs = mca_pcm_bproc_spawn_procs;
|
||||||
|
me->super.pcm_kill_proc = mca_pcm_bproc_kill_proc;
|
||||||
|
me->super.pcm_kill_job = mca_pcm_bproc_kill_job;
|
||||||
|
me->super.pcm_deallocate_resources = mca_pcm_bproc_deallocate_resources;
|
||||||
|
me->super.pcm_finalize = mca_pcm_bproc_finalize;
|
||||||
|
|
||||||
|
return (mca_pcm_base_module_t*) me;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me)
|
mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me_super)
|
||||||
{
|
{
|
||||||
|
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||||
|
|
||||||
|
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||||
|
|
||||||
|
me->llm->llm_finalize(me->llm);
|
||||||
|
|
||||||
|
free(me);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user