* checkpoint bproc changes
This commit was SVN r3697.
Этот коммит содержится в:
родитель
dd49abce62
Коммит
76d48e20f8
@ -16,6 +16,8 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include "pcm_bproc.h"
|
||||
#include "mca/pcm/pcm.h"
|
||||
#include "mca/pcm/base/base.h"
|
||||
@ -23,42 +25,67 @@
|
||||
|
||||
|
||||
ompi_list_t *
|
||||
mca_pcm_bproc_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_pcm_bproc_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
mca_ns_base_jobid_t jobid,
|
||||
int nodes, int procs)
|
||||
{
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
if (NULL == me) {
|
||||
errno = OMPI_ERR_BAD_PARAM;
|
||||
return NULL;
|
||||
}
|
||||
/* since we are using llm, don't worry about the other params */
|
||||
|
||||
return me->llm->llm_allocate_resources(me->llm, jobid, nodes, procs);;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_bproc_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_pcm_bproc_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
mca_ns_base_jobid_t jobid, ompi_list_t *schedlist)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_bproc_kill_proc(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_pcm_bproc_kill_proc(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
ompi_process_name_t *name, int flags)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||
if (NULL == name) return OMPI_ERR_BAD_PARAM;
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_bproc_kill_job(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_pcm_bproc_kill_job(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
mca_ns_base_jobid_t jobid, int flags)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||
/* check for invalid jobid */
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_bproc_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_pcm_bproc_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
mca_ns_base_jobid_t jobid,
|
||||
ompi_list_t *nodelist)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||
/* since we are using llm, don't worry about the other params */
|
||||
|
||||
return me->llm->llm_deallocate_resources(me->llm, jobid, nodelist);
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mca/pcm/pcm.h"
|
||||
#include "mca/llm/llm.h"
|
||||
#include "include/types.h"
|
||||
#include "class/ompi_list.h"
|
||||
|
||||
@ -44,63 +45,35 @@ extern "C" {
|
||||
* Startup / Shutdown
|
||||
*/
|
||||
struct mca_pcm_base_module_1_0_0_t* mca_pcm_bproc_init(int *priority,
|
||||
bool *allow_multi_user_threads,
|
||||
bool *have_hidden_threads,
|
||||
int constraints);
|
||||
bool have_threasds,
|
||||
int constraints);
|
||||
int mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me);
|
||||
|
||||
/*
|
||||
* Interface
|
||||
*/
|
||||
ompi_list_t* mca_pcm_bproc_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_ns_base_jobid_t jobid,
|
||||
int nodes, int procs);
|
||||
mca_ns_base_jobid_t jobid,
|
||||
int nodes, int procs);
|
||||
int mca_pcm_bproc_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_ns_base_jobid_t jobid, ompi_list_t *schedule_list);
|
||||
mca_ns_base_jobid_t jobid, ompi_list_t *schedule_list);
|
||||
int mca_pcm_bproc_kill_proc(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
ompi_process_name_t *name, int flags);
|
||||
ompi_process_name_t *name, int flags);
|
||||
int mca_pcm_bproc_kill_job(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_ns_base_jobid_t jobid, int flags);
|
||||
mca_ns_base_jobid_t jobid, int flags);
|
||||
int mca_pcm_bproc_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
mca_ns_base_jobid_t jobid,
|
||||
ompi_list_t *nodelist);
|
||||
mca_ns_base_jobid_t jobid,
|
||||
ompi_list_t *nodelist);
|
||||
|
||||
/*
|
||||
* Job management code
|
||||
*/
|
||||
void mca_pcm_bproc_job_list_init(void);
|
||||
void mca_pcm_bproc_job_list_fini(void);
|
||||
struct mca_pcm_bproc_module_t {
|
||||
mca_pcm_base_module_t super;
|
||||
|
||||
int mca_pcm_bproc_add_started_pids(mca_ns_base_jobid_t jobid, pid_t child_pid,
|
||||
mca_ns_base_vpid_t lower, mca_ns_base_vpid_t upper);
|
||||
pid_t mca_pcm_bproc_get_started_pid(mca_ns_base_jobid_t jobid, mca_ns_base_vpid_t vpid,
|
||||
bool remove_started_pid);
|
||||
int mca_pcm_bproc_get_started_pid_list(mca_ns_base_jobid_t jobid, pid_t **pids, size_t *len,
|
||||
bool remove_started_pids);
|
||||
int mca_pcm_bproc_remove_job(mca_ns_base_jobid_t jobid);
|
||||
mca_llm_base_module_t *llm;
|
||||
|
||||
struct mca_pcm_bproc_pids_t {
|
||||
ompi_list_item_t super;
|
||||
mca_ns_base_vpid_t lower;
|
||||
mca_ns_base_vpid_t upper;
|
||||
pid_t child;
|
||||
int constraints;
|
||||
};
|
||||
typedef struct mca_pcm_bproc_pids_t mca_pcm_bproc_pids_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pcm_bproc_pids_t);
|
||||
typedef struct mca_pcm_bproc_module_t mca_pcm_bproc_module_t;
|
||||
|
||||
struct mca_pcm_bproc_job_item_t {
|
||||
ompi_list_item_t super;
|
||||
mca_ns_base_jobid_t jobid;
|
||||
ompi_list_t *pids;
|
||||
};
|
||||
typedef struct mca_pcm_bproc_job_item_t mca_pcm_bproc_job_item_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pcm_bproc_job_item_t);
|
||||
|
||||
|
||||
/*
|
||||
* Module variables
|
||||
*/
|
||||
extern int mca_pcm_bproc_output;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -25,11 +25,14 @@
|
||||
#include "mca/pcm/pcm.h"
|
||||
#include "mca/pcm/base/base.h"
|
||||
#include "mca/llm/base/base.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/bproc.h>
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
@ -53,16 +56,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_bproc_component = {
|
||||
};
|
||||
|
||||
|
||||
struct mca_pcm_base_module_1_0_0_t mca_pcm_bproc_1_0_0 = {
|
||||
mca_pcm_bproc_allocate_resources,
|
||||
mca_pcm_bproc_spawn_procs,
|
||||
mca_pcm_bproc_kill_proc,
|
||||
mca_pcm_bproc_kill_job,
|
||||
mca_pcm_bproc_deallocate_resources,
|
||||
mca_pcm_bproc_finalize
|
||||
};
|
||||
|
||||
|
||||
/* need to create output stream to dump in file */
|
||||
ompi_output_stream_t mca_pcm_bproc_output_stream = {
|
||||
false, /* lds_is_debugging BWB - change me for release */
|
||||
@ -83,21 +76,10 @@ ompi_output_stream_t mca_pcm_bproc_output_stream = {
|
||||
* Module variables handles
|
||||
*/
|
||||
static int mca_pcm_bproc_param_priority;
|
||||
static int mca_pcm_bproc_param_debug;
|
||||
|
||||
/*
|
||||
* Component variables. All of these are shared among the module
|
||||
* instances, so they don't need to go in a special structure or
|
||||
* anything.
|
||||
*/
|
||||
int mca_pcm_bproc_output = 0;
|
||||
|
||||
int
|
||||
mca_pcm_bproc_component_open(void)
|
||||
{
|
||||
mca_pcm_bproc_param_debug =
|
||||
mca_base_param_register_int("pcm", "bproc", "debug", NULL, 100);
|
||||
|
||||
mca_pcm_bproc_param_priority =
|
||||
mca_base_param_register_int("pcm", "bproc", "priority", NULL, 5);
|
||||
|
||||
@ -114,28 +96,73 @@ mca_pcm_bproc_component_close(void)
|
||||
|
||||
mca_pcm_base_module_t*
|
||||
mca_pcm_bproc_init(int *priority,
|
||||
bool *allow_multi_user_threads,
|
||||
bool *have_hidden_threads,
|
||||
int constraints)
|
||||
bool have_threads,
|
||||
int constraints)
|
||||
{
|
||||
int debug;
|
||||
|
||||
mca_base_param_lookup_int(mca_pcm_bproc_param_debug, &debug);
|
||||
mca_pcm_bproc_output = ompi_output_open(&mca_pcm_bproc_output_stream);
|
||||
ompi_output_set_verbosity(mca_pcm_bproc_output, debug);
|
||||
int ret;
|
||||
mca_pcm_bproc_module_t *me;
|
||||
struct bproc_version_t vers;
|
||||
|
||||
mca_base_param_lookup_int(mca_pcm_bproc_param_priority, priority);
|
||||
|
||||
*allow_multi_user_threads = true;
|
||||
*have_hidden_threads = false;
|
||||
/* we can start daemons, we can do qos, and it looks like we can spawn,
|
||||
so no constraint searching is needed */
|
||||
if (0 != (constraints & OMPI_RTE_SPAWN_MULTI_CELL)) {
|
||||
errno = OMPI_ERR_BAD_PARAM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
ret = mca_llm_base_select("rsh", &(me->llm), have_threads);
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* well, that can't be good. guess we can't run */
|
||||
ompi_output_verbose(5, mca_pcm_base_output, "init: no llm found");
|
||||
free(me);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* see if bproc is running */
|
||||
ret = bproc_version(&vers);
|
||||
if (ret != 0) {
|
||||
ompi_output_verbose(5, mca_pcm_base_output,
|
||||
"bproc: bproc_version() failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If we're not on the master, forget it */
|
||||
if (bproc_currnode() != BPROC_NODE_MASTER) {
|
||||
ompi_output_verbose(5, mca_pcm_base_output,
|
||||
"bproc: not on BPROC_NODE_MASTER");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
me->constraints = constraints;
|
||||
|
||||
/*
|
||||
* fill in the function pointers
|
||||
*/
|
||||
me->super.pcm_allocate_resources = mca_pcm_bproc_allocate_resources;
|
||||
me->super.pcm_spawn_procs = mca_pcm_bproc_spawn_procs;
|
||||
me->super.pcm_kill_proc = mca_pcm_bproc_kill_proc;
|
||||
me->super.pcm_kill_job = mca_pcm_bproc_kill_job;
|
||||
me->super.pcm_deallocate_resources = mca_pcm_bproc_deallocate_resources;
|
||||
me->super.pcm_finalize = mca_pcm_bproc_finalize;
|
||||
|
||||
return (mca_pcm_base_module_t*) me;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me)
|
||||
mca_pcm_bproc_finalize(struct mca_pcm_base_module_1_0_0_t* me_super)
|
||||
{
|
||||
mca_pcm_bproc_module_t *me = (mca_pcm_bproc_module_t*) me_super;
|
||||
|
||||
if (NULL == me) return OMPI_ERR_BAD_PARAM;
|
||||
|
||||
me->llm->llm_finalize(me->llm);
|
||||
|
||||
free(me);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user